gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2016 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "backend.h"
  26 #include "target.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "predict.h"
  30 #include "tm_p.h"
  31 #include "expmed.h"
  32 #include "optabs.h"
  33 #include "emit-rtl.h"
  34 #include "diagnostic-core.h"
  35 #include "fold-const.h"
  36 #include "stor-layout.h"
  37 #include "dojump.h"
  38 #include "explow.h"
  39 #include "expr.h"
  40 #include "langhooks.h"
  41
   42 struct target_expmed default_target_expmed;
      /* this_target_expmed is only defined when SWITCHABLE_TARGET is nonzero;
         it is initialized to point at default_target_expmed above.  */
   43 #if SWITCHABLE_TARGET
   44 struct target_expmed *this_target_expmed = &default_target_expmed;
   45 #endif
  46
  47 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  48                                    unsigned HOST_WIDE_INT,
  49                                    unsigned HOST_WIDE_INT,
  50                                    unsigned HOST_WIDE_INT,
  51                                    rtx, bool);
  52 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
  53                                      unsigned HOST_WIDE_INT,
  54                                      rtx, bool);
  55 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  56                                    unsigned HOST_WIDE_INT,
  57                                    unsigned HOST_WIDE_INT,
  58                                    unsigned HOST_WIDE_INT,
  59                                    rtx, bool);
  60 static rtx extract_fixed_bit_field (machine_mode, rtx,
  61                                     unsigned HOST_WIDE_INT,
  62                                     unsigned HOST_WIDE_INT, rtx, int, bool);
  63 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
  64                                       unsigned HOST_WIDE_INT,
  65                                       unsigned HOST_WIDE_INT, rtx, int, bool);
  66 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
  67 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  68                                     unsigned HOST_WIDE_INT, int, bool);
  69 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
  70 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  71 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  72
  73 /* Return a constant integer mask value of mode MODE with BITSIZE ones
  74    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
  75    The mask is truncated if necessary to the width of mode MODE.  The
  76    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
  77
  78 static inline rtx
  79 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
  80 {
  81   return immed_wide_int_const
  82     (wi::shifted_mask (bitpos, bitsize, complement,
  83                        GET_MODE_PRECISION (mode)), mode);
  84 }
  85
  86 /* Test whether a value is zero of a power of two.  */
  87 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  88   (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
  89
  90 struct init_expmed_rtl
  91 {
  92   rtx reg;
  93   rtx plus;
  94   rtx neg;
  95   rtx mult;
  96   rtx sdiv;
  97   rtx udiv;
  98   rtx sdiv_32;
  99   rtx smod_32;
 100   rtx wide_mult;
 101   rtx wide_lshr;
 102   rtx wide_trunc;
 103   rtx shift;
 104   rtx shift_mult;
 105   rtx shift_add;
 106   rtx shift_sub0;
 107   rtx shift_sub1;
 108   rtx zext;
 109   rtx trunc;
 110
 111   rtx pow2[MAX_BITS_PER_WORD];
 112   rtx cint[MAX_BITS_PER_WORD];
 113 };
 114
 115 static void
 116 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
 117                       machine_mode from_mode, bool speed)
 118 {
 119   int to_size, from_size;
 120   rtx which;
 121
 122   to_size = GET_MODE_PRECISION (to_mode);
 123   from_size = GET_MODE_PRECISION (from_mode);
 124
 125   /* Most partial integers have a precision less than the "full"
 126      integer it requires for storage.  In case one doesn't, for
 127      comparison purposes here, reduce the bit size by one in that
 128      case.  */
 129   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
 130       && pow2p_hwi (to_size))
 131     to_size --;
 132   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
 133       && pow2p_hwi (from_size))
 134     from_size --;
 135
 136   /* Assume cost of zero-extend and sign-extend is the same.  */
 137   which = (to_size < from_size ? all->trunc : all->zext);
 138
 139   PUT_MODE (all->reg, from_mode);
 140   set_convert_cost (to_mode, from_mode, speed,
 141                     set_src_cost (which, to_mode, speed));
 142 }
 143
 144 static void
 145 init_expmed_one_mode (struct init_expmed_rtl *all,
 146                       machine_mode mode, int speed)
 147 {
 148   int m, n, mode_bitsize;
 149   machine_mode mode_from;
 150
 151   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 152
 153   PUT_MODE (all->reg, mode);
 154   PUT_MODE (all->plus, mode);
 155   PUT_MODE (all->neg, mode);
 156   PUT_MODE (all->mult, mode);
 157   PUT_MODE (all->sdiv, mode);
 158   PUT_MODE (all->udiv, mode);
 159   PUT_MODE (all->sdiv_32, mode);
 160   PUT_MODE (all->smod_32, mode);
 161   PUT_MODE (all->wide_trunc, mode);
 162   PUT_MODE (all->shift, mode);
 163   PUT_MODE (all->shift_mult, mode);
 164   PUT_MODE (all->shift_add, mode);
 165   PUT_MODE (all->shift_sub0, mode);
 166   PUT_MODE (all->shift_sub1, mode);
 167   PUT_MODE (all->zext, mode);
 168   PUT_MODE (all->trunc, mode);
 169
 170   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
 171   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
 172   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
 173   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
 174   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
 175
 176   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
 177                                      <= 2 * add_cost (speed, mode)));
 178   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
 179                                      <= 4 * add_cost (speed, mode)));
 180
 181   set_shift_cost (speed, mode, 0, 0);
 182   {
 183     int cost = add_cost (speed, mode);
 184     set_shiftadd_cost (speed, mode, 0, cost);
 185     set_shiftsub0_cost (speed, mode, 0, cost);
 186     set_shiftsub1_cost (speed, mode, 0, cost);
 187   }
 188
 189   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 190   for (m = 1; m < n; m++)
 191     {
 192       XEXP (all->shift, 1) = all->cint[m];
 193       XEXP (all->shift_mult, 1) = all->pow2[m];
 194
 195       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
 196       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
 197                                                        speed));
 198       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
 199                                                         speed));
 200       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
 201                                                         speed));
 202     }
 203
 204   if (SCALAR_INT_MODE_P (mode))
 205     {
 206       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 207            mode_from = (machine_mode)(mode_from + 1))
 208         init_expmed_one_conv (all, mode, mode_from, speed);
 209     }
 210   if (GET_MODE_CLASS (mode) == MODE_INT)
 211     {
 212       machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
 213       if (wider_mode != VOIDmode)
 214         {
 215           PUT_MODE (all->zext, wider_mode);
 216           PUT_MODE (all->wide_mult, wider_mode);
 217           PUT_MODE (all->wide_lshr, wider_mode);
 218           XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
 219
 220           set_mul_widen_cost (speed, wider_mode,
 221                               set_src_cost (all->wide_mult, wider_mode, speed));
 222           set_mul_highpart_cost (speed, mode,
 223                                  set_src_cost (all->wide_trunc, mode, speed));
 224         }
 225     }
 226 }
 227
 228 void
 229 init_expmed (void)
 230 {
 231   struct init_expmed_rtl all;
 232   machine_mode mode = QImode;
 233   int m, speed;
 234
 235   memset (&all, 0, sizeof all);
 236   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 237     {
 238       all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
 239       all.cint[m] = GEN_INT (m);
 240     }
 241
 242   /* Avoid using hard regs in ways which may be unsupported.  */
 243   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
 244   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
 245   all.neg = gen_rtx_NEG (mode, all.reg);
 246   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
 247   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
 248   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
 249   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
 250   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
 251   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
 252   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
 253   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
 254   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
 255   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
 256   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
 257   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
 258   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
 259   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
 260   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
 261
 262   for (speed = 0; speed < 2; speed++)
 263     {
 264       crtl->maybe_hot_insn_p = speed;
 265       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
 266
 267       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 268            mode = (machine_mode)(mode + 1))
 269         init_expmed_one_mode (&all, mode, speed);
 270
 271       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 272         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 273              mode = (machine_mode)(mode + 1))
 274           init_expmed_one_mode (&all, mode, speed);
 275
 276       if (MIN_MODE_VECTOR_INT != VOIDmode)
 277         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 278              mode = (machine_mode)(mode + 1))
 279           init_expmed_one_mode (&all, mode, speed);
 280     }
 281
 282   if (alg_hash_used_p ())
 283     {
 284       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 285       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 286     }
 287   else
 288     set_alg_hash_used_p (true);
 289   default_rtl_profile ();
 290
 291   ggc_free (all.trunc);
 292   ggc_free (all.shift_sub1);
 293   ggc_free (all.shift_sub0);
 294   ggc_free (all.shift_add);
 295   ggc_free (all.shift_mult);
 296   ggc_free (all.shift);
 297   ggc_free (all.wide_trunc);
 298   ggc_free (all.wide_lshr);
 299   ggc_free (all.wide_mult);
 300   ggc_free (all.zext);
 301   ggc_free (all.smod_32);
 302   ggc_free (all.sdiv_32);
 303   ggc_free (all.udiv);
 304   ggc_free (all.sdiv);
 305   ggc_free (all.mult);
 306   ggc_free (all.neg);
 307   ggc_free (all.plus);
 308   ggc_free (all.reg);
 309 }
 310
 311 /* Return an rtx representing minus the value of X.
 312    MODE is the intended mode of the result,
 313    useful if X is a CONST_INT.  */
 314
 315 rtx
 316 negate_rtx (machine_mode mode, rtx x)
 317 {
 318   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 319
 320   if (result == 0)
 321     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 322
 323   return result;
 324 }
 325
 326 /* Whether reverse storage order is supported on the target.  */
 327 static int reverse_storage_order_supported = -1;
 328
 329 /* Check whether reverse storage order is supported on the target.  */
 330
 331 static void
 332 check_reverse_storage_order_support (void)
 333 {
 334   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 335     {
 336       reverse_storage_order_supported = 0;
 337       sorry ("reverse scalar storage order");
 338     }
 339   else
 340     reverse_storage_order_supported = 1;
 341 }
 342
 343 /* Whether reverse FP storage order is supported on the target.  */
 344 static int reverse_float_storage_order_supported = -1;
 345
 346 /* Check whether reverse FP storage order is supported on the target.  */
 347
 348 static void
 349 check_reverse_float_storage_order_support (void)
 350 {
 351   if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 352     {
 353       reverse_float_storage_order_supported = 0;
 354       sorry ("reverse floating-point scalar storage order");
 355     }
 356   else
 357     reverse_float_storage_order_supported = 1;
 358 }
 359
 360 /* Return an rtx representing value of X with reverse storage order.
 361    MODE is the intended mode of the result,
 362    useful if X is a CONST_INT.  */
 363
 364 rtx
 365 flip_storage_order (enum machine_mode mode, rtx x)
 366 {
 367   enum machine_mode int_mode;
 368   rtx result;
 369
 370   if (mode == QImode)
 371     return x;
 372
 373   if (COMPLEX_MODE_P (mode))
 374     {
 375       rtx real = read_complex_part (x, false);
 376       rtx imag = read_complex_part (x, true);
 377
 378       real = flip_storage_order (GET_MODE_INNER (mode), real);
 379       imag = flip_storage_order (GET_MODE_INNER (mode), imag);
 380
 381       return gen_rtx_CONCAT (mode, real, imag);
 382     }
 383
 384   if (__builtin_expect (reverse_storage_order_supported < 0, 0))
 385     check_reverse_storage_order_support ();
 386
 387   if (SCALAR_INT_MODE_P (mode))
 388     int_mode = mode;
 389   else
 390     {
 391       if (FLOAT_MODE_P (mode)
 392           && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
 393         check_reverse_float_storage_order_support ();
 394
 395       int_mode = mode_for_size (GET_MODE_PRECISION (mode), MODE_INT, 0);
 396       if (int_mode == BLKmode)
 397         {
 398           sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
 399           return x;
 400         }
 401       x = gen_lowpart (int_mode, x);
 402     }
 403
 404   result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
 405   if (result == 0)
 406     result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
 407
 408   if (int_mode != mode)
 409     result = gen_lowpart (mode, result);
 410
 411   return result;
 412 }
 413
 414 /* Adjust bitfield memory MEM so that it points to the first unit of mode
 415    MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
 416    If MODE is BLKmode, return a reference to every byte in the bitfield.
 417    Set *NEW_BITNUM to the bit position of the field within the new memory.  */
 418
 419 static rtx
 420 narrow_bit_field_mem (rtx mem, machine_mode mode,
 421                       unsigned HOST_WIDE_INT bitsize,
 422                       unsigned HOST_WIDE_INT bitnum,
 423                       unsigned HOST_WIDE_INT *new_bitnum)
 424 {
 425   if (mode == BLKmode)
 426     {
 427       *new_bitnum = bitnum % BITS_PER_UNIT;
 428       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 429       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 430                             / BITS_PER_UNIT);
 431       return adjust_bitfield_address_size (mem, mode, offset, size);
 432     }
 433   else
 434     {
 435       unsigned int unit = GET_MODE_BITSIZE (mode);
 436       *new_bitnum = bitnum % unit;
 437       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 438       return adjust_bitfield_address (mem, mode, offset);
 439     }
 440 }
 441
 442 /* The caller wants to perform insertion or extraction PATTERN on a
 443    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 444    BITREGION_START and BITREGION_END are as for store_bit_field
 445    and FIELDMODE is the natural mode of the field.
 446
 447    Search for a mode that is compatible with the memory access
 448    restrictions and (where applicable) with a register insertion or
 449    extraction.  Return the new memory on success, storing the adjusted
 450    bit position in *NEW_BITNUM.  Return null otherwise.  */
 451
 452 static rtx
 453 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 454                               rtx op0, HOST_WIDE_INT bitsize,
 455                               HOST_WIDE_INT bitnum,
 456                               unsigned HOST_WIDE_INT bitregion_start,
 457                               unsigned HOST_WIDE_INT bitregion_end,
 458                               machine_mode fieldmode,
 459                               unsigned HOST_WIDE_INT *new_bitnum)
 460 {
 461   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 462                                 bitregion_end, MEM_ALIGN (op0),
 463                                 MEM_VOLATILE_P (op0));
 464   machine_mode best_mode;
 465   if (iter.next_mode (&best_mode))
 466     {
 467       /* We can use a memory in BEST_MODE.  See whether this is true for
 468          any wider modes.  All other things being equal, we prefer to
 469          use the widest mode possible because it tends to expose more
 470          CSE opportunities.  */
 471       if (!iter.prefer_smaller_modes ())
 472         {
 473           /* Limit the search to the mode required by the corresponding
 474              register insertion or extraction instruction, if any.  */
 475           machine_mode limit_mode = word_mode;
 476           extraction_insn insn;
 477           if (get_best_reg_extraction_insn (&insn, pattern,
 478                                             GET_MODE_BITSIZE (best_mode),
 479                                             fieldmode))
 480             limit_mode = insn.field_mode;
 481
 482           machine_mode wider_mode;
 483           while (iter.next_mode (&wider_mode)
 484                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 485             best_mode = wider_mode;
 486         }
 487       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 488                                    new_bitnum);
 489     }
 490   return NULL_RTX;
 491 }
 492
 493 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 494    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 495    offset is then BITNUM / BITS_PER_UNIT.  */
 496
 497 static bool
 498 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
 499                      unsigned HOST_WIDE_INT bitsize,
 500                      machine_mode struct_mode)
 501 {
 502   if (BYTES_BIG_ENDIAN)
 503     return (bitnum % BITS_PER_UNIT == 0
 504             && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
 505                 || (bitnum + bitsize) % BITS_PER_WORD == 0));
 506   else
 507     return bitnum % BITS_PER_WORD == 0;
 508 }
 509
 510 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 511    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 512    Return false if the access would touch memory outside the range
 513    BITREGION_START to BITREGION_END for conformance to the C++ memory
 514    model.  */
 515
 516 static bool
 517 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 518                             unsigned HOST_WIDE_INT bitnum,
 519                             machine_mode fieldmode,
 520                             unsigned HOST_WIDE_INT bitregion_start,
 521                             unsigned HOST_WIDE_INT bitregion_end)
 522 {
 523   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 524
 525   /* -fstrict-volatile-bitfields must be enabled and we must have a
 526      volatile MEM.  */
 527   if (!MEM_P (op0)
 528       || !MEM_VOLATILE_P (op0)
 529       || flag_strict_volatile_bitfields <= 0)
 530     return false;
 531
 532   /* Non-integral modes likely only happen with packed structures.
 533      Punt.  */
 534   if (!SCALAR_INT_MODE_P (fieldmode))
 535     return false;
 536
 537   /* The bit size must not be larger than the field mode, and
 538      the field mode must not be larger than a word.  */
 539   if (bitsize > modesize || modesize > BITS_PER_WORD)
 540     return false;
 541
 542   /* Check for cases of unaligned fields that must be split.  */
 543   if (bitnum % modesize + bitsize > modesize)
 544     return false;
 545
 546   /* The memory must be sufficiently aligned for a MODESIZE access.
 547      This condition guarantees, that the memory access will not
 548      touch anything after the end of the structure.  */
 549   if (MEM_ALIGN (op0) < modesize)
 550     return false;
 551
 552   /* Check for cases where the C++ memory model applies.  */
 553   if (bitregion_end != 0
 554       && (bitnum - bitnum % modesize < bitregion_start
 555           || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
 556     return false;
 557
 558   return true;
 559 }
 560
 561 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
 562    bit number BITNUM can be treated as a simple value of mode MODE.  */
 563
 564 static bool
 565 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 566                        unsigned HOST_WIDE_INT bitnum, machine_mode mode)
 567 {
 568   return (MEM_P (op0)
 569           && bitnum % BITS_PER_UNIT == 0
 570           && bitsize == GET_MODE_BITSIZE (mode)
 571           && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
 572               || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
 573                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 574 }
 575 \f
 576 /* Try to use instruction INSV to store VALUE into a field of OP0.
 577    BITSIZE and BITNUM are as for store_bit_field.  */
 578
 579 static bool
 580 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 581                             unsigned HOST_WIDE_INT bitsize,
 582                             unsigned HOST_WIDE_INT bitnum,
 583                             rtx value)
 584 {
 585   struct expand_operand ops[4];
 586   rtx value1;
 587   rtx xop0 = op0;
 588   rtx_insn *last = get_last_insn ();
 589   bool copy_back = false;
 590
 591   machine_mode op_mode = insv->field_mode;
 592   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 593   if (bitsize == 0 || bitsize > unit)
 594     return false;
 595
 596   if (MEM_P (xop0))
 597     /* Get a reference to the first byte of the field.  */
 598     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 599                                  &bitnum);
 600   else
 601     {
 602       /* Convert from counting within OP0 to counting in OP_MODE.  */
 603       if (BYTES_BIG_ENDIAN)
 604         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 605
 606       /* If xop0 is a register, we need it in OP_MODE
 607          to make it acceptable to the format of insv.  */
 608       if (GET_CODE (xop0) == SUBREG)
 609         /* We can't just change the mode, because this might clobber op0,
 610            and we will need the original value of op0 if insv fails.  */
 611         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 612       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 613         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 614     }
 615
 616   /* If the destination is a paradoxical subreg such that we need a
 617      truncate to the inner mode, perform the insertion on a temporary and
 618      truncate the result to the original destination.  Note that we can't
 619      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 620      X) 0)) is (reg:N X).  */
 621   if (GET_CODE (xop0) == SUBREG
 622       && REG_P (SUBREG_REG (xop0))
 623       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 624                                          op_mode))
 625     {
 626       rtx tem = gen_reg_rtx (op_mode);
 627       emit_move_insn (tem, xop0);
 628       xop0 = tem;
 629       copy_back = true;
 630     }
 631
 632   /* There are similar overflow check at the start of store_bit_field_1,
 633      but that only check the situation where the field lies completely
 634      outside the register, while there do have situation where the field
 635      lies partialy in the register, we need to adjust bitsize for this
 636      partial overflow situation.  Without this fix, pr48335-2.c on big-endian
 637      will broken on those arch support bit insert instruction, like arm, aarch64
 638      etc.  */
 639   if (bitsize + bitnum > unit && bitnum < unit)
 640     {
 641       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
 642                "destination object, data truncated into %wu-bit",
 643                bitsize, unit - bitnum);
 644       bitsize = unit - bitnum;
 645     }
 646
 647   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 648      "backwards" from the size of the unit we are inserting into.
 649      Otherwise, we count bits from the most significant on a
 650      BYTES/BITS_BIG_ENDIAN machine.  */
 651
 652   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 653     bitnum = unit - bitsize - bitnum;
 654
 655   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 656   value1 = value;
 657   if (GET_MODE (value) != op_mode)
 658     {
 659       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 660         {
 661           rtx tmp;
 662           /* Optimization: Don't bother really extending VALUE
 663              if it has all the bits we will actually use.  However,
 664              if we must narrow it, be sure we do it correctly.  */
 665
 666           if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 667             {
 668               tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 669               if (! tmp)
 670                 tmp = simplify_gen_subreg (op_mode,
 671                                            force_reg (GET_MODE (value),
 672                                                       value1),
 673                                            GET_MODE (value), 0);
 674             }
 675           else
 676             {
 677               tmp = gen_lowpart_if_possible (op_mode, value1);
 678               if (! tmp)
 679                 tmp = gen_lowpart (op_mode, force_reg (GET_MODE (value),
 680                                                        value1));
 681             }
 682           value1 = tmp;
 683         }
 684       else if (CONST_INT_P (value))
 685         value1 = gen_int_mode (INTVAL (value), op_mode);
 686       else
 687         /* Parse phase is supposed to make VALUE's data type
 688            match that of the component reference, which is a type
 689            at least as wide as the field; so VALUE should have
 690            a mode that corresponds to that type.  */
 691         gcc_assert (CONSTANT_P (value));
 692     }
 693
 694   create_fixed_operand (&ops[0], xop0);
 695   create_integer_operand (&ops[1], bitsize);
 696   create_integer_operand (&ops[2], bitnum);
 697   create_input_operand (&ops[3], value1, op_mode);
 698   if (maybe_expand_insn (insv->icode, 4, ops))
 699     {
 700       if (copy_back)
 701         convert_move (op0, xop0, true);
 702       return true;
 703     }
 704   delete_insns_since (last);
 705   return false;
 706 }
 707
 708 /* A subroutine of store_bit_field, with the same arguments.  Return true
 709    if the operation could be implemented.
 710
 711    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 712    no other way of implementing the operation.  If FALLBACK_P is false,
 713    return false instead.  */
 714
 715 static bool
 716 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 717                    unsigned HOST_WIDE_INT bitnum,
 718                    unsigned HOST_WIDE_INT bitregion_start,
 719                    unsigned HOST_WIDE_INT bitregion_end,
 720                    machine_mode fieldmode,
 721                    rtx value, bool reverse, bool fallback_p)
 722 {
 723   rtx op0 = str_rtx;
 724   rtx orig_value;
 725
 726   while (GET_CODE (op0) == SUBREG)
 727     {
 728       /* The following line once was done only if WORDS_BIG_ENDIAN,
 729          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 730          meaningful at a much higher level; when structures are copied
 731          between memory and regs, the higher-numbered regs
 732          always get higher addresses.  */
 733       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 734       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 735       int byte_offset = 0;
 736
 737       /* Paradoxical subregs need special handling on big-endian machines.  */
 738       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 739         {
 740           int difference = inner_mode_size - outer_mode_size;
 741
 742           if (WORDS_BIG_ENDIAN)
 743             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 744           if (BYTES_BIG_ENDIAN)
 745             byte_offset += difference % UNITS_PER_WORD;
 746         }
 747       else
 748         byte_offset = SUBREG_BYTE (op0);
 749
 750       bitnum += byte_offset * BITS_PER_UNIT;
 751       op0 = SUBREG_REG (op0);
 752     }
 753
 754   /* No action is needed if the target is a register and if the field
 755      lies completely outside that register.  This can occur if the source
 756      code contains an out-of-bounds access to a small array.  */
 757   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 758     return true;
 759
 760   /* Use vec_set patterns for inserting parts of vectors whenever
 761      available.  */
 762   if (VECTOR_MODE_P (GET_MODE (op0))
 763       && !MEM_P (op0)
 764       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 765       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 766       && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
 767       && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
 768     {
 769       struct expand_operand ops[3];
 770       machine_mode outermode = GET_MODE (op0);
 771       machine_mode innermode = GET_MODE_INNER (outermode);
 772       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 773       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 774
 775       create_fixed_operand (&ops[0], op0);
 776       create_input_operand (&ops[1], value, innermode);
 777       create_integer_operand (&ops[2], pos);
 778       if (maybe_expand_insn (icode, 3, ops))
 779         return true;
 780     }
 781
 782   /* If the target is a register, overwriting the entire object, or storing
 783      a full-word or multi-word field can be done with just a SUBREG.  */
 784   if (!MEM_P (op0)
 785       && bitsize == GET_MODE_BITSIZE (fieldmode)
 786       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 787           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 788     {
 789       /* Use the subreg machinery either to narrow OP0 to the required
 790          words or to cope with mode punning between equal-sized modes.
 791          In the latter case, use subreg on the rhs side, not lhs.  */
 792       rtx sub;
 793
 794       if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
 795         {
 796           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 797           if (sub)
 798             {
 799               if (reverse)
 800                 sub = flip_storage_order (GET_MODE (op0), sub);
 801               emit_move_insn (op0, sub);
 802               return true;
 803             }
 804         }
 805       else
 806         {
 807           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 808                                      bitnum / BITS_PER_UNIT);
 809           if (sub)
 810             {
 811               if (reverse)
 812                 value = flip_storage_order (fieldmode, value);
 813               emit_move_insn (sub, value);
 814               return true;
 815             }
 816         }
 817     }
 818
 819   /* If the target is memory, storing any naturally aligned field can be
 820      done with a simple store.  For targets that support fast unaligned
 821      memory, any naturally sized, unit aligned field can be done directly.  */
 822   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
 823     {
 824       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 825       if (reverse)
 826         value = flip_storage_order (fieldmode, value);
 827       emit_move_insn (op0, value);
 828       return true;
 829     }
 830
 831   /* Make sure we are playing with integral modes.  Pun with subregs
 832      if we aren't.  This must come after the entire register case above,
 833      since that case is valid for any mode.  The following cases are only
 834      valid for integral modes.  */
 835   {
 836     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 837     if (imode != GET_MODE (op0))
 838       {
 839         if (MEM_P (op0))
 840           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
 841         else
 842           {
 843             gcc_assert (imode != BLKmode);
 844             op0 = gen_lowpart (imode, op0);
 845           }
 846       }
 847   }
 848
 849   /* Storing an lsb-aligned field in a register
 850      can be done with a movstrict instruction.  */
 851
 852   if (!MEM_P (op0)
 853       && !reverse
 854       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 855       && bitsize == GET_MODE_BITSIZE (fieldmode)
 856       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 857     {
 858       struct expand_operand ops[2];
 859       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 860       rtx arg0 = op0;
 861       unsigned HOST_WIDE_INT subreg_off;
 862
 863       if (GET_CODE (arg0) == SUBREG)
 864         {
 865           /* Else we've got some float mode source being extracted into
 866              a different float mode destination -- this combination of
 867              subregs results in Severe Tire Damage.  */
 868           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 869                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 870                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 871           arg0 = SUBREG_REG (arg0);
 872         }
 873
 874       subreg_off = bitnum / BITS_PER_UNIT;
 875       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 876         {
 877           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 878
 879           create_fixed_operand (&ops[0], arg0);
 880           /* Shrink the source operand to FIELDMODE.  */
 881           create_convert_operand_to (&ops[1], value, fieldmode, false);
 882           if (maybe_expand_insn (icode, 2, ops))
 883             return true;
 884         }
 885     }
 886
 887   /* Handle fields bigger than a word.  */
 888
 889   if (bitsize > BITS_PER_WORD)
 890     {
 891       /* Here we transfer the words of the field
 892          in the order least significant first.
 893          This is because the most significant word is the one which may
 894          be less than full.
 895          However, only do that if the value is not BLKmode.  */
 896
 897       const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 898       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 899       unsigned int i;
 900       rtx_insn *last;
 901
 902       /* This is the mode we must force value to, so that there will be enough
 903          subwords to extract.  Note that fieldmode will often (always?) be
 904          VOIDmode, because that is what store_field uses to indicate that this
 905          is a bit field, but passing VOIDmode to operand_subword_force
 906          is not allowed.  */
 907       fieldmode = GET_MODE (value);
 908       if (fieldmode == VOIDmode)
 909         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 910
 911       last = get_last_insn ();
 912       for (i = 0; i < nwords; i++)
 913         {
 914           /* If I is 0, use the low-order word in both field and target;
 915              if I is 1, use the next to lowest word; and so on.  */
 916           unsigned int wordnum = (backwards
 917                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 918                                   - i - 1
 919                                   : i);
 920           unsigned int bit_offset = (backwards ^ reverse
 921                                      ? MAX ((int) bitsize - ((int) i + 1)
 922                                             * BITS_PER_WORD,
 923                                             0)
 924                                      : (int) i * BITS_PER_WORD);
 925           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 926           unsigned HOST_WIDE_INT new_bitsize =
 927             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 928
 929           /* If the remaining chunk doesn't have full wordsize we have
 930              to make sure that for big-endian machines the higher order
 931              bits are used.  */
 932           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 933             value_word = simplify_expand_binop (word_mode, lshr_optab,
 934                                                 value_word,
 935                                                 GEN_INT (BITS_PER_WORD
 936                                                          - new_bitsize),
 937                                                 NULL_RTX, true,
 938                                                 OPTAB_LIB_WIDEN);
 939
 940           if (!store_bit_field_1 (op0, new_bitsize,
 941                                   bitnum + bit_offset,
 942                                   bitregion_start, bitregion_end,
 943                                   word_mode,
 944                                   value_word, reverse, fallback_p))
 945             {
 946               delete_insns_since (last);
 947               return false;
 948             }
 949         }
 950       return true;
 951     }
 952
 953   /* If VALUE has a floating-point or complex mode, access it as an
 954      integer of the corresponding size.  This can occur on a machine
 955      with 64 bit registers that uses SFmode for float.  It can also
 956      occur for unaligned float or complex fields.  */
 957   orig_value = value;
 958   if (GET_MODE (value) != VOIDmode
 959       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 960       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 961     {
 962       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 963       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 964     }
 965
 966   /* If OP0 is a multi-word register, narrow it to the affected word.
 967      If the region spans two words, defer to store_split_bit_field.  */
 968   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 969     {
 970       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
 971         {
 972           if (!fallback_p)
 973             return false;
 974
 975           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 976                                  bitregion_end, value, reverse);
 977           return true;
 978         }
 979       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
 980                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 981       gcc_assert (op0);
 982       bitnum %= BITS_PER_WORD;
 983     }
 984
 985   /* From here on we can assume that the field to be stored in fits
 986      within a word.  If the destination is a register, it too fits
 987      in a word.  */
 988
 989   extraction_insn insv;
 990   if (!MEM_P (op0)
 991       && !reverse
 992       && get_best_reg_extraction_insn (&insv, EP_insv,
 993                                        GET_MODE_BITSIZE (GET_MODE (op0)),
 994                                        fieldmode)
 995       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 996     return true;
 997
 998   /* If OP0 is a memory, try copying it to a register and seeing if a
 999      cheap register alternative is available.  */
1000   if (MEM_P (op0) && !reverse)
1001     {
1002       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1003                                         fieldmode)
1004           && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1005         return true;
1006
1007       rtx_insn *last = get_last_insn ();
1008
1009       /* Try loading part of OP0 into a register, inserting the bitfield
1010          into that, and then copying the result back to OP0.  */
1011       unsigned HOST_WIDE_INT bitpos;
1012       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1013                                                bitregion_start, bitregion_end,
1014                                                fieldmode, &bitpos);
1015       if (xop0)
1016         {
1017           rtx tempreg = copy_to_reg (xop0);
1018           if (store_bit_field_1 (tempreg, bitsize, bitpos,
1019                                  bitregion_start, bitregion_end,
1020                                  fieldmode, orig_value, reverse, false))
1021             {
1022               emit_move_insn (xop0, tempreg);
1023               return true;
1024             }
1025           delete_insns_since (last);
1026         }
1027     }
1028
1029   if (!fallback_p)
1030     return false;
1031
1032   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
1033                          bitregion_end, value, reverse);
1034   return true;
1035 }
1036
1037 /* Generate code to store value from rtx VALUE
1038    into a bit-field within structure STR_RTX
1039    containing BITSIZE bits starting at bit BITNUM.
1040
1041    BITREGION_START is bitpos of the first bitfield in this region.
1042    BITREGION_END is the bitpos of the ending bitfield in this region.
1043    These two fields are 0, if the C++ memory model does not apply,
1044    or we are not interested in keeping track of bitfield regions.
1045
1046    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1047
1048    If REVERSE is true, the store is to be done in reverse order.

        When strict_volatile_bitfield_p accepts the access, the store is
        done here: either as a single move in FIELDMODE (when BITSIZE fills
        FIELDMODE), or by loading the containing FIELDMODE unit into a
        register, inserting the field there and writing the unit back.
        Otherwise the work is handed to store_bit_field_1 with fallback
        enabled, after rebasing a MEM destination to the start of its bit
        region if BITREGION_START is nonzero.  */
1049
1050 void
1051 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1052                  unsigned HOST_WIDE_INT bitnum,
1053                  unsigned HOST_WIDE_INT bitregion_start,
1054                  unsigned HOST_WIDE_INT bitregion_end,
1055                  machine_mode fieldmode,
1056                  rtx value, bool reverse)
1057 {
1058   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1059   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
1060                                   bitregion_start, bitregion_end))
1061     {
1062       /* Storing of a full word can be done with a simple store.
1063          We know here that the field can be accessed with one single
1064          instruction.  For targets that support unaligned memory,
1065          an unaligned access may be necessary.  */
1066       if (bitsize == GET_MODE_BITSIZE (fieldmode))
1067         {
1068           str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
1069                                              bitnum / BITS_PER_UNIT);
1070           if (reverse)
1071             value = flip_storage_order (fieldmode, value);
               /* The byte offset computed above drops any sub-byte part of
                  BITNUM, so insist that there is none.  */
1072           gcc_assert (bitnum % BITS_PER_UNIT == 0);
1073           emit_move_insn (str_rtx, value);
1074         }
1075       else
1076         {
1077           rtx temp;
1078
               /* Narrow STR_RTX to a FIELDMODE reference; BITNUM is updated
                  in place and the field must then fit inside FIELDMODE.  */
1079           str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
1080                                           &bitnum);
1081           gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
               /* Read-modify-write: copy the unit into a register, insert
                  the field into that copy (no bit region is passed), then
                  write the whole unit back with one move.  */
1082           temp = copy_to_reg (str_rtx);
1083           if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1084                                   fieldmode, value, reverse, true))
1085             gcc_unreachable ();
1086
1087           emit_move_insn (str_rtx, temp);
1088         }
1089
1090       return;
1091     }
1092
1093   /* Under the C++0x memory model, we must not touch bits outside the
1094      bit region.  Adjust the address to start at the beginning of the
1095      bit region.  */
1096   if (MEM_P (str_rtx) && bitregion_start > 0)
1097     {
1098       machine_mode bestmode;
1099       HOST_WIDE_INT offset, size;
1100
1101       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1102
1103       offset = bitregion_start / BITS_PER_UNIT;
           /* From here on BITNUM and BITREGION_END are relative to the start
              of the bit region, which becomes bit 0.  */
1104       bitnum -= bitregion_start;
           /* Number of bytes from the region start through the end of the
              field, rounded up to a whole byte.  */
1105       size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1106       bitregion_end -= bitregion_start;
1107       bitregion_start = 0;
1108       bestmode = get_best_mode (bitsize, bitnum,
1109                                 bitregion_start, bitregion_end,
1110                                 MEM_ALIGN (str_rtx), VOIDmode,
1111                                 MEM_VOLATILE_P (str_rtx));
1112       str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1113     }
1114
       /* The last argument enables the fallback path in store_bit_field_1,
          so a false return is treated as impossible.  */
1115   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1116                           bitregion_start, bitregion_end,
1117                           fieldmode, value, reverse, true))
1118     gcc_unreachable ();
1119 }
1120 \f
1121 /* Use shifts and boolean operations to store VALUE into a bit field of
1122    width BITSIZE in OP0, starting at bit BITNUM.
1123
        BITREGION_START and BITREGION_END are handed to get_best_mode when
        OP0 is a MEM, and to store_split_bit_field if no single mode is
        found.

1124    If REVERSE is true, the store is to be done in reverse order.

        For a MEM, this picks an access mode with get_best_mode, narrows
        OP0 to it and then calls store_fixed_bit_field_1; if get_best_mode
        returns VOIDmode the store is delegated to store_split_bit_field
        instead.  Anything that is not a MEM goes to
        store_fixed_bit_field_1 unchanged.  */
1125
1126 static void
1127 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1128                        unsigned HOST_WIDE_INT bitnum,
1129                        unsigned HOST_WIDE_INT bitregion_start,
1130                        unsigned HOST_WIDE_INT bitregion_end,
1131                        rtx value, bool reverse)
1132 {
1133   /* There is a case not handled here:
1134      a structure with a known alignment of just a halfword
1135      and a field split across two aligned halfwords within the structure.
1136      Or likewise a structure with a known alignment of just a byte
1137      and a field split across two bytes.
1138      Such cases are not supposed to be able to occur.  */
1139
1140   if (MEM_P (op0))
1141     {
1142       machine_mode mode = GET_MODE (op0);
           /* MODE is passed to get_best_mode as its mode limit (presumably
              the largest mode it may return -- confirm against
              get_best_mode).  Use word_mode when OP0's own mode has a zero
              bit size or is wider than a word.  */
1143       if (GET_MODE_BITSIZE (mode) == 0
1144           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1145         mode = word_mode;
1146       mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1147                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1148
1149       if (mode == VOIDmode)
1150         {
1151           /* The only way this should occur is if the field spans word
1152              boundaries.  */
1153           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1154                                  bitregion_end, value, reverse);
1155           return;
1156         }
1157
           /* Re-express OP0 as a MODE reference; BITNUM is updated in place
              to match the narrowed reference.  */
1158       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1159     }
1160
1161   store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
1162 }
1163
1164 /* Helper function for store_fixed_bit_field, stores
1165    the bit field always using the MODE of OP0.

        OP0 must have a scalar integer mode (this is asserted).  BITSIZE and
        BITNUM give the width and position of the field within OP0, VALUE
        is the value to store, and REVERSE requests reverse storage order.

        The store is a read-modify-write: VALUE is shifted into position,
        the field's bits are cleared in a register copy of OP0 with an AND,
        VALUE is merged in with an IOR, and the result is moved back to
        OP0.  For a constant VALUE the AND or the IOR is skipped when the
        field is all ones or all zeros respectively.  */
1166
1167 static void
1168 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1169                          unsigned HOST_WIDE_INT bitnum,
1170                          rtx value, bool reverse)
1171 {
1172   machine_mode mode;
1173   rtx temp;
       /* Set below only for a CONST_INT VALUE whose low BITSIZE bits are all
          clear (ALL_ZERO) or all set (ALL_ONE).  */
1174   int all_zero = 0;
1175   int all_one = 0;
1176
1177   mode = GET_MODE (op0);
1178   gcc_assert (SCALAR_INT_MODE_P (mode));
1179
1180   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1181      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1182
1183   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1184     /* BITNUM is the distance between our msb
1185        and that of the containing datum.
1186        Convert it to the distance from the lsb.  */
1187     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1188
1189   /* Now BITNUM is always the distance between our lsb
1190      and that of OP0.  */
1191
1192   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1193      we must first convert its mode to MODE.  */
1194
1195   if (CONST_INT_P (value))
1196     {
1197       unsigned HOST_WIDE_INT v = UINTVAL (value);
1198
           /* Keep only the low BITSIZE bits of the constant.  The guard
              keeps the shift count below the width of HOST_WIDE_INT.  */
1199       if (bitsize < HOST_BITS_PER_WIDE_INT)
1200         v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1201
1202       if (v == 0)
1203         all_zero = 1;
1204       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1205                 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1206                || (bitsize == HOST_BITS_PER_WIDE_INT
1207                    && v == HOST_WIDE_INT_M1U))
1208         all_one = 1;
1209
1210       value = lshift_value (mode, v, bitnum);
1211     }
1212   else
1213     {
           /* No explicit masking is requested when VALUE's mode is exactly
              BITSIZE bits wide, or when the field ends at the top bit of
              MODE (the shift by BITNUM below then moves any higher bits of
              VALUE out of MODE).  This is computed before VALUE is
              converted, so it looks at VALUE's original mode.  */
1214       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1215                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1216
1217       if (GET_MODE (value) != mode)
1218         value = convert_to_mode (mode, value, 1);
1219
1220       if (must_and)
1221         value = expand_binop (mode, and_optab, value,
1222                               mask_rtx (mode, 0, bitsize, 0),
1223                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1224       if (bitnum > 0)
1225         value = expand_shift (LSHIFT_EXPR, mode, value,
1226                               bitnum, NULL_RTX, 1);
1227     }
1228
1229   if (reverse)
1230     value = flip_storage_order (mode, value);
1231
1232   /* Now clear the chosen bits in OP0,
1233      except that if VALUE is -1 we need not bother.  */
1234   /* We keep the intermediates in registers to allow CSE to combine
1235      consecutive bitfield assignments.  */
1236
1237   temp = force_reg (mode, op0);
1238
1239   if (! all_one)
1240     {
1241       rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
           /* VALUE was flipped above for REVERSE, so the mask that clears
              the field is flipped the same way.  */
1242       if (reverse)
1243         mask = flip_storage_order (mode, mask);
1244       temp = expand_binop (mode, and_optab, temp, mask,
1245                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1246       temp = force_reg (mode, temp);
1247     }
1248
1249   /* Now logical-or VALUE into OP0, unless it is zero.  */
1250
1251   if (! all_zero)
1252     {
1253       temp = expand_binop (mode, ior_optab, temp, value,
1254                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1255       temp = force_reg (mode, temp);
1256     }
1257
       /* Write the combined result back, using a copy of the OP0 rtx as the
          destination of the move.  */
1258   if (op0 != temp)
1259     {
1260       op0 = copy_rtx (op0);
1261       emit_move_insn (op0, temp);
1262     }
1263 }
1264 \f
1265 /* Store a bit field that is split across multiple accessible memory objects.
1266
1267    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1268    BITSIZE is the field width; BITPOS the position of its first bit
1269    (within the word).
1270    VALUE is the value to store.
        BITREGION_START and BITREGION_END are passed on to
        store_fixed_bit_field; a nonzero BITREGION_END also limits how far
        a piece stored to memory may extend (see the loop below).
1271
1272    If REVERSE is true, the store is to be done in reverse order.
1273
        The field is stored piece by piece.  Each piece lies within a single
        UNIT-bit chunk of OP0 and is at most BITS_PER_WORD bits wide; it is
        taken from VALUE and stored with store_fixed_bit_field.

1274    This does not yet handle fields wider than BITS_PER_WORD.  */
1275
1276 static void
1277 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1278                        unsigned HOST_WIDE_INT bitpos,
1279                        unsigned HOST_WIDE_INT bitregion_start,
1280                        unsigned HOST_WIDE_INT bitregion_end,
1281                        rtx value, bool reverse)
1282 {
       /* UNIT is the chunk size in bits, TOTAL_BITS the bit size of VALUE's
          mode, and BITSDONE the number of field bits stored so far.  */
1283   unsigned int unit, total_bits, bitsdone = 0;
1284
1285   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1286      much at a time.  */
1287   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1288     unit = BITS_PER_WORD;
1289   else
1290     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1291
1292   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1293      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1294      again, and we will mutually recurse forever.  */
1295   if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1296     unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1297
1298   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1299      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1300      that VALUE might be a floating-point constant.  */
1301   if (CONSTANT_P (value) && !CONST_INT_P (value))
1302     {
1303       rtx word = gen_lowpart_common (word_mode, value);
1304
1305       if (word && (value != word))
1306         value = word;
1307       else
1308         value = gen_lowpart_common (word_mode,
1309                                     force_reg (GET_MODE (value) != VOIDmode
1310                                                ? GET_MODE (value)
1311                                                : word_mode, value));
1312     }
1313
       /* Taken after the conversion above, so it reflects the mode VALUE
          has for the rest of the function.  */
1314   total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1315
1316   while (bitsdone < bitsize)
1317     {
1318       unsigned HOST_WIDE_INT thissize;
1319       unsigned HOST_WIDE_INT thispos;
1320       unsigned HOST_WIDE_INT offset;
1321       rtx part, word;
1322
           /* OFFSET counts the whole UNITs that precede the current piece;
              THISPOS is the bit position of the piece inside its UNIT.  */
1323       offset = (bitpos + bitsdone) / unit;
1324       thispos = (bitpos + bitsdone) % unit;
1325
1326       /* When region of bytes we can touch is restricted, decrease
1327          UNIT close to the end of the region as needed.  If op0 is a REG
1328          or SUBREG of REG, don't do this, as there can't be data races
1329          on a register and we can expand shorter code in some cases.  */
1330       if (bitregion_end
1331           && unit > BITS_PER_UNIT
1332           && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1333           && !REG_P (op0)
1334           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1335         {
               /* Retry this piece with the halved UNIT; OFFSET and THISPOS
                  are recomputed at the top of the loop.  */
1336           unit = unit / 2;
1337           continue;
1338         }
1339
1340       /* THISSIZE must not overrun a word boundary.  Otherwise,
1341          store_fixed_bit_field will call us again, and we will mutually
1342          recurse forever.  */
1343       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1344       thissize = MIN (thissize, unit - thispos);
1345
1346       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1347         {
1348           /* Fetch successively less significant portions.  */
1349           if (CONST_INT_P (value))
1350             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1351                              >> (bitsize - bitsdone - thissize))
1352                             & ((HOST_WIDE_INT_1 << thissize) - 1));
1353           /* Likewise, but the source is little-endian.  */
1354           else if (reverse)
1355             part = extract_fixed_bit_field (word_mode, value, thissize,
1356                                             bitsize - bitsdone - thissize,
1357                                             NULL_RTX, 1, false);
1358           else
1359             {
                   /* NOTE(review): this declaration shadows the
                      function-scope TOTAL_BITS, which was set from the same
                      expression before the loop.  */
1360               int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1361               /* The args are chosen so that the last part includes the
1362                  lsb.  Give extract_bit_field the value it needs (with
1363                  endianness compensation) to fetch the piece we want.  */
1364               part = extract_fixed_bit_field (word_mode, value, thissize,
1365                                               total_bits - bitsize + bitsdone,
1366                                               NULL_RTX, 1, false);
1367             }
1368         }
1369       else
1370         {
1371           /* Fetch successively more significant portions.  */
1372           if (CONST_INT_P (value))
1373             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1374                              >> bitsdone)
1375                             & ((HOST_WIDE_INT_1 << thissize) - 1));
1376           /* Likewise, but the source is big-endian.  */
1377           else if (reverse)
1378             part = extract_fixed_bit_field (word_mode, value, thissize,
1379                                             total_bits - bitsdone - thissize,
1380                                             NULL_RTX, 1, false);
1381           else
1382             part = extract_fixed_bit_field (word_mode, value, thissize,
1383                                             bitsdone, NULL_RTX, 1, false);
1384         }
1385
1386       /* If OP0 is a register, then handle OFFSET here.  */
1387       if (SUBREG_P (op0) || REG_P (op0))
1388         {
1389           machine_mode op0_mode = GET_MODE (op0);
               /* A register narrower than a word has only one word: a
                  nonzero OFFSET is marked with const0_rtx so that the store
                  below is skipped.  */
1390           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1391             word = offset ? const0_rtx : op0;
1392           else
1393             word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1394                                           GET_MODE (op0));
               /* With UNIT == BITS_PER_WORD (always the case for a REG,
                  whose UNIT is never halved above) this reduces OFFSET to
                  zero, since the word has already been selected.  */
1395           offset &= BITS_PER_WORD / unit - 1;
1396         }
1397       else
1398         word = op0;
1399
1400       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1401          it is just an out-of-bounds access.  Ignore it.  */
1402       if (word != const0_rtx)
1403         store_fixed_bit_field (word, thissize, offset * unit + thispos,
1404                                bitregion_start, bitregion_end, part,
1405                                reverse);
1406       bitsdone += thissize;
1407     }
1408 }
1409 \f
1410 /* A subroutine of extract_bit_field_1 that converts return value X
1411    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1412    to extract_bit_field.

        X is returned unchanged when it already has mode TMODE or MODE.
        Otherwise the result is X converted to TMODE, with UNSIGNEDP passed
        to convert_to_mode.  */
1413
1414 static rtx
1415 convert_extracted_bit_field (rtx x, machine_mode mode,
1416                              machine_mode tmode, bool unsignedp)
1417 {
1418   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1419     return x;
1420
1421   /* If TMODE is not a scalar integral mode, first convert X to the
1422      integer mode of TMODE's size, force it into a register, and then
1423      access that register in TMODE with gen_lowpart.  */
1424   if (!SCALAR_INT_MODE_P (tmode))
1425     {
1426       machine_mode smode;
1427
1428       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1429       x = convert_to_mode (smode, x, unsignedp);
1430       x = force_reg (smode, x);
1431       return gen_lowpart (tmode, x);
1432     }
1433
1434   return convert_to_mode (tmode, x, unsignedp);
1435 }
1436
1437 /* Try to use an ext(z)v pattern to extract a field from OP0.
1438    Return the extracted value on success, otherwise return null.
1439    EXTV describes the pattern: its field_mode is the mode of the
1440    extraction (EXT_MODE below) and its struct_mode is used to narrow a
        MEM operand.  The other arguments are as for extract_bit_field.

        Null is returned when BITSIZE is zero or wider than EXT_MODE, when
        OP0 is a SUBREG whose mode is not EXT_MODE, or when the pattern
        cannot be expanded.  */
1441
1442 static rtx
1443 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1444                               unsigned HOST_WIDE_INT bitsize,
1445                               unsigned HOST_WIDE_INT bitnum,
1446                               int unsignedp, rtx target,
1447                               machine_mode mode, machine_mode tmode)
1448 {
1449   struct expand_operand ops[4];
       /* SPEC_TARGET is the target as given by the caller (or the fresh
          TMODE pseudo created for it below).  SPEC_TARGET_SUBREG is set when
          TARGET is replaced by a lowpart of it in a wider EXT_MODE, so that
          a result produced there can be returned as SPEC_TARGET.  */
1450   rtx spec_target = target;
1451   rtx spec_target_subreg = 0;
1452   machine_mode ext_mode = extv->field_mode;
1453   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1454
1455   if (bitsize == 0 || unit < bitsize)
1456     return NULL_RTX;
1457
1458   if (MEM_P (op0))
1459     /* Get a reference to the first byte of the field.  */
1460     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1461                                 &bitnum);
1462   else
1463     {
1464       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1465       if (BYTES_BIG_ENDIAN)
1466         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1467
1468       /* If op0 is a register, we need it in EXT_MODE to make it
1469          acceptable to the format of ext(z)v.  */
1470       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1471         return NULL_RTX;
1472       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1473         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1474     }
1475
1476   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1477      "backwards" from the size of the unit we are extracting from.
1478      Otherwise, we count bits from the most significant on a
1479      BYTES/BITS_BIG_ENDIAN machine.  */
1480
1481   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1482     bitnum = unit - bitsize - bitnum;
1483
1484   if (target == 0)
1485     target = spec_target = gen_reg_rtx (tmode);
1486
1487   if (GET_MODE (target) != ext_mode)
1488     {
1489       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1490          between the mode of the extraction (word_mode) and the target
1491          mode.  Instead, create a temporary and use convert_move to set
1492          the target.  */
1493       if (REG_P (target)
1494           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1495         {
1496           target = gen_lowpart (ext_mode, target);
1497           if (GET_MODE_PRECISION (ext_mode)
1498               > GET_MODE_PRECISION (GET_MODE (spec_target)))
1499             spec_target_subreg = target;
1500         }
1501       else
1502         target = gen_reg_rtx (ext_mode);
1503     }
1504
       /* Operands of the pattern: 0 is the output, 1 the source, 2 the
          field width and 3 the (adjusted) bit position.  */
1505   create_output_operand (&ops[0], target, ext_mode);
1506   create_fixed_operand (&ops[1], op0);
1507   create_integer_operand (&ops[2], bitsize);
1508   create_integer_operand (&ops[3], bitnum);
1509   if (maybe_expand_insn (extv->icode, 4, ops))
1510     {
           /* Use the output operand's final value, then map it back to what
              the caller asked for.  */
1511       target = ops[0].value;
1512       if (target == spec_target)
1513         return target;
1514       if (target == spec_target_subreg)
1515         return spec_target;
1516       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1517     }
1518   return NULL_RTX;
1519 }
1520
1521 /* A subroutine of extract_bit_field, with the same arguments.
1522    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1523    if we can find no other means of implementing the operation.
1524    if FALLBACK_P is false, return NULL instead.  */
1525
1526 static rtx
1527 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1528                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1529                      machine_mode mode, machine_mode tmode,
1530                      bool reverse, bool fallback_p)
1531 {
1532   rtx op0 = str_rtx;
1533   machine_mode int_mode;
1534   machine_mode mode1;
1535
1536   if (tmode == VOIDmode)
1537     tmode = mode;
1538
1539   while (GET_CODE (op0) == SUBREG)
1540     {
1541       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1542       op0 = SUBREG_REG (op0);
1543     }
1544
1545   /* If we have an out-of-bounds access to a register, just return an
1546      uninitialized register of the required mode.  This can occur if the
1547      source code contains an out-of-bounds access to a small array.  */
1548   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1549     return gen_reg_rtx (tmode);
1550
1551   if (REG_P (op0)
1552       && mode == GET_MODE (op0)
1553       && bitnum == 0
1554       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1555     {
1556       if (reverse)
1557         op0 = flip_storage_order (mode, op0);
1558       /* We're trying to extract a full register from itself.  */
1559       return op0;
1560     }
1561
1562   /* See if we can get a better vector mode before extracting.  */
1563   if (VECTOR_MODE_P (GET_MODE (op0))
1564       && !MEM_P (op0)
1565       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1566     {
1567       machine_mode new_mode;
1568
1569       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1570         new_mode = MIN_MODE_VECTOR_FLOAT;
1571       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1572         new_mode = MIN_MODE_VECTOR_FRACT;
1573       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1574         new_mode = MIN_MODE_VECTOR_UFRACT;
1575       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1576         new_mode = MIN_MODE_VECTOR_ACCUM;
1577       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1578         new_mode = MIN_MODE_VECTOR_UACCUM;
1579       else
1580         new_mode = MIN_MODE_VECTOR_INT;
1581
1582       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1583         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1584             && GET_MODE_UNIT_SIZE (new_mode) == GET_MODE_SIZE (tmode)
1585             && targetm.vector_mode_supported_p (new_mode))
1586           break;
1587       if (new_mode != VOIDmode)
1588         op0 = gen_lowpart (new_mode, op0);
1589     }
1590
1591   /* Use vec_extract patterns for extracting parts of vectors whenever
1592      available.  */
1593   if (VECTOR_MODE_P (GET_MODE (op0))
1594       && !MEM_P (op0)
1595       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1596       && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1597           == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
1598     {
1599       struct expand_operand ops[3];
1600       machine_mode outermode = GET_MODE (op0);
1601       machine_mode innermode = GET_MODE_INNER (outermode);
1602       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1603       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1604
1605       create_output_operand (&ops[0], target, innermode);
1606       create_input_operand (&ops[1], op0, outermode);
1607       create_integer_operand (&ops[2], pos);
1608       if (maybe_expand_insn (icode, 3, ops))
1609         {
1610           target = ops[0].value;
1611           if (GET_MODE (target) != mode)
1612             return gen_lowpart (tmode, target);
1613           return target;
1614         }
1615     }
1616
1617   /* Make sure we are playing with integral modes.  Pun with subregs
1618      if we aren't.  */
1619   {
1620     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1621     if (imode != GET_MODE (op0))
1622       {
1623         if (MEM_P (op0))
1624           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1625         else if (imode != BLKmode)
1626           {
1627             op0 = gen_lowpart (imode, op0);
1628
1629             /* If we got a SUBREG, force it into a register since we
1630                aren't going to be able to do another SUBREG on it.  */
1631             if (GET_CODE (op0) == SUBREG)
1632               op0 = force_reg (imode, op0);
1633           }
1634         else
1635           {
1636             HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1637             rtx mem = assign_stack_temp (GET_MODE (op0), size);
1638             emit_move_insn (mem, op0);
1639             op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1640           }
1641       }
1642   }
1643
1644   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1645      If that's wrong, the solution is to test for it and set TARGET to 0
1646      if needed.  */
1647
1648   /* Get the mode of the field to use for atomic access or subreg
1649      conversion.  */
1650   mode1 = mode;
1651   if (SCALAR_INT_MODE_P (tmode))
1652     {
1653       machine_mode try_mode = mode_for_size (bitsize,
1654                                                   GET_MODE_CLASS (tmode), 0);
1655       if (try_mode != BLKmode)
1656         mode1 = try_mode;
1657     }
1658   gcc_assert (mode1 != BLKmode);
1659
1660   /* Extraction of a full MODE1 value can be done with a subreg as long
1661      as the least significant bit of the value is the least significant
1662      bit of either OP0 or a word of OP0.  */
1663   if (!MEM_P (op0)
1664       && !reverse
1665       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1666       && bitsize == GET_MODE_BITSIZE (mode1)
1667       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1668     {
1669       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1670                                      bitnum / BITS_PER_UNIT);
1671       if (sub)
1672         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1673     }
1674
1675   /* Extraction of a full MODE1 value can be done with a load as long as
1676      the field is on a byte boundary and is sufficiently aligned.  */
1677   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1678     {
1679       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1680       if (reverse)
1681         op0 = flip_storage_order (mode1, op0);
1682       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1683     }
1684
1685   /* Handle fields bigger than a word.  */
1686
1687   if (bitsize > BITS_PER_WORD)
1688     {
1689       /* Here we transfer the words of the field
1690          in the order least significant first.
1691          This is because the most significant word is the one which may
1692          be less than full.  */
1693
1694       const bool backwards = WORDS_BIG_ENDIAN;
1695       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1696       unsigned int i;
1697       rtx_insn *last;
1698
1699       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1700         target = gen_reg_rtx (mode);
1701
1702       /* In case we're about to clobber a base register or something
1703          (see gcc.c-torture/execute/20040625-1.c).   */
1704       if (reg_mentioned_p (target, str_rtx))
1705         target = gen_reg_rtx (mode);
1706
1707       /* Indicate for flow that the entire target reg is being set.  */
1708       emit_clobber (target);
1709
1710       last = get_last_insn ();
1711       for (i = 0; i < nwords; i++)
1712         {
1713           /* If I is 0, use the low-order word in both field and target;
1714              if I is 1, use the next to lowest word; and so on.  */
1715           /* Word number in TARGET to use.  */
1716           unsigned int wordnum
1717             = (backwards
1718                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1719                : i);
1720           /* Offset from start of field in OP0.  */
1721           unsigned int bit_offset = (backwards ^ reverse
1722                                      ? MAX ((int) bitsize - ((int) i + 1)
1723                                             * BITS_PER_WORD,
1724                                             0)
1725                                      : (int) i * BITS_PER_WORD);
1726           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1727           rtx result_part
1728             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1729                                              bitsize - i * BITS_PER_WORD),
1730                                    bitnum + bit_offset, 1, target_part,
1731                                    mode, word_mode, reverse, fallback_p);
1732
1733           gcc_assert (target_part);
1734           if (!result_part)
1735             {
1736               delete_insns_since (last);
1737               return NULL;
1738             }
1739
1740           if (result_part != target_part)
1741             emit_move_insn (target_part, result_part);
1742         }
1743
1744       if (unsignedp)
1745         {
1746           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1747              need to be zero'd out.  */
1748           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1749             {
1750               unsigned int i, total_words;
1751
1752               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1753               for (i = nwords; i < total_words; i++)
1754                 emit_move_insn
1755                   (operand_subword (target,
1756                                     backwards ? total_words - i - 1 : i,
1757                                     1, VOIDmode),
1758                    const0_rtx);
1759             }
1760           return target;
1761         }
1762
1763       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1764       target = expand_shift (LSHIFT_EXPR, mode, target,
1765                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1766       return expand_shift (RSHIFT_EXPR, mode, target,
1767                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1768     }
1769
1770   /* If OP0 is a multi-word register, narrow it to the affected word.
1771      If the region spans two words, defer to extract_split_bit_field.  */
1772   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1773     {
1774       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1775         {
1776           if (!fallback_p)
1777             return NULL_RTX;
1778           target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1779                                             reverse);
1780           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1781         }
1782       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1783                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1784       bitnum %= BITS_PER_WORD;
1785     }
1786
1787   /* From here on we know the desired field is smaller than a word.
1788      If OP0 is a register, it too fits within a word.  */
1789   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1790   extraction_insn extv;
1791   if (!MEM_P (op0)
1792       && !reverse
1793       /* ??? We could limit the structure size to the part of OP0 that
1794          contains the field, with appropriate checks for endianness
1795          and TRULY_NOOP_TRUNCATION.  */
1796       && get_best_reg_extraction_insn (&extv, pattern,
1797                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1798                                        tmode))
1799     {
1800       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1801                                                  unsignedp, target, mode,
1802                                                  tmode);
1803       if (result)
1804         return result;
1805     }
1806
1807   /* If OP0 is a memory, try copying it to a register and seeing if a
1808      cheap register alternative is available.  */
1809   if (MEM_P (op0) & !reverse)
1810     {
1811       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1812                                         tmode))
1813         {
1814           rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1815                                                      bitnum, unsignedp,
1816                                                      target, mode,
1817                                                      tmode);
1818           if (result)
1819             return result;
1820         }
1821
1822       rtx_insn *last = get_last_insn ();
1823
1824       /* Try loading part of OP0 into a register and extracting the
1825          bitfield from that.  */
1826       unsigned HOST_WIDE_INT bitpos;
1827       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1828                                                0, 0, tmode, &bitpos);
1829       if (xop0)
1830         {
1831           xop0 = copy_to_reg (xop0);
1832           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1833                                             unsignedp, target,
1834                                             mode, tmode, reverse, false);
1835           if (result)
1836             return result;
1837           delete_insns_since (last);
1838         }
1839     }
1840
1841   if (!fallback_p)
1842     return NULL;
1843
1844   /* Find a correspondingly-sized integer field, so we can apply
1845      shifts and masks to it.  */
1846   int_mode = int_mode_for_mode (tmode);
1847   if (int_mode == BLKmode)
1848     int_mode = int_mode_for_mode (mode);
1849   /* Should probably push op0 out to memory and then do a load.  */
1850   gcc_assert (int_mode != BLKmode);
1851
1852   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target,
1853                                     unsignedp, reverse);
1854
1855   /* Complex values must be reversed piecewise, so we need to undo the global
1856      reversal, convert to the complex mode and reverse again.  */
1857   if (reverse && COMPLEX_MODE_P (tmode))
1858     {
1859       target = flip_storage_order (int_mode, target);
1860       target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1861       target = flip_storage_order (tmode, target);
1862     }
1863   else
1864     target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1865
1866   return target;
1867 }
1868
1869 /* Generate code to extract a byte-field from STR_RTX
1870    containing BITSIZE bits, starting at BITNUM,
1871    and put it in TARGET if possible (if TARGET is nonzero).
1872    Regardless of TARGET, we return the rtx for where the value is placed.
1873
1874    STR_RTX is the structure containing the byte (a REG or MEM).
1875    UNSIGNEDP is nonzero if this is an unsigned bit field.
1876    MODE is the natural mode of the field value once extracted.
1877    TMODE is the mode the caller would like the value to have;
1878    but the value may be returned with type MODE instead.
1879
1880    If REVERSE is true, the extraction is to be done in reverse order.
1881
1882    If a TARGET is specified and we can store in it at no extra cost,
1883    we do so, and return TARGET.
1884    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1885    if they are equally easy.  */
1886
1887 rtx
1888 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1889                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1890                    machine_mode mode, machine_mode tmode, bool reverse)
1891 {
1892   machine_mode mode1;
1893
1894   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1895   if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1896     mode1 = GET_MODE (str_rtx);
1897   else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1898     mode1 = GET_MODE (target);
1899   else
1900     mode1 = tmode;
1901
1902   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1903     {
1904       /* Extraction of a full MODE1 value can be done with a simple load.
1905          We know here that the field can be accessed with one single
1906          instruction.  For targets that support unaligned memory,
1907          an unaligned access may be necessary.  */
1908       if (bitsize == GET_MODE_BITSIZE (mode1))
1909         {
1910           rtx result = adjust_bitfield_address (str_rtx, mode1,
1911                                                 bitnum / BITS_PER_UNIT);
1912           if (reverse)
1913             result = flip_storage_order (mode1, result);
1914           gcc_assert (bitnum % BITS_PER_UNIT == 0);
1915           return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1916         }
1917
1918       str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1919                                       &bitnum);
1920       gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1921       str_rtx = copy_to_reg (str_rtx);
1922     }
1923
1924   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1925                               target, mode, tmode, reverse, true);
1926 }
1927 \f
1928 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1929    from bit BITNUM of OP0.
1930
1931    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1932    If REVERSE is true, the extraction is to be done in reverse order.
1933
1934    If TARGET is nonzero, attempts to store the value there
1935    and return TARGET, but this is not guaranteed.
1936    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1937
1938 static rtx
1939 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1940                          unsigned HOST_WIDE_INT bitsize,
1941                          unsigned HOST_WIDE_INT bitnum, rtx target,
1942                          int unsignedp, bool reverse)
1943 {
1944   if (MEM_P (op0))
1945     {
1946       machine_mode mode
1947         = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1948                          MEM_VOLATILE_P (op0));
1949
1950       if (mode == VOIDmode)
1951         /* The only way this should occur is if the field spans word
1952            boundaries.  */
1953         return extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1954                                         reverse);
1955
1956       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1957     }
1958
1959   return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1960                                     target, unsignedp, reverse);
1961 }
1962
1963 /* Helper function for extract_fixed_bit_field, extracts
1964    the bit field always using the MODE of OP0.  */
1965
1966 static rtx
1967 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1968                            unsigned HOST_WIDE_INT bitsize,
1969                            unsigned HOST_WIDE_INT bitnum, rtx target,
1970                            int unsignedp, bool reverse)
1971 {
1972   machine_mode mode = GET_MODE (op0);
1973   gcc_assert (SCALAR_INT_MODE_P (mode));
1974
1975   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1976      for invalid input, such as extract equivalent of f5 from
1977      gcc.dg/pr48335-2.c.  */
1978
1979   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1980     /* BITNUM is the distance between our msb and that of OP0.
1981        Convert it to the distance from the lsb.  */
1982     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1983
1984   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1985      We have reduced the big-endian case to the little-endian case.  */
1986   if (reverse)
1987     op0 = flip_storage_order (mode, op0);
1988
1989   if (unsignedp)
1990     {
1991       if (bitnum)
1992         {
1993           /* If the field does not already start at the lsb,
1994              shift it so it does.  */
1995           /* Maybe propagate the target for the shift.  */
1996           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1997           if (tmode != mode)
1998             subtarget = 0;
1999           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2000         }
2001       /* Convert the value to the desired mode.  */
2002       if (mode != tmode)
2003         op0 = convert_to_mode (tmode, op0, 1);
2004
2005       /* Unless the msb of the field used to be the msb when we shifted,
2006          mask out the upper bits.  */
2007
2008       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2009         return expand_binop (GET_MODE (op0), and_optab, op0,
2010                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
2011                              target, 1, OPTAB_LIB_WIDEN);
2012       return op0;
2013     }
2014
2015   /* To extract a signed bit-field, first shift its msb to the msb of the word,
2016      then arithmetic-shift its lsb to the lsb of the word.  */
2017   op0 = force_reg (mode, op0);
2018
2019   /* Find the narrowest integer mode that contains the field.  */
2020
2021   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
2022        mode = GET_MODE_WIDER_MODE (mode))
2023     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
2024       {
2025         op0 = convert_to_mode (mode, op0, 0);
2026         break;
2027       }
2028
2029   if (mode != tmode)
2030     target = 0;
2031
2032   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2033     {
2034       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2035       /* Maybe propagate the target for the shift.  */
2036       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2037       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2038     }
2039
2040   return expand_shift (RSHIFT_EXPR, mode, op0,
2041                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2042 }
2043
2044 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2045    VALUE << BITPOS.  */
2046
2047 static rtx
2048 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2049               int bitpos)
2050 {
2051   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2052 }
2053 \f
2054 /* Extract a bit field that is split across two words
2055    and return an RTX for the result.
2056
2057    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2058    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2059    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2060
2061    If REVERSE is true, the extraction is to be done in reverse order.  */
2062
2063 static rtx
2064 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2065                          unsigned HOST_WIDE_INT bitpos, int unsignedp,
2066                          bool reverse)
2067 {
2068   unsigned int unit;
2069   unsigned int bitsdone = 0;
2070   rtx result = NULL_RTX;
2071   int first = 1;
2072
2073   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2074      much at a time.  */
2075   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2076     unit = BITS_PER_WORD;
2077   else
2078     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2079
2080   while (bitsdone < bitsize)
2081     {
2082       unsigned HOST_WIDE_INT thissize;
2083       rtx part, word;
2084       unsigned HOST_WIDE_INT thispos;
2085       unsigned HOST_WIDE_INT offset;
2086
2087       offset = (bitpos + bitsdone) / unit;
2088       thispos = (bitpos + bitsdone) % unit;
2089
2090       /* THISSIZE must not overrun a word boundary.  Otherwise,
2091          extract_fixed_bit_field will call us again, and we will mutually
2092          recurse forever.  */
2093       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2094       thissize = MIN (thissize, unit - thispos);
2095
2096       /* If OP0 is a register, then handle OFFSET here.  */
2097       if (SUBREG_P (op0) || REG_P (op0))
2098         {
2099           word = operand_subword_force (op0, offset, GET_MODE (op0));
2100           offset = 0;
2101         }
2102       else
2103         word = op0;
2104
2105       /* Extract the parts in bit-counting order,
2106          whose meaning is determined by BYTES_PER_UNIT.
2107          OFFSET is in UNITs, and UNIT is in bits.  */
2108       part = extract_fixed_bit_field (word_mode, word, thissize,
2109                                       offset * unit + thispos, 0, 1, reverse);
2110       bitsdone += thissize;
2111
2112       /* Shift this part into place for the result.  */
2113       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2114         {
2115           if (bitsize != bitsdone)
2116             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2117                                  bitsize - bitsdone, 0, 1);
2118         }
2119       else
2120         {
2121           if (bitsdone != thissize)
2122             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2123                                  bitsdone - thissize, 0, 1);
2124         }
2125
2126       if (first)
2127         result = part;
2128       else
2129         /* Combine the parts with bitwise or.  This works
2130            because we extracted each part as an unsigned bit field.  */
2131         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2132                                OPTAB_LIB_WIDEN);
2133
2134       first = 0;
2135     }
2136
2137   /* Unsigned bit field: we are done.  */
2138   if (unsignedp)
2139     return result;
2140   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2141   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2142                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2143   return expand_shift (RSHIFT_EXPR, word_mode, result,
2144                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2145 }
2146 \f
2147 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2148    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2149    MODE, fill the upper bits with zeros.  Fail if the layout of either
2150    mode is unknown (as for CC modes) or if the extraction would involve
2151    unprofitable mode punning.  Return the value on success, otherwise
2152    return null.
2153
2154    This is different from gen_lowpart* in these respects:
2155
2156      - the returned value must always be considered an rvalue
2157
2158      - when MODE is wider than SRC_MODE, the extraction involves
2159        a zero extension
2160
2161      - when MODE is smaller than SRC_MODE, the extraction involves
2162        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2163
2164    In other words, this routine performs a computation, whereas the
2165    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2166    operations.  */
2167
2168 rtx
2169 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2170 {
2171   machine_mode int_mode, src_int_mode;
2172
2173   if (mode == src_mode)
2174     return src;
2175
2176   if (CONSTANT_P (src))
2177     {
2178       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2179          fails, it will happily create (subreg (symbol_ref)) or similar
2180          invalid SUBREGs.  */
2181       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2182       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2183       if (ret)
2184         return ret;
2185
2186       if (GET_MODE (src) == VOIDmode
2187           || !validate_subreg (mode, src_mode, src, byte))
2188         return NULL_RTX;
2189
2190       src = force_reg (GET_MODE (src), src);
2191       return gen_rtx_SUBREG (mode, src, byte);
2192     }
2193
2194   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2195     return NULL_RTX;
2196
2197   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2198       && MODES_TIEABLE_P (mode, src_mode))
2199     {
2200       rtx x = gen_lowpart_common (mode, src);
2201       if (x)
2202         return x;
2203     }
2204
2205   src_int_mode = int_mode_for_mode (src_mode);
2206   int_mode = int_mode_for_mode (mode);
2207   if (src_int_mode == BLKmode || int_mode == BLKmode)
2208     return NULL_RTX;
2209
2210   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2211     return NULL_RTX;
2212   if (!MODES_TIEABLE_P (int_mode, mode))
2213     return NULL_RTX;
2214
2215   src = gen_lowpart (src_int_mode, src);
2216   src = convert_modes (int_mode, src_int_mode, src, true);
2217   src = gen_lowpart (mode, src);
2218   return src;
2219 }
2220 \f
2221 /* Add INC into TARGET.  */
2222
2223 void
2224 expand_inc (rtx target, rtx inc)
2225 {
2226   rtx value = expand_binop (GET_MODE (target), add_optab,
2227                             target, inc,
2228                             target, 0, OPTAB_LIB_WIDEN);
2229   if (value != target)
2230     emit_move_insn (target, value);
2231 }
2232
2233 /* Subtract DEC from TARGET.  */
2234
2235 void
2236 expand_dec (rtx target, rtx dec)
2237 {
2238   rtx value = expand_binop (GET_MODE (target), sub_optab,
2239                             target, dec,
2240                             target, 0, OPTAB_LIB_WIDEN);
2241   if (value != target)
2242     emit_move_insn (target, value);
2243 }
2244 \f
2245 /* Output a shift instruction for expression code CODE,
2246    with SHIFTED being the rtx for the value to shift,
2247    and AMOUNT the rtx for the amount to shift by.
2248    Store the result in the rtx TARGET, if that is convenient.
2249    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2250    Return the rtx for where the value is.  */
2251
2252 static rtx
2253 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2254                 rtx amount, rtx target, int unsignedp)
2255 {
2256   rtx op1, temp = 0;
2257   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2258   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2259   optab lshift_optab = ashl_optab;
2260   optab rshift_arith_optab = ashr_optab;
2261   optab rshift_uns_optab = lshr_optab;
2262   optab lrotate_optab = rotl_optab;
2263   optab rrotate_optab = rotr_optab;
2264   machine_mode op1_mode;
2265   machine_mode scalar_mode = mode;
2266   int attempt;
2267   bool speed = optimize_insn_for_speed_p ();
2268
2269   if (VECTOR_MODE_P (mode))
2270     scalar_mode = GET_MODE_INNER (mode);
2271   op1 = amount;
2272   op1_mode = GET_MODE (op1);
2273
2274   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2275      shift amount is a vector, use the vector/vector shift patterns.  */
2276   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2277     {
2278       lshift_optab = vashl_optab;
2279       rshift_arith_optab = vashr_optab;
2280       rshift_uns_optab = vlshr_optab;
2281       lrotate_optab = vrotl_optab;
2282       rrotate_optab = vrotr_optab;
2283     }
2284
2285   /* Previously detected shift-counts computed by NEGATE_EXPR
2286      and shifted in the other direction; but that does not work
2287      on all machines.  */
2288
2289   if (SHIFT_COUNT_TRUNCATED)
2290     {
2291       if (CONST_INT_P (op1)
2292           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2293               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2294         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2295                        % GET_MODE_BITSIZE (scalar_mode));
2296       else if (GET_CODE (op1) == SUBREG
2297                && subreg_lowpart_p (op1)
2298                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2299                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2300         op1 = SUBREG_REG (op1);
2301     }
2302
2303   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2304      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2305      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2306      amount instead.  */
2307   if (rotate
2308       && CONST_INT_P (op1)
2309       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2310                    GET_MODE_BITSIZE (scalar_mode) - 1))
2311     {
2312       op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2313       left = !left;
2314       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2315     }
2316
2317   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2318      Note that this is not the case for bigger values.  For instance a rotation
2319      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2320      0x04030201 (bswapsi).  */
2321   if (rotate
2322       && CONST_INT_P (op1)
2323       && INTVAL (op1) == BITS_PER_UNIT
2324       && GET_MODE_SIZE (scalar_mode) == 2
2325       && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2326     return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2327                                   unsignedp);
2328
2329   if (op1 == const0_rtx)
2330     return shifted;
2331
2332   /* Check whether its cheaper to implement a left shift by a constant
2333      bit count by a sequence of additions.  */
2334   if (code == LSHIFT_EXPR
2335       && CONST_INT_P (op1)
2336       && INTVAL (op1) > 0
2337       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2338       && INTVAL (op1) < MAX_BITS_PER_WORD
2339       && (shift_cost (speed, mode, INTVAL (op1))
2340           > INTVAL (op1) * add_cost (speed, mode))
2341       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2342     {
2343       int i;
2344       for (i = 0; i < INTVAL (op1); i++)
2345         {
2346           temp = force_reg (mode, shifted);
2347           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2348                                   unsignedp, OPTAB_LIB_WIDEN);
2349         }
2350       return shifted;
2351     }
2352
2353   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2354     {
2355       enum optab_methods methods;
2356
2357       if (attempt == 0)
2358         methods = OPTAB_DIRECT;
2359       else if (attempt == 1)
2360         methods = OPTAB_WIDEN;
2361       else
2362         methods = OPTAB_LIB_WIDEN;
2363
2364       if (rotate)
2365         {
2366           /* Widening does not work for rotation.  */
2367           if (methods == OPTAB_WIDEN)
2368             continue;
2369           else if (methods == OPTAB_LIB_WIDEN)
2370             {
2371               /* If we have been unable to open-code this by a rotation,
2372                  do it as the IOR of two shifts.  I.e., to rotate A
2373                  by N bits, compute
2374                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2375                  where C is the bitsize of A.
2376
2377                  It is theoretically possible that the target machine might
2378                  not be able to perform either shift and hence we would
2379                  be making two libcalls rather than just the one for the
2380                  shift (similarly if IOR could not be done).  We will allow
2381                  this extremely unlikely lossage to avoid complicating the
2382                  code below.  */
2383
2384               rtx subtarget = target == shifted ? 0 : target;
2385               rtx new_amount, other_amount;
2386               rtx temp1;
2387
2388               new_amount = op1;
2389               if (op1 == const0_rtx)
2390                 return shifted;
2391               else if (CONST_INT_P (op1))
2392                 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2393                                         - INTVAL (op1));
2394               else
2395                 {
2396                   other_amount
2397                     = simplify_gen_unary (NEG, GET_MODE (op1),
2398                                           op1, GET_MODE (op1));
2399                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2400                   other_amount
2401                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2402                                            gen_int_mode (mask, GET_MODE (op1)));
2403                 }
2404
2405               shifted = force_reg (mode, shifted);
2406
2407               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2408                                      mode, shifted, new_amount, 0, 1);
2409               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2410                                       mode, shifted, other_amount,
2411                                       subtarget, 1);
2412               return expand_binop (mode, ior_optab, temp, temp1, target,
2413                                    unsignedp, methods);
2414             }
2415
2416           temp = expand_binop (mode,
2417                                left ? lrotate_optab : rrotate_optab,
2418                                shifted, op1, target, unsignedp, methods);
2419         }
2420       else if (unsignedp)
2421         temp = expand_binop (mode,
2422                              left ? lshift_optab : rshift_uns_optab,
2423                              shifted, op1, target, unsignedp, methods);
2424
2425       /* Do arithmetic shifts.
2426          Also, if we are going to widen the operand, we can just as well
2427          use an arithmetic right-shift instead of a logical one.  */
2428       if (temp == 0 && ! rotate
2429           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2430         {
2431           enum optab_methods methods1 = methods;
2432
2433           /* If trying to widen a log shift to an arithmetic shift,
2434              don't accept an arithmetic shift of the same size.  */
2435           if (unsignedp)
2436             methods1 = OPTAB_MUST_WIDEN;
2437
2438           /* Arithmetic shift */
2439
2440           temp = expand_binop (mode,
2441                                left ? lshift_optab : rshift_arith_optab,
2442                                shifted, op1, target, unsignedp, methods1);
2443         }
2444
2445       /* We used to try extzv here for logical right shifts, but that was
2446          only useful for one machine, the VAX, and caused poor code
2447          generation there for lshrdi3, so the code was deleted and a
2448          define_expand for lshrsi3 was added to vax.md.  */
2449     }
2450
2451   gcc_assert (temp);
2452   return temp;
2453 }
2454
2455 /* Emit RTL for the shift or rotate operation CODE applied to SHIFTED
2456    in mode MODE, where the shift count is the host integer AMOUNT.
2457    The result is placed in the rtx TARGET when that is convenient.
2458    A nonzero UNSIGNEDP requests a logical shift, zero an arithmetic
2459    one.  The rtx that holds the result is returned.  */
2460
2461
2462 rtx
2463 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2464               int amount, rtx target, int unsignedp)
2465 {
2466   rtx count = GEN_INT (amount);
2467   return expand_shift_1 (code, mode, shifted, count, target, unsignedp);
2468 }
2469
2470 /* Emit RTL for the shift or rotate operation CODE applied to SHIFTED
2471    in mode MODE, where the shift count is given by the tree AMOUNT,
2472    which is expanded to an rtx first.  The result is placed in the
2473    rtx TARGET when that is convenient.  A nonzero UNSIGNEDP requests
2474    a logical shift, zero an arithmetic one.  The rtx that holds the
2475    result is returned.  */
2476
2477 rtx
2478 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2479                        tree amount, rtx target, int unsignedp)
2480 {
2481   rtx count = expand_normal (amount);
2482   return expand_shift_1 (code, mode, shifted, count, target, unsignedp);
2483 }
2484
2485 \f
2486 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2487                         const struct mult_cost *, machine_mode mode);
2488 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2489                               const struct algorithm *, enum mult_variant);
2490 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2491 static rtx extract_high_half (machine_mode, rtx);
2492 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2493 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2494                                        int, int);
2495 /* Find the best algorithm for multiplying by T and store it in
2496    *ALG_OUT.  The algorithm must cost less than COST_LIMIT.  If none
2497    is found, ALG_OUT->cost is left just above COST_LIMIT and all
2498    other fields of *ALG_OUT are undefined.
2499    MODE is the machine mode of the multiplication.  */
2500
2501 static void
2502 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2503             const struct mult_cost *cost_limit, machine_mode mode)
2504 {
2505   int m;  /* Shift count of the step being tried.  */
2506   struct algorithm *alg_in, *best_alg;  /* Scratch and best-so-far.  */
2507   struct mult_cost best_cost;
2508   struct mult_cost new_limit;  /* Budget handed to recursive calls.  */
2509   int op_cost, op_latency;
2510   unsigned HOST_WIDE_INT orig_t = t;  /* T before masking to IMODE.  */
2511   unsigned HOST_WIDE_INT q;
2512   int maxm, hash_index;  /* MAXM is an exclusive bound on M.  */
2513   bool cache_hit = false;  /* Replaying a cached algorithm?  */
2514   enum alg_code cache_alg = alg_zero;
2515   bool speed = optimize_insn_for_speed_p ();
2516   machine_mode imode;
2517   struct alg_hash_entry *entry_ptr;
2518
2519   /* Indicate that no algorithm is yet found.  If no algorithm
2520      is found, this value will be returned and indicate failure.  */
2521   alg_out->cost.cost = cost_limit->cost + 1;
2522   alg_out->cost.latency = cost_limit->latency + 1;
2523
2524   if (cost_limit->cost < 0
2525       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2526     return;  /* No budget left to spend.  */
2527
2528   /* Be prepared for vector modes.  */
2529   imode = GET_MODE_INNER (mode);
2530
2531   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2532
2533   /* Restrict the bits of "t" to the multiplication's mode.  */
2534   t &= GET_MODE_MASK (imode);
2535
2536   /* t == 1 can be done in zero cost.  */
2537   if (t == 1)
2538     {
2539       alg_out->ops = 1;
2540       alg_out->cost.cost = 0;
2541       alg_out->cost.latency = 0;
2542       alg_out->op[0] = alg_m;
2543       return;
2544     }
2545
2546   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2547      fail now.  */
2548   if (t == 0)
2549     {
2550       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2551         return;
2552       else
2553         {
2554           alg_out->ops = 1;
2555           alg_out->cost.cost = zero_cost (speed);
2556           alg_out->cost.latency = zero_cost (speed);
2557           alg_out->op[0] = alg_zero;
2558           return;
2559         }
2560     }
2561
2562   /* We'll be needing a couple extra algorithm structures now.  */
2563
2564   alg_in = XALLOCA (struct algorithm);
2565   best_alg = XALLOCA (struct algorithm);
2566   best_cost = *cost_limit;  /* Nothing found yet.  */
2567
2568   /* Compute the hash index.  */
2569   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2570
2571   /* See if we already know what to do for T.  */
2572   entry_ptr = alg_hash_entry_ptr (hash_index);
2573   if (entry_ptr->t == t
2574       && entry_ptr->mode == mode
2575       && entry_ptr->speed == speed
2576       && entry_ptr->alg != alg_unknown)
2577     {
2578       cache_alg = entry_ptr->alg;
2579
2580       if (cache_alg == alg_impossible)
2581         {
2582           /* The cache tells us that it's impossible to synthesize
2583              multiplication by T within entry_ptr->cost.  */
2584           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2585             /* COST_LIMIT is at least as restrictive as the one
2586                recorded in the hash table, in which case we have no
2587                hope of synthesizing a multiplication.  Just
2588                return.  */
2589             return;
2590
2591           /* If we get here, COST_LIMIT is less restrictive than the
2592              one recorded in the hash table, so we may be able to
2593              synthesize a multiplication.  Proceed as if we didn't
2594              have the cache entry.  */
2595         }
2596       else
2597         {
2598           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2599             /* The cached algorithm shows that this multiplication
2600                requires more cost than COST_LIMIT.  Just return.  This
2601                way, we don't clobber this cache entry with
2602                alg_impossible but retain useful information.  */
2603             return;
2604
2605           cache_hit = true;
2606           /* Re-run only the step that produced the cached result.  */
2607           switch (cache_alg)
2608             {
2609             case alg_shift:
2610               goto do_alg_shift;
2611
2612             case alg_add_t_m2:
2613             case alg_sub_t_m2:
2614               goto do_alg_addsub_t_m2;
2615
2616             case alg_add_factor:
2617             case alg_sub_factor:
2618               goto do_alg_addsub_factor;
2619
2620             case alg_add_t2_m:
2621               goto do_alg_add_t2_m;
2622
2623             case alg_sub_t2_m:
2624               goto do_alg_sub_t2_m;
2625
2626             default:
2627               gcc_unreachable ();
2628             }
2629         }
2630     }
2631
2632   /* If we have a group of zero bits at the low-order part of T, try
2633      multiplying by the remaining bits and then doing a shift.  */
2634
2635   if ((t & 1) == 0)
2636     {
2637     do_alg_shift:
2638       m = ctz_or_zero (t); /* m = number of low zero bits */
2639       if (m < maxm)
2640         {
2641           q = t >> m;
2642           /* The function expand_shift will choose between a shift and
2643              a sequence of additions, so the observed cost is given as
2644              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2645           op_cost = m * add_cost (speed, mode);
2646           if (shift_cost (speed, mode, m) < op_cost)
2647             op_cost = shift_cost (speed, mode, m);
2648           new_limit.cost = best_cost.cost - op_cost;
2649           new_limit.latency = best_cost.latency - op_cost;
2650           synth_mult (alg_in, q, &new_limit, mode);
2651
2652           alg_in->cost.cost += op_cost;
2653           alg_in->cost.latency += op_cost;
2654           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2655             {
2656               best_cost = alg_in->cost;
2657               std::swap (alg_in, best_alg);  /* Keep ALG_IN as the new best.  */
2658               best_alg->log[best_alg->ops] = m;
2659               best_alg->op[best_alg->ops] = alg_shift;
2660             }
2661
2662           /* See if treating ORIG_T as a signed number yields a better
2663              sequence.  Try this sequence only for a negative ORIG_T
2664              as it would be useless for a non-negative ORIG_T.  */
2665           if ((HOST_WIDE_INT) orig_t < 0)
2666             {
2667               /* Shift ORIG_T as follows because a right shift of a
2668                  negative-valued signed type is implementation
2669                  defined.  */
2670               q = ~(~orig_t >> m);
2671               /* The function expand_shift will choose between a shift
2672                  and a sequence of additions, so the observed cost is
2673                  given as MIN (m * add_cost(speed, mode),
2674                  shift_cost(speed, mode, m)).  */
2675               op_cost = m * add_cost (speed, mode);
2676               if (shift_cost (speed, mode, m) < op_cost)
2677                 op_cost = shift_cost (speed, mode, m);
2678               new_limit.cost = best_cost.cost - op_cost;
2679               new_limit.latency = best_cost.latency - op_cost;
2680               synth_mult (alg_in, q, &new_limit, mode);
2681
2682               alg_in->cost.cost += op_cost;
2683               alg_in->cost.latency += op_cost;
2684               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2685                 {
2686                   best_cost = alg_in->cost;
2687                   std::swap (alg_in, best_alg);
2688                   best_alg->log[best_alg->ops] = m;
2689                   best_alg->op[best_alg->ops] = alg_shift;
2690                 }
2691             }
2692         }
2693       if (cache_hit)
2694         goto done;
2695     }
2696
2697   /* If we have an odd number, add or subtract one.  */
2698   if ((t & 1) != 0)
2699     {
2700       unsigned HOST_WIDE_INT w;
2701
2702     do_alg_addsub_t_m2:
2703       for (w = 1; (w & t) != 0; w <<= 1)  /* W = lowest clear bit of T.  */
2704         ;
2705       /* If T was -1, then W will be zero after the loop.  This is another
2706          case where T ends with ...111.  Handling this with (T + 1) and
2707          subtract 1 produces slightly better code and results in algorithm
2708          selection much faster than treating it like the ...0111 case
2709          below.  */
2710       if (w == 0
2711           || (w > 2
2712               /* Reject the case where t is 3.
2713                  Thus we prefer addition in that case.  */
2714               && t != 3))
2715         {
2716           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2717
2718           op_cost = add_cost (speed, mode);
2719           new_limit.cost = best_cost.cost - op_cost;
2720           new_limit.latency = best_cost.latency - op_cost;
2721           synth_mult (alg_in, t + 1, &new_limit, mode);
2722
2723           alg_in->cost.cost += op_cost;
2724           alg_in->cost.latency += op_cost;
2725           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2726             {
2727               best_cost = alg_in->cost;
2728               std::swap (alg_in, best_alg);
2729               best_alg->log[best_alg->ops] = 0;
2730               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2731             }
2732         }
2733       else
2734         {
2735           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2736
2737           op_cost = add_cost (speed, mode);
2738           new_limit.cost = best_cost.cost - op_cost;
2739           new_limit.latency = best_cost.latency - op_cost;
2740           synth_mult (alg_in, t - 1, &new_limit, mode);
2741
2742           alg_in->cost.cost += op_cost;
2743           alg_in->cost.latency += op_cost;
2744           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2745             {
2746               best_cost = alg_in->cost;
2747               std::swap (alg_in, best_alg);
2748               best_alg->log[best_alg->ops] = 0;
2749               best_alg->op[best_alg->ops] = alg_add_t_m2;
2750             }
2751         }
2752
2753       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2754          quickly with a - a * n for some appropriate constant n.  */
2755       m = exact_log2 (-orig_t + 1);
2756       if (m >= 0 && m < maxm)
2757         {
2758           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2759           /* If the target has a cheap shift-and-subtract insn use
2760              that in preference to a shift insn followed by a sub insn.
2761              Assume that the shift-and-sub is "atomic" with a latency
2762              equal to its cost, otherwise assume that on superscalar
2763              hardware the shift may be executed concurrently with the
2764              earlier steps in the algorithm.  */
2765           if (shiftsub1_cost (speed, mode, m) <= op_cost)
2766             {
2767               op_cost = shiftsub1_cost (speed, mode, m);
2768               op_latency = op_cost;
2769             }
2770           else
2771             op_latency = add_cost (speed, mode);
2772
2773           new_limit.cost = best_cost.cost - op_cost;
2774           new_limit.latency = best_cost.latency - op_latency;
2775           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2776                       &new_limit, mode);
2777
2778           alg_in->cost.cost += op_cost;
2779           alg_in->cost.latency += op_latency;
2780           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2781             {
2782               best_cost = alg_in->cost;
2783               std::swap (alg_in, best_alg);
2784               best_alg->log[best_alg->ops] = m;
2785               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2786             }
2787         }
2788
2789       if (cache_hit)
2790         goto done;
2791     }
2792
2793   /* Look for factors of t of the form
2794      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2795      If we find such a factor, we can multiply by t using an algorithm that
2796      multiplies by q, shift the result by m and add/subtract it to itself.
2797
2798      We search for large factors first and loop down, even if large factors
2799      are less probable than small; if we find a large factor we will find a
2800      good sequence quickly, and therefore be able to prune (by decreasing
2801      COST_LIMIT) the search.  */
2802
2803  do_alg_addsub_factor:
2804   for (m = floor_log2 (t - 1); m >= 2; m--)
2805     {
2806       unsigned HOST_WIDE_INT d;
2807
2808       d = (HOST_WIDE_INT_1U << m) + 1;
2809       if (t % d == 0 && t > d && m < maxm
2810           && (!cache_hit || cache_alg == alg_add_factor))
2811         {
2812           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2813           if (shiftadd_cost (speed, mode, m) <= op_cost)
2814             op_cost = shiftadd_cost (speed, mode, m);
2815
2816           op_latency = op_cost;
2817
2818
2819           new_limit.cost = best_cost.cost - op_cost;
2820           new_limit.latency = best_cost.latency - op_latency;
2821           synth_mult (alg_in, t / d, &new_limit, mode);
2822
2823           alg_in->cost.cost += op_cost;
2824           alg_in->cost.latency += op_latency;
2825           if (alg_in->cost.latency < op_cost)
2826             alg_in->cost.latency = op_cost;
2827           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2828             {
2829               best_cost = alg_in->cost;
2830               std::swap (alg_in, best_alg);
2831               best_alg->log[best_alg->ops] = m;
2832               best_alg->op[best_alg->ops] = alg_add_factor;
2833             }
2834           /* Other factors will have been taken care of in the recursion.  */
2835           break;
2836         }
2837
2838       d = (HOST_WIDE_INT_1U << m) - 1;
2839       if (t % d == 0 && t > d && m < maxm
2840           && (!cache_hit || cache_alg == alg_sub_factor))
2841         {
2842           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2843           if (shiftsub0_cost (speed, mode, m) <= op_cost)
2844             op_cost = shiftsub0_cost (speed, mode, m);
2845
2846           op_latency = op_cost;
2847
2848           new_limit.cost = best_cost.cost - op_cost;
2849           new_limit.latency = best_cost.latency - op_latency;
2850           synth_mult (alg_in, t / d, &new_limit, mode);
2851
2852           alg_in->cost.cost += op_cost;
2853           alg_in->cost.latency += op_latency;
2854           if (alg_in->cost.latency < op_cost)
2855             alg_in->cost.latency = op_cost;
2856           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2857             {
2858               best_cost = alg_in->cost;
2859               std::swap (alg_in, best_alg);
2860               best_alg->log[best_alg->ops] = m;
2861               best_alg->op[best_alg->ops] = alg_sub_factor;
2862             }
2863           break;
2864         }
2865     }
2866   if (cache_hit)
2867     goto done;
2868
2869   /* Try shift-and-add (load effective address) instructions,
2870      i.e. do a*3, a*5, a*9.  */
2871   if ((t & 1) != 0)
2872     {
2873     do_alg_add_t2_m:
2874       q = t - 1;
2875       m = ctz_hwi (q);
2876       if (q && m < maxm)
2877         {
2878           op_cost = shiftadd_cost (speed, mode, m);
2879           new_limit.cost = best_cost.cost - op_cost;
2880           new_limit.latency = best_cost.latency - op_cost;
2881           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2882
2883           alg_in->cost.cost += op_cost;
2884           alg_in->cost.latency += op_cost;
2885           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2886             {
2887               best_cost = alg_in->cost;
2888               std::swap (alg_in, best_alg);
2889               best_alg->log[best_alg->ops] = m;
2890               best_alg->op[best_alg->ops] = alg_add_t2_m;
2891             }
2892         }
2893       if (cache_hit)
2894         goto done;
2895
2896     do_alg_sub_t2_m:
2897       q = t + 1;
2898       m = ctz_hwi (q);
2899       if (q && m < maxm)
2900         {
2901           op_cost = shiftsub0_cost (speed, mode, m);
2902           new_limit.cost = best_cost.cost - op_cost;
2903           new_limit.latency = best_cost.latency - op_cost;
2904           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2905
2906           alg_in->cost.cost += op_cost;
2907           alg_in->cost.latency += op_cost;
2908           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2909             {
2910               best_cost = alg_in->cost;
2911               std::swap (alg_in, best_alg);
2912               best_alg->log[best_alg->ops] = m;
2913               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2914             }
2915         }
2916       if (cache_hit)
2917         goto done;
2918     }
2919
2920  done:
2921   /* If best_cost has not decreased, we have not found any algorithm.  */
2922   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2923     {
2924       /* We failed to find an algorithm.  Record alg_impossible for
2925          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2926          we are asked to find an algorithm for T within the same or
2927          lower COST_LIMIT, we can immediately return to the
2928          caller.  */
2929       entry_ptr->t = t;
2930       entry_ptr->mode = mode;
2931       entry_ptr->speed = speed;
2932       entry_ptr->alg = alg_impossible;
2933       entry_ptr->cost = *cost_limit;
2934       return;
2935     }
2936
2937   /* Cache the result.  */
2938   if (!cache_hit)
2939     {
2940       entry_ptr->t = t;
2941       entry_ptr->mode = mode;
2942       entry_ptr->speed = speed;
2943       entry_ptr->alg = best_alg->op[best_alg->ops];  /* Final step.  */
2944       entry_ptr->cost.cost = best_cost.cost;
2945       entry_ptr->cost.latency = best_cost.latency;
2946     }
2947
2948   /* If we are getting a too long sequence for `struct algorithm'
2949      to record, make this search fail.  */
2950   if (best_alg->ops == MAX_BITS_PER_WORD)
2951     return;
2952
2953   /* Copy the algorithm from temporary space to the space at alg_out.
2954      We avoid using structure assignment because the majority of
2955      best_alg is normally undefined, and this is a critical function.  */
2956   alg_out->ops = best_alg->ops + 1;  /* Count the step added here.  */
2957   alg_out->cost = best_cost;
2958   memcpy (alg_out->op, best_alg->op,
2959           alg_out->ops * sizeof *alg_out->op);
2960   memcpy (alg_out->log, best_alg->log,
2961           alg_out->ops * sizeof *alg_out->log);
2962 }
2963 \f
2964 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2965    Three variations are tried:
2966        - a shift/add sequence based on VAL itself
2967        - a shift/add sequence based on -VAL, followed by a negation
2968        - a shift/add sequence based on VAL - 1, followed by an addition.
2969    -VAL and VAL - 1 are formed in unsigned arithmetic, so they wrap
2970    instead of overflowing.  Return true if the cheapest of these costs
2971    less than MULT_COST (always false when MULT_COST is negative),
2972    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2973
2974 bool
2975 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2976                      struct algorithm *alg, enum mult_variant *variant,
2977                      int mult_cost)
2978 {
2979   struct algorithm alg2;  /* Candidate for the two fixup variants.  */
2980   struct mult_cost limit;
2981   int op_cost;
2982   bool speed = optimize_insn_for_speed_p ();
2983
2984   /* Fail quickly for impossible bounds.  */
2985   if (mult_cost < 0)
2986     return false;
2987
2988   /* Ensure that mult_cost provides a reasonable upper bound.
2989      Any constant multiplication can be performed with less
2990      than 2 * bits additions.  */
2991   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2992   if (mult_cost > op_cost)
2993     mult_cost = op_cost;
2994
2995   *variant = basic_variant;
2996   limit.cost = mult_cost;
2997   limit.latency = mult_cost;
2998   synth_mult (alg, val, &limit, mode);
2999
3000   /* This works only if the inverted value actually fits in an
3001      `unsigned int' */
3002   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3003     {
3004       op_cost = neg_cost (speed, mode);  /* Cost of the final negation.  */
3005       if (MULT_COST_LESS (&alg->cost, mult_cost))
3006         {
3007           limit.cost = alg->cost.cost - op_cost;
3008           limit.latency = alg->cost.latency - op_cost;
3009         }
3010       else
3011         {
3012           limit.cost = mult_cost - op_cost;
3013           limit.latency = mult_cost - op_cost;
3014         }
3015       /* Negate as unsigned: -VAL overflows for the minimum value.  */
3016       synth_mult (&alg2, -(unsigned HOST_WIDE_INT) val, &limit, mode);
3017       alg2.cost.cost += op_cost;
3018       alg2.cost.latency += op_cost;
3019       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3020         *alg = alg2, *variant = negate_variant;
3021     }
3022
3023   /* This proves very useful for division-by-constant.  */
3024   op_cost = add_cost (speed, mode);  /* Cost of the final addition.  */
3025   if (MULT_COST_LESS (&alg->cost, mult_cost))
3026     {
3027       limit.cost = alg->cost.cost - op_cost;
3028       limit.latency = alg->cost.latency - op_cost;
3029     }
3030   else
3031     {
3032       limit.cost = mult_cost - op_cost;
3033       limit.latency = mult_cost - op_cost;
3034     }
3035   /* Subtract as unsigned so that VAL - 1 cannot overflow.  */
3036   synth_mult (&alg2, (unsigned HOST_WIDE_INT) val - 1, &limit, mode);
3037   alg2.cost.cost += op_cost;
3038   alg2.cost.latency += op_cost;
3039   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3040     *alg = alg2, *variant = add_variant;
3041
3042   return MULT_COST_LESS (&alg->cost, mult_cost);
3043 }
3044
3045 /* A subroutine of expand_mult, used for constant multiplications.
3046    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3047    convenient.  Use the shift/add sequence described by ALG and apply
3048    the final fixup specified by VARIANT.  */
3049
3050 static rtx
3051 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3052                    rtx target, const struct algorithm *alg,
3053                    enum mult_variant variant)
3054 {
3055   unsigned HOST_WIDE_INT val_so_far;
3056   rtx_insn *insn;
3057   rtx accum, tem;
3058   int opno;
3059   machine_mode nmode;
3060
3061   /* Avoid referencing memory over and over and invalid sharing
3062      on SUBREGs.  */
3063   op0 = force_reg (mode, op0);
3064
3065   /* ACCUM starts out either as OP0 or as a zero, depending on
3066      the first operation.  */
3067
3068   if (alg->op[0] == alg_zero)
3069     {
3070       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3071       val_so_far = 0;
3072     }
3073   else if (alg->op[0] == alg_m)
3074     {
3075       accum = copy_to_mode_reg (mode, op0);
3076       val_so_far = 1;
3077     }
3078   else
3079     gcc_unreachable ();
3080
3081   for (opno = 1; opno < alg->ops; opno++)
3082     {
3083       int log = alg->log[opno];
3084       rtx shift_subtarget = optimize ? 0 : accum;
3085       rtx add_target
3086         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3087            && !optimize)
3088           ? target : 0;
3089       rtx accum_target = optimize ? 0 : accum;
3090       rtx accum_inner;
3091
3092       switch (alg->op[opno])
3093         {
3094         case alg_shift:
3095           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3096           /* REG_EQUAL note will be attached to the following insn.  */
3097           emit_move_insn (accum, tem);
3098           val_so_far <<= log;
3099           break;
3100
3101         case alg_add_t_m2:
3102           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3103           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3104                                  add_target ? add_target : accum_target);
3105           val_so_far += HOST_WIDE_INT_1U << log;
3106           break;
3107
3108         case alg_sub_t_m2:
3109           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3110           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3111                                  add_target ? add_target : accum_target);
3112           val_so_far -= HOST_WIDE_INT_1U << log;
3113           break;
3114
3115         case alg_add_t2_m:
3116           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3117                                 log, shift_subtarget, 0);
3118           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3119                                  add_target ? add_target : accum_target);
3120           val_so_far = (val_so_far << log) + 1;
3121           break;
3122
3123         case alg_sub_t2_m:
3124           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3125                                 log, shift_subtarget, 0);
3126           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3127                                  add_target ? add_target : accum_target);
3128           val_so_far = (val_so_far << log) - 1;
3129           break;
3130
3131         case alg_add_factor:
3132           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3133           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3134                                  add_target ? add_target : accum_target);
3135           val_so_far += val_so_far << log;
3136           break;
3137
3138         case alg_sub_factor:
3139           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3140           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3141                                  (add_target
3142                                   ? add_target : (optimize ? 0 : tem)));
3143           val_so_far = (val_so_far << log) - val_so_far;
3144           break;
3145
3146         default:
3147           gcc_unreachable ();
3148         }
3149
3150       if (SCALAR_INT_MODE_P (mode))
3151         {
3152           /* Write a REG_EQUAL note on the last insn so that we can cse
3153              multiplication sequences.  Note that if ACCUM is a SUBREG,
3154              we've set the inner register and must properly indicate that.  */
3155           tem = op0, nmode = mode;
3156           accum_inner = accum;
3157           if (GET_CODE (accum) == SUBREG)
3158             {
3159               accum_inner = SUBREG_REG (accum);
3160               nmode = GET_MODE (accum_inner);
3161               tem = gen_lowpart (nmode, op0);
3162             }
3163
3164           insn = get_last_insn ();
3165           set_dst_reg_note (insn, REG_EQUAL,
3166                             gen_rtx_MULT (nmode, tem,
3167                                           gen_int_mode (val_so_far, nmode)),
3168                             accum_inner);
3169         }
3170     }
3171
3172   if (variant == negate_variant)
3173     {
3174       val_so_far = -val_so_far;
3175       accum = expand_unop (mode, neg_optab, accum, target, 0);
3176     }
3177   else if (variant == add_variant)
3178     {
3179       val_so_far = val_so_far + 1;
3180       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3181     }
3182
3183   /* Compare only the bits of val and val_so_far that are significant
3184      in the result mode, to avoid sign-/zero-extension confusion.  */
3185   nmode = GET_MODE_INNER (mode);
3186   val &= GET_MODE_MASK (nmode);
3187   val_so_far &= GET_MODE_MASK (nmode);
3188   gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3189
3190   return accum;
3191 }
3192
3193 /* Perform a multiplication and return an rtx for the result.
3194    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3195    TARGET is a suggestion for where to store the result (an rtx).
3196
3197    We check specially for a constant integer as OP1.
3198    If you want this check for OP0 as well, then before calling
3199    you should swap the two operands if OP0 would be constant.  */
3200
3201 rtx
3202 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3203              int unsignedp)
3204 {
3205   enum mult_variant variant;
3206   struct algorithm algorithm;
3207   rtx scalar_op1;
3208   int max_cost;
3209   bool speed = optimize_insn_for_speed_p ();
3210   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3211
3212   if (CONSTANT_P (op0))
3213     std::swap (op0, op1);
3214
3215   /* For vectors, there are several simplifications that can be made if
3216      all elements of the vector constant are identical.  */
3217   scalar_op1 = unwrap_const_vec_duplicate (op1);
3218
3219   if (INTEGRAL_MODE_P (mode))
3220     {
3221       rtx fake_reg;
3222       HOST_WIDE_INT coeff;
3223       bool is_neg;
3224       int mode_bitsize;
3225
3226       if (op1 == CONST0_RTX (mode))
3227         return op1;
3228       if (op1 == CONST1_RTX (mode))
3229         return op0;
3230       if (op1 == CONSTM1_RTX (mode))
3231         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3232                             op0, target, 0);
3233
3234       if (do_trapv)
3235         goto skip_synth;
3236
3237       /* If mode is integer vector mode, check if the backend supports
3238          vector lshift (by scalar or vector) at all.  If not, we can't use
3239          synthetized multiply.  */
3240       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3241           && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3242           && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3243         goto skip_synth;
3244
3245       /* These are the operations that are potentially turned into
3246          a sequence of shifts and additions.  */
3247       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3248
3249       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3250          less than or equal in size to `unsigned int' this doesn't matter.
3251          If the mode is larger than `unsigned int', then synth_mult works
3252          only if the constant value exactly fits in an `unsigned int' without
3253          any truncation.  This means that multiplying by negative values does
3254          not work; results are off by 2^32 on a 32 bit machine.  */
3255       if (CONST_INT_P (scalar_op1))
3256         {
3257           coeff = INTVAL (scalar_op1);
3258           is_neg = coeff < 0;
3259         }
3260 #if TARGET_SUPPORTS_WIDE_INT
3261       else if (CONST_WIDE_INT_P (scalar_op1))
3262 #else
3263       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3264 #endif
3265         {
3266           int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3267           /* Perfect power of 2 (other than 1, which is handled above).  */
3268           if (shift > 0)
3269             return expand_shift (LSHIFT_EXPR, mode, op0,
3270                                  shift, target, unsignedp);
3271           else
3272             goto skip_synth;
3273         }
3274       else
3275         goto skip_synth;
3276
3277       /* We used to test optimize here, on the grounds that it's better to
3278          produce a smaller program when -O is not used.  But this causes
3279          such a terrible slowdown sometimes that it seems better to always
3280          use synth_mult.  */
3281
3282       /* Special case powers of two.  */
3283       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3284           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3285         return expand_shift (LSHIFT_EXPR, mode, op0,
3286                              floor_log2 (coeff), target, unsignedp);
3287
3288       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3289
3290       /* Attempt to handle multiplication of DImode values by negative
3291          coefficients, by performing the multiplication by a positive
3292          multiplier and then inverting the result.  */
3293       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3294         {
3295           /* Its safe to use -coeff even for INT_MIN, as the
3296              result is interpreted as an unsigned coefficient.
3297              Exclude cost of op0 from max_cost to match the cost
3298              calculation of the synth_mult.  */
3299           coeff = -(unsigned HOST_WIDE_INT) coeff;
3300           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3301                                     mode, speed)
3302                       - neg_cost (speed, mode));
3303           if (max_cost <= 0)
3304             goto skip_synth;
3305
3306           /* Special case powers of two.  */
3307           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3308             {
3309               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3310                                        floor_log2 (coeff), target, unsignedp);
3311               return expand_unop (mode, neg_optab, temp, target, 0);
3312             }
3313
3314           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3315                                    max_cost))
3316             {
3317               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3318                                             &algorithm, variant);
3319               return expand_unop (mode, neg_optab, temp, target, 0);
3320             }
3321           goto skip_synth;
3322         }
3323
3324       /* Exclude cost of op0 from max_cost to match the cost
3325          calculation of the synth_mult.  */
3326       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3327       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3328         return expand_mult_const (mode, op0, coeff, target,
3329                                   &algorithm, variant);
3330     }
3331  skip_synth:
3332
3333   /* Expand x*2.0 as x+x.  */
3334   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3335       && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3336     {
3337       op0 = force_reg (GET_MODE (op0), op0);
3338       return expand_binop (mode, add_optab, op0, op0,
3339                            target, unsignedp, OPTAB_LIB_WIDEN);
3340     }
3341
3342   /* This used to use umul_optab if unsigned, but for non-widening multiply
3343      there is no difference between signed and unsigned.  */
3344   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3345                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3346   gcc_assert (op0);
3347   return op0;
3348 }
3349
3350 /* Return a cost estimate for multiplying a register by the given
3351    COEFFicient in the given MODE and SPEED.  */
3352
3353 int
3354 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3355 {
3356   int max_cost;
3357   struct algorithm algorithm;
3358   enum mult_variant variant;
3359
3360   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3361   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3362                            mode, speed);
3363   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3364     return algorithm.cost.cost;
3365   else
3366     return max_cost;
3367 }
3368
3369 /* Perform a widening multiplication and return an rtx for the result.
3370    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3371    TARGET is a suggestion for where to store the result (an rtx).
3372    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3373    or smul_widen_optab.
3374
3375    We check specially for a constant integer as OP1, comparing the
3376    cost of a widening multiply against the cost of a sequence of shifts
3377    and adds.  */
3378
3379 rtx
3380 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3381                       int unsignedp, optab this_optab)
3382 {
3383   bool speed = optimize_insn_for_speed_p ();
3384   rtx cop1;
3385
3386   if (CONST_INT_P (op1)
3387       && GET_MODE (op0) != VOIDmode
3388       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3389                                 this_optab == umul_widen_optab))
3390       && CONST_INT_P (cop1)
3391       && (INTVAL (cop1) >= 0
3392           || HWI_COMPUTABLE_MODE_P (mode)))
3393     {
3394       HOST_WIDE_INT coeff = INTVAL (cop1);
3395       int max_cost;
3396       enum mult_variant variant;
3397       struct algorithm algorithm;
3398
3399       if (coeff == 0)
3400         return CONST0_RTX (mode);
3401
3402       /* Special case powers of two.  */
3403       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3404         {
3405           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3406           return expand_shift (LSHIFT_EXPR, mode, op0,
3407                                floor_log2 (coeff), target, unsignedp);
3408         }
3409
3410       /* Exclude cost of op0 from max_cost to match the cost
3411          calculation of the synth_mult.  */
3412       max_cost = mul_widen_cost (speed, mode);
3413       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3414                                max_cost))
3415         {
3416           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3417           return expand_mult_const (mode, op0, coeff, target,
3418                                     &algorithm, variant);
3419         }
3420     }
3421   return expand_binop (mode, this_optab, op0, op1, target,
3422                        unsignedp, OPTAB_LIB_WIDEN);
3423 }
3424 \f
3425 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3426    replace division by D, and put the least significant N bits of the result
3427    in *MULTIPLIER_PTR and return the most significant bit.
3428
3429    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3430    needed precision is in PRECISION (should be <= N).
3431
3432    PRECISION should be as small as possible so this function can choose
3433    multiplier more freely.
3434
3435    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3436    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3437
3438    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3439    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3440
3441 unsigned HOST_WIDE_INT
3442 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3443                    unsigned HOST_WIDE_INT *multiplier_ptr,
3444                    int *post_shift_ptr, int *lgup_ptr)
3445 {
3446   int lgup, post_shift;
3447   int pow, pow2;
3448
3449   /* lgup = ceil(log2(divisor)); */
3450   lgup = ceil_log2 (d);
3451
3452   gcc_assert (lgup <= n);
3453
3454   pow = n + lgup;
3455   pow2 = n + lgup - precision;
3456
3457   /* mlow = 2^(N + lgup)/d */
3458   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3459   wide_int mlow = wi::udiv_trunc (val, d);
3460
3461   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3462   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3463   wide_int mhigh = wi::udiv_trunc (val, d);
3464
3465   /* If precision == N, then mlow, mhigh exceed 2^N
3466      (but they do not exceed 2^(N+1)).  */
3467
3468   /* Reduce to lowest terms.  */
3469   for (post_shift = lgup; post_shift > 0; post_shift--)
3470     {
3471       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3472                                                        HOST_BITS_PER_WIDE_INT);
3473       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3474                                                        HOST_BITS_PER_WIDE_INT);
3475       if (ml_lo >= mh_lo)
3476         break;
3477
3478       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3479       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3480     }
3481
3482   *post_shift_ptr = post_shift;
3483   *lgup_ptr = lgup;
3484   if (n < HOST_BITS_PER_WIDE_INT)
3485     {
3486       unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3487       *multiplier_ptr = mhigh.to_uhwi () & mask;
3488       return mhigh.to_uhwi () >= mask;
3489     }
3490   else
3491     {
3492       *multiplier_ptr = mhigh.to_uhwi ();
3493       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3494     }
3495 }
3496
3497 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3498    congruent to 1 (mod 2**N).  */
3499
3500 static unsigned HOST_WIDE_INT
3501 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3502 {
3503   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3504
3505   /* The algorithm notes that the choice y = x satisfies
3506      x*y == 1 mod 2^3, since x is assumed odd.
3507      Each iteration doubles the number of bits of significance in y.  */
3508
3509   unsigned HOST_WIDE_INT mask;
3510   unsigned HOST_WIDE_INT y = x;
3511   int nbit = 3;
3512
3513   mask = (n == HOST_BITS_PER_WIDE_INT
3514           ? HOST_WIDE_INT_M1U
3515           : (HOST_WIDE_INT_1U << n) - 1);
3516
3517   while (nbit < n)
3518     {
3519       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3520       nbit *= 2;
3521     }
3522   return y;
3523 }
3524
3525 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3526    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3527    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3528    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3529    become signed.
3530
3531    The result is put in TARGET if that is convenient.
3532
3533    MODE is the mode of operation.  */
3534
3535 rtx
3536 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3537                              rtx op1, rtx target, int unsignedp)
3538 {
3539   rtx tem;
3540   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3541
3542   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3543                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3544   tem = expand_and (mode, tem, op1, NULL_RTX);
3545   adj_operand
3546     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3547                      adj_operand);
3548
3549   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3550                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3551   tem = expand_and (mode, tem, op0, NULL_RTX);
3552   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3553                           target);
3554
3555   return target;
3556 }
3557
3558 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3559
3560 static rtx
3561 extract_high_half (machine_mode mode, rtx op)
3562 {
3563   machine_mode wider_mode;
3564
3565   if (mode == word_mode)
3566     return gen_highpart (mode, op);
3567
3568   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3569
3570   wider_mode = GET_MODE_WIDER_MODE (mode);
3571   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3572                      GET_MODE_BITSIZE (mode), 0, 1);
3573   return convert_modes (mode, wider_mode, op, 0);
3574 }
3575
3576 /* Like expmed_mult_highpart, but only consider using a multiplication
3577    optab.  OP1 is an rtx for the constant operand.  */
3578
3579 static rtx
3580 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3581                             rtx target, int unsignedp, int max_cost)
3582 {
3583   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3584   machine_mode wider_mode;
3585   optab moptab;
3586   rtx tem;
3587   int size;
3588   bool speed = optimize_insn_for_speed_p ();
3589
3590   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3591
3592   wider_mode = GET_MODE_WIDER_MODE (mode);
3593   size = GET_MODE_BITSIZE (mode);
3594
3595   /* Firstly, try using a multiplication insn that only generates the needed
3596      high part of the product, and in the sign flavor of unsignedp.  */
3597   if (mul_highpart_cost (speed, mode) < max_cost)
3598     {
3599       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3600       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3601                           unsignedp, OPTAB_DIRECT);
3602       if (tem)
3603         return tem;
3604     }
3605
3606   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3607      Need to adjust the result after the multiplication.  */
3608   if (size - 1 < BITS_PER_WORD
3609       && (mul_highpart_cost (speed, mode)
3610           + 2 * shift_cost (speed, mode, size-1)
3611           + 4 * add_cost (speed, mode) < max_cost))
3612     {
3613       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3614       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3615                           unsignedp, OPTAB_DIRECT);
3616       if (tem)
3617         /* We used the wrong signedness.  Adjust the result.  */
3618         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3619                                             tem, unsignedp);
3620     }
3621
3622   /* Try widening multiplication.  */
3623   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3624   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3625       && mul_widen_cost (speed, wider_mode) < max_cost)
3626     {
3627       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3628                           unsignedp, OPTAB_WIDEN);
3629       if (tem)
3630         return extract_high_half (mode, tem);
3631     }
3632
3633   /* Try widening the mode and perform a non-widening multiplication.  */
3634   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3635       && size - 1 < BITS_PER_WORD
3636       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3637           < max_cost))
3638     {
3639       rtx_insn *insns;
3640       rtx wop0, wop1;
3641
3642       /* We need to widen the operands, for example to ensure the
3643          constant multiplier is correctly sign or zero extended.
3644          Use a sequence to clean-up any instructions emitted by
3645          the conversions if things don't work out.  */
3646       start_sequence ();
3647       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3648       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3649       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3650                           unsignedp, OPTAB_WIDEN);
3651       insns = get_insns ();
3652       end_sequence ();
3653
3654       if (tem)
3655         {
3656           emit_insn (insns);
3657           return extract_high_half (mode, tem);
3658         }
3659     }
3660
3661   /* Try widening multiplication of opposite signedness, and adjust.  */
3662   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3663   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3664       && size - 1 < BITS_PER_WORD
3665       && (mul_widen_cost (speed, wider_mode)
3666           + 2 * shift_cost (speed, mode, size-1)
3667           + 4 * add_cost (speed, mode) < max_cost))
3668     {
3669       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3670                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3671       if (tem != 0)
3672         {
3673           tem = extract_high_half (mode, tem);
3674           /* We used the wrong signedness.  Adjust the result.  */
3675           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3676                                               target, unsignedp);
3677         }
3678     }
3679
3680   return 0;
3681 }
3682
3683 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3684    putting the high half of the result in TARGET if that is convenient,
3685    and return where the result is.  If the operation can not be performed,
3686    0 is returned.
3687
3688    MODE is the mode of operation and result.
3689
3690    UNSIGNEDP nonzero means unsigned multiply.
3691
3692    MAX_COST is the total allowed cost for the expanded RTL.  */
3693
3694 static rtx
3695 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3696                       rtx target, int unsignedp, int max_cost)
3697 {
3698   machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3699   unsigned HOST_WIDE_INT cnst1;
3700   int extra_cost;
3701   bool sign_adjust = false;
3702   enum mult_variant variant;
3703   struct algorithm alg;
3704   rtx tem;
3705   bool speed = optimize_insn_for_speed_p ();
3706
3707   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3708   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3709   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3710
3711   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3712
3713   /* We can't optimize modes wider than BITS_PER_WORD.
3714      ??? We might be able to perform double-word arithmetic if
3715      mode == word_mode, however all the cost calculations in
3716      synth_mult etc. assume single-word operations.  */
3717   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3718     return expmed_mult_highpart_optab (mode, op0, op1, target,
3719                                        unsignedp, max_cost);
3720
3721   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3722
3723   /* Check whether we try to multiply by a negative constant.  */
3724   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3725     {
3726       sign_adjust = true;
3727       extra_cost += add_cost (speed, mode);
3728     }
3729
3730   /* See whether shift/add multiplication is cheap enough.  */
3731   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3732                            max_cost - extra_cost))
3733     {
3734       /* See whether the specialized multiplication optabs are
3735          cheaper than the shift/add version.  */
3736       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3737                                         alg.cost.cost + extra_cost);
3738       if (tem)
3739         return tem;
3740
3741       tem = convert_to_mode (wider_mode, op0, unsignedp);
3742       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3743       tem = extract_high_half (mode, tem);
3744
3745       /* Adjust result for signedness.  */
3746       if (sign_adjust)
3747         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3748
3749       return tem;
3750     }
3751   return expmed_mult_highpart_optab (mode, op0, op1, target,
3752                                      unsignedp, max_cost);
3753 }
3754
3755
3756 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3757
3758 static rtx
3759 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3760 {
3761   rtx result, temp, shift;
3762   rtx_code_label *label;
3763   int logd;
3764   int prec = GET_MODE_PRECISION (mode);
3765
3766   logd = floor_log2 (d);
3767   result = gen_reg_rtx (mode);
3768
3769   /* Avoid conditional branches when they're expensive.  */
3770   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3771       && optimize_insn_for_speed_p ())
3772     {
3773       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3774                                       mode, 0, -1);
3775       if (signmask)
3776         {
3777           HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3778           signmask = force_reg (mode, signmask);
3779           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3780
3781           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3782              which instruction sequence to use.  If logical right shifts
3783              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3784              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3785
3786           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3787           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3788               || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3789                   > COSTS_N_INSNS (2)))
3790             {
3791               temp = expand_binop (mode, xor_optab, op0, signmask,
3792                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3793               temp = expand_binop (mode, sub_optab, temp, signmask,
3794                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3795               temp = expand_binop (mode, and_optab, temp,
3796                                    gen_int_mode (masklow, mode),
3797                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3798               temp = expand_binop (mode, xor_optab, temp, signmask,
3799                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3800               temp = expand_binop (mode, sub_optab, temp, signmask,
3801                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3802             }
3803           else
3804             {
3805               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3806                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3807               signmask = force_reg (mode, signmask);
3808
3809               temp = expand_binop (mode, add_optab, op0, signmask,
3810                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3811               temp = expand_binop (mode, and_optab, temp,
3812                                    gen_int_mode (masklow, mode),
3813                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3814               temp = expand_binop (mode, sub_optab, temp, signmask,
3815                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3816             }
3817           return temp;
3818         }
3819     }
3820
3821   /* Mask contains the mode's signbit and the significant bits of the
3822      modulus.  By including the signbit in the operation, many targets
3823      can avoid an explicit compare operation in the following comparison
3824      against zero.  */
3825   wide_int mask = wi::mask (logd, false, prec);
3826   mask = wi::set_bit (mask, prec - 1);
3827
3828   temp = expand_binop (mode, and_optab, op0,
3829                        immed_wide_int_const (mask, mode),
3830                        result, 1, OPTAB_LIB_WIDEN);
3831   if (temp != result)
3832     emit_move_insn (result, temp);
3833
3834   label = gen_label_rtx ();
3835   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3836
3837   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3838                        0, OPTAB_LIB_WIDEN);
3839
3840   mask = wi::mask (logd, true, prec);
3841   temp = expand_binop (mode, ior_optab, temp,
3842                        immed_wide_int_const (mask, mode),
3843                        result, 1, OPTAB_LIB_WIDEN);
3844   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3845                        0, OPTAB_LIB_WIDEN);
3846   if (temp != result)
3847     emit_move_insn (result, temp);
3848   emit_label (label);
3849   return result;
3850 }
3851
3852 /* Expand signed division of OP0 by a power of two D in mode MODE.
3853    This routine is only called for positive values of D.  */
3854
3855 static rtx
3856 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3857 {
3858   rtx temp;
3859   rtx_code_label *label;
3860   int logd;
3861
3862   logd = floor_log2 (d);
3863
3864   if (d == 2
3865       && BRANCH_COST (optimize_insn_for_speed_p (),
3866                       false) >= 1)
3867     {
3868       temp = gen_reg_rtx (mode);
3869       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3870       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3871                            0, OPTAB_LIB_WIDEN);
3872       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3873     }
3874
3875   if (HAVE_conditional_move
3876       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3877     {
3878       rtx temp2;
3879
3880       start_sequence ();
3881       temp2 = copy_to_mode_reg (mode, op0);
3882       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3883                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3884       temp = force_reg (mode, temp);
3885
3886       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3887       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3888                                      mode, temp, temp2, mode, 0);
3889       if (temp2)
3890         {
3891           rtx_insn *seq = get_insns ();
3892           end_sequence ();
3893           emit_insn (seq);
3894           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3895         }
3896       end_sequence ();
3897     }
3898
3899   if (BRANCH_COST (optimize_insn_for_speed_p (),
3900                    false) >= 2)
3901     {
3902       int ushift = GET_MODE_BITSIZE (mode) - logd;
3903
3904       temp = gen_reg_rtx (mode);
3905       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3906       if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3907           || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3908              > COSTS_N_INSNS (1))
3909         temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3910                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3911       else
3912         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3913                              ushift, NULL_RTX, 1);
3914       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3915                            0, OPTAB_LIB_WIDEN);
3916       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3917     }
3918
3919   label = gen_label_rtx ();
3920   temp = copy_to_mode_reg (mode, op0);
3921   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3922   expand_inc (temp, gen_int_mode (d - 1, mode));
3923   emit_label (label);
3924   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3925 }
3926 \f
3927 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3928    if that is convenient, and returning where the result is.
3929    You may request either the quotient or the remainder as the result;
3930    specify REM_FLAG nonzero to get the remainder.
3931
3932    CODE is the expression code for which kind of division this is;
3933    it controls how rounding is done.  MODE is the machine mode to use.
3934    UNSIGNEDP nonzero means do unsigned division.  */
3935
3936 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3937    and then correct it by or'ing in missing high bits
3938    if result of ANDI is nonzero.
3939    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3940    This could optimize to a bfexts instruction.
3941    But C doesn't use these operations, so their optimizations are
3942    left for later.  */
3943 /* ??? For modulo, we don't actually need the highpart of the first product,
3944    the low part will do nicely.  And for small divisors, the second multiply
3945    can also be a low-part only multiply or even be completely left out.
3946    E.g. to calculate the remainder of a division by 3 with a 32 bit
3947    multiply, multiply with 0x55555556 and extract the upper two bits;
3948    the result is exact for inputs up to 0x1fffffff.
3949    The input range can be reduced by using cross-sum rules.
3950    For odd divisors >= 3, the following table gives right shift counts
3951    so that if a number is shifted by an integer multiple of the given
3952    amount, the remainder stays the same:
3953    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3954    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3955    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3956    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3957    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3958
3959    Cross-sum rules for even numbers can be derived by leaving as many bits
3960    to the right alone as the divisor has zeros to the right.
3961    E.g. if x is an unsigned 32 bit number:
3962    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3963    */
3964
3965 rtx
3966 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3967                rtx op0, rtx op1, rtx target, int unsignedp)
3968 {
3969   machine_mode compute_mode;
3970   rtx tquotient;
3971   rtx quotient = 0, remainder = 0;
3972   rtx_insn *last;
3973   int size;
3974   rtx_insn *insn;
3975   optab optab1, optab2;
3976   int op1_is_constant, op1_is_pow2 = 0;
3977   int max_cost, extra_cost;
3978   static HOST_WIDE_INT last_div_const = 0;
3979   bool speed = optimize_insn_for_speed_p ();
3980
3981   op1_is_constant = CONST_INT_P (op1);
3982   if (op1_is_constant)
3983     {
3984       unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3985       if (unsignedp)
3986         ext_op1 &= GET_MODE_MASK (mode);
3987       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3988                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3989     }
3990
3991   /*
3992      This is the structure of expand_divmod:
3993
3994      First comes code to fix up the operands so we can perform the operations
3995      correctly and efficiently.
3996
3997      Second comes a switch statement with code specific for each rounding mode.
3998      For some special operands this code emits all RTL for the desired
3999      operation, for other cases, it generates only a quotient and stores it in
4000      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4001      to indicate that it has not done anything.
4002
4003      Last comes code that finishes the operation.  If QUOTIENT is set and
4004      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4005      QUOTIENT is not set, it is computed using trunc rounding.
4006
4007      We try to generate special code for division and remainder when OP1 is a
4008      constant.  If |OP1| = 2**n we can use shifts and some other fast
4009      operations.  For other values of OP1, we compute a carefully selected
4010      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4011      by m.
4012
4013      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4014      half of the product.  Different strategies for generating the product are
4015      implemented in expmed_mult_highpart.
4016
4017      If what we actually want is the remainder, we generate that by another
4018      by-constant multiplication and a subtraction.  */
4019
4020   /* We shouldn't be called with OP1 == const1_rtx, but some of the
4021      code below will malfunction if we are, so check here and handle
4022      the special case if so.  */
4023   if (op1 == const1_rtx)
4024     return rem_flag ? const0_rtx : op0;
4025
4026     /* When dividing by -1, we could get an overflow.
4027      negv_optab can handle overflows.  */
4028   if (! unsignedp && op1 == constm1_rtx)
4029     {
4030       if (rem_flag)
4031         return const0_rtx;
4032       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4033                           ? negv_optab : neg_optab, op0, target, 0);
4034     }
4035
4036   if (target
4037       /* Don't use the function value register as a target
4038          since we have to read it as well as write it,
4039          and function-inlining gets confused by this.  */
4040       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4041           /* Don't clobber an operand while doing a multi-step calculation.  */
4042           || ((rem_flag || op1_is_constant)
4043               && (reg_mentioned_p (target, op0)
4044                   || (MEM_P (op0) && MEM_P (target))))
4045           || reg_mentioned_p (target, op1)
4046           || (MEM_P (op1) && MEM_P (target))))
4047     target = 0;
4048
4049   /* Get the mode in which to perform this computation.  Normally it will
4050      be MODE, but sometimes we can't do the desired operation in MODE.
4051      If so, pick a wider mode in which we can do the operation.  Convert
4052      to that mode at the start to avoid repeated conversions.
4053
4054      First see what operations we need.  These depend on the expression
4055      we are evaluating.  (We assume that divxx3 insns exist under the
4056      same conditions as modxx3 insns and that these insns don't normally
4057      fail.  If these assumptions are not correct, we may generate less
4058      efficient code in some cases.)
4059
4060      Then see if we find a mode in which we can open-code that operation
4061      (either a division, modulus, or shift).  Finally, check for the smallest
4062      mode for which we can do the operation with a library call.  */
4063
4064   /* We might want to refine this now that we have division-by-constant
4065      optimization.  Since expmed_mult_highpart tries so many variants, it is
4066      not straightforward to generalize this.  Maybe we should make an array
4067      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4068
4069   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
4070             ? (unsignedp ? lshr_optab : ashr_optab)
4071             : (unsignedp ? udiv_optab : sdiv_optab));
4072   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
4073             ? optab1
4074             : (unsignedp ? udivmod_optab : sdivmod_optab));
4075
4076   for (compute_mode = mode; compute_mode != VOIDmode;
4077        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4078     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4079         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4080       break;
4081
4082   if (compute_mode == VOIDmode)
4083     for (compute_mode = mode; compute_mode != VOIDmode;
4084          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4085       if (optab_libfunc (optab1, compute_mode)
4086           || optab_libfunc (optab2, compute_mode))
4087         break;
4088
4089   /* If we still couldn't find a mode, use MODE, but expand_binop will
4090      probably die.  */
4091   if (compute_mode == VOIDmode)
4092     compute_mode = mode;
4093
4094   if (target && GET_MODE (target) == compute_mode)
4095     tquotient = target;
4096   else
4097     tquotient = gen_reg_rtx (compute_mode);
4098
4099   size = GET_MODE_BITSIZE (compute_mode);
4100 #if 0
4101   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4102      (mode), and thereby get better code when OP1 is a constant.  Do that
4103      later.  It will require going over all usages of SIZE below.  */
4104   size = GET_MODE_BITSIZE (mode);
4105 #endif
4106
4107   /* Only deduct something for a REM if the last divide done was
4108      for a different constant.   Then set the constant of the last
4109      divide.  */
4110   max_cost = (unsignedp
4111               ? udiv_cost (speed, compute_mode)
4112               : sdiv_cost (speed, compute_mode));
4113   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4114                      && INTVAL (op1) == last_div_const))
4115     max_cost -= (mul_cost (speed, compute_mode)
4116                  + add_cost (speed, compute_mode));
4117
4118   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4119
4120   /* Now convert to the best mode to use.  */
4121   if (compute_mode != mode)
4122     {
4123       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4124       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4125
4126       /* convert_modes may have placed op1 into a register, so we
4127          must recompute the following.  */
4128       op1_is_constant = CONST_INT_P (op1);
4129       op1_is_pow2 = (op1_is_constant
4130                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4131                           || (! unsignedp
4132                               && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4133     }
4134
4135   /* If one of the operands is a volatile MEM, copy it into a register.  */
4136
4137   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4138     op0 = force_reg (compute_mode, op0);
4139   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4140     op1 = force_reg (compute_mode, op1);
4141
4142   /* If we need the remainder or if OP1 is constant, we need to
4143      put OP0 in a register in case it has any queued subexpressions.  */
4144   if (rem_flag || op1_is_constant)
4145     op0 = force_reg (compute_mode, op0);
4146
4147   last = get_last_insn ();
4148
4149   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4150   if (unsignedp)
4151     {
4152       if (code == FLOOR_DIV_EXPR)
4153         code = TRUNC_DIV_EXPR;
4154       if (code == FLOOR_MOD_EXPR)
4155         code = TRUNC_MOD_EXPR;
4156       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4157         code = TRUNC_DIV_EXPR;
4158     }
4159
4160   if (op1 != const0_rtx)
4161     switch (code)
4162       {
4163       case TRUNC_MOD_EXPR:
4164       case TRUNC_DIV_EXPR:
4165         if (op1_is_constant)
4166           {
4167             if (unsignedp)
4168               {
4169                 unsigned HOST_WIDE_INT mh, ml;
4170                 int pre_shift, post_shift;
4171                 int dummy;
4172                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4173                                             & GET_MODE_MASK (compute_mode));
4174
4175                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4176                   {
4177                     pre_shift = floor_log2 (d);
4178                     if (rem_flag)
4179                       {
4180                         unsigned HOST_WIDE_INT mask
4181                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4182                         remainder
4183                           = expand_binop (compute_mode, and_optab, op0,
4184                                           gen_int_mode (mask, compute_mode),
4185                                           remainder, 1,
4186                                           OPTAB_LIB_WIDEN);
4187                         if (remainder)
4188                           return gen_lowpart (mode, remainder);
4189                       }
4190                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4191                                              pre_shift, tquotient, 1);
4192                   }
4193                 else if (size <= HOST_BITS_PER_WIDE_INT)
4194                   {
4195                     if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4196                       {
4197                         /* Most significant bit of divisor is set; emit an scc
4198                            insn.  */
4199                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4200                                                           compute_mode, 1, 1);
4201                       }
4202                     else
4203                       {
4204                         /* Find a suitable multiplier and right shift count
4205                            instead of multiplying with D.  */
4206
4207                         mh = choose_multiplier (d, size, size,
4208                                                 &ml, &post_shift, &dummy);
4209
4210                         /* If the suggested multiplier is more than SIZE bits,
4211                            we can do better for even divisors, using an
4212                            initial right shift.  */
4213                         if (mh != 0 && (d & 1) == 0)
4214                           {
4215                             pre_shift = ctz_or_zero (d);
4216                             mh = choose_multiplier (d >> pre_shift, size,
4217                                                     size - pre_shift,
4218                                                     &ml, &post_shift, &dummy);
4219                             gcc_assert (!mh);
4220                           }
4221                         else
4222                           pre_shift = 0;
4223
4224                         if (mh != 0)
4225                           {
4226                             rtx t1, t2, t3, t4;
4227
4228                             if (post_shift - 1 >= BITS_PER_WORD)
4229                               goto fail1;
4230
4231                             extra_cost
4232                               = (shift_cost (speed, compute_mode, post_shift - 1)
4233                                  + shift_cost (speed, compute_mode, 1)
4234                                  + 2 * add_cost (speed, compute_mode));
4235                             t1 = expmed_mult_highpart
4236                               (compute_mode, op0,
4237                                gen_int_mode (ml, compute_mode),
4238                                NULL_RTX, 1, max_cost - extra_cost);
4239                             if (t1 == 0)
4240                               goto fail1;
4241                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4242                                                                op0, t1),
4243                                                 NULL_RTX);
4244                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4245                                                t2, 1, NULL_RTX, 1);
4246                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4247                                                               t1, t3),
4248                                                 NULL_RTX);
4249                             quotient = expand_shift
4250                               (RSHIFT_EXPR, compute_mode, t4,
4251                                post_shift - 1, tquotient, 1);
4252                           }
4253                         else
4254                           {
4255                             rtx t1, t2;
4256
4257                             if (pre_shift >= BITS_PER_WORD
4258                                 || post_shift >= BITS_PER_WORD)
4259                               goto fail1;
4260
4261                             t1 = expand_shift
4262                               (RSHIFT_EXPR, compute_mode, op0,
4263                                pre_shift, NULL_RTX, 1);
4264                             extra_cost
4265                               = (shift_cost (speed, compute_mode, pre_shift)
4266                                  + shift_cost (speed, compute_mode, post_shift));
4267                             t2 = expmed_mult_highpart
4268                               (compute_mode, t1,
4269                                gen_int_mode (ml, compute_mode),
4270                                NULL_RTX, 1, max_cost - extra_cost);
4271                             if (t2 == 0)
4272                               goto fail1;
4273                             quotient = expand_shift
4274                               (RSHIFT_EXPR, compute_mode, t2,
4275                                post_shift, tquotient, 1);
4276                           }
4277                       }
4278                   }
4279                 else            /* Too wide mode to use tricky code */
4280                   break;
4281
4282                 insn = get_last_insn ();
4283                 if (insn != last)
4284                   set_dst_reg_note (insn, REG_EQUAL,
4285                                     gen_rtx_UDIV (compute_mode, op0, op1),
4286                                     quotient);
4287               }
4288             else                /* TRUNC_DIV, signed */
4289               {
4290                 unsigned HOST_WIDE_INT ml;
4291                 int lgup, post_shift;
4292                 rtx mlr;
4293                 HOST_WIDE_INT d = INTVAL (op1);
4294                 unsigned HOST_WIDE_INT abs_d;
4295
4296                 /* Since d might be INT_MIN, we have to cast to
4297                    unsigned HOST_WIDE_INT before negating to avoid
4298                    undefined signed overflow.  */
4299                 abs_d = (d >= 0
4300                          ? (unsigned HOST_WIDE_INT) d
4301                          : - (unsigned HOST_WIDE_INT) d);
4302
4303                 /* n rem d = n rem -d */
4304                 if (rem_flag && d < 0)
4305                   {
4306                     d = abs_d;
4307                     op1 = gen_int_mode (abs_d, compute_mode);
4308                   }
4309
4310                 if (d == 1)
4311                   quotient = op0;
4312                 else if (d == -1)
4313                   quotient = expand_unop (compute_mode, neg_optab, op0,
4314                                           tquotient, 0);
4315                 else if (HOST_BITS_PER_WIDE_INT >= size
4316                          && abs_d == HOST_WIDE_INT_1U << (size - 1))
4317                   {
4318                     /* This case is not handled correctly below.  */
4319                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4320                                                 compute_mode, 1, 1);
4321                     if (quotient == 0)
4322                       goto fail1;
4323                   }
4324                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4325                          && (rem_flag
4326                              ? smod_pow2_cheap (speed, compute_mode)
4327                              : sdiv_pow2_cheap (speed, compute_mode))
4328                          /* We assume that cheap metric is true if the
4329                             optab has an expander for this mode.  */
4330                          && ((optab_handler ((rem_flag ? smod_optab
4331                                               : sdiv_optab),
4332                                              compute_mode)
4333                               != CODE_FOR_nothing)
4334                              || (optab_handler (sdivmod_optab,
4335                                                 compute_mode)
4336                                  != CODE_FOR_nothing)))
4337                   ;
4338                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4339                   {
4340                     if (rem_flag)
4341                       {
4342                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4343                         if (remainder)
4344                           return gen_lowpart (mode, remainder);
4345                       }
4346
4347                     if (sdiv_pow2_cheap (speed, compute_mode)
4348                         && ((optab_handler (sdiv_optab, compute_mode)
4349                              != CODE_FOR_nothing)
4350                             || (optab_handler (sdivmod_optab, compute_mode)
4351                                 != CODE_FOR_nothing)))
4352                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4353                                                 compute_mode, op0,
4354                                                 gen_int_mode (abs_d,
4355                                                               compute_mode),
4356                                                 NULL_RTX, 0);
4357                     else
4358                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4359
4360                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4361                        negate the quotient.  */
4362                     if (d < 0)
4363                       {
4364                         insn = get_last_insn ();
4365                         if (insn != last
4366                             && abs_d < (HOST_WIDE_INT_1U
4367                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4368                           set_dst_reg_note (insn, REG_EQUAL,
4369                                             gen_rtx_DIV (compute_mode, op0,
4370                                                          gen_int_mode
4371                                                            (abs_d,
4372                                                             compute_mode)),
4373                                             quotient);
4374
4375                         quotient = expand_unop (compute_mode, neg_optab,
4376                                                 quotient, quotient, 0);
4377                       }
4378                   }
4379                 else if (size <= HOST_BITS_PER_WIDE_INT)
4380                   {
4381                     choose_multiplier (abs_d, size, size - 1,
4382                                        &ml, &post_shift, &lgup);
4383                     if (ml < HOST_WIDE_INT_1U << (size - 1))
4384                       {
4385                         rtx t1, t2, t3;
4386
4387                         if (post_shift >= BITS_PER_WORD
4388                             || size - 1 >= BITS_PER_WORD)
4389                           goto fail1;
4390
4391                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4392                                       + shift_cost (speed, compute_mode, size - 1)
4393                                       + add_cost (speed, compute_mode));
4394                         t1 = expmed_mult_highpart
4395                           (compute_mode, op0, gen_int_mode (ml, compute_mode),
4396                            NULL_RTX, 0, max_cost - extra_cost);
4397                         if (t1 == 0)
4398                           goto fail1;
4399                         t2 = expand_shift
4400                           (RSHIFT_EXPR, compute_mode, t1,
4401                            post_shift, NULL_RTX, 0);
4402                         t3 = expand_shift
4403                           (RSHIFT_EXPR, compute_mode, op0,
4404                            size - 1, NULL_RTX, 0);
4405                         if (d < 0)
4406                           quotient
4407                             = force_operand (gen_rtx_MINUS (compute_mode,
4408                                                             t3, t2),
4409                                              tquotient);
4410                         else
4411                           quotient
4412                             = force_operand (gen_rtx_MINUS (compute_mode,
4413                                                             t2, t3),
4414                                              tquotient);
4415                       }
4416                     else
4417                       {
4418                         rtx t1, t2, t3, t4;
4419
4420                         if (post_shift >= BITS_PER_WORD
4421                             || size - 1 >= BITS_PER_WORD)
4422                           goto fail1;
4423
4424                         ml |= HOST_WIDE_INT_M1U << (size - 1);
4425                         mlr = gen_int_mode (ml, compute_mode);
4426                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4427                                       + shift_cost (speed, compute_mode, size - 1)
4428                                       + 2 * add_cost (speed, compute_mode));
4429                         t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4430                                                    NULL_RTX, 0,
4431                                                    max_cost - extra_cost);
4432                         if (t1 == 0)
4433                           goto fail1;
4434                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4435                                                           t1, op0),
4436                                             NULL_RTX);
4437                         t3 = expand_shift
4438                           (RSHIFT_EXPR, compute_mode, t2,
4439                            post_shift, NULL_RTX, 0);
4440                         t4 = expand_shift
4441                           (RSHIFT_EXPR, compute_mode, op0,
4442                            size - 1, NULL_RTX, 0);
4443                         if (d < 0)
4444                           quotient
4445                             = force_operand (gen_rtx_MINUS (compute_mode,
4446                                                             t4, t3),
4447                                              tquotient);
4448                         else
4449                           quotient
4450                             = force_operand (gen_rtx_MINUS (compute_mode,
4451                                                             t3, t4),
4452                                              tquotient);
4453                       }
4454                   }
4455                 else            /* Too wide mode to use tricky code */
4456                   break;
4457
4458                 insn = get_last_insn ();
4459                 if (insn != last)
4460                   set_dst_reg_note (insn, REG_EQUAL,
4461                                     gen_rtx_DIV (compute_mode, op0, op1),
4462                                     quotient);
4463               }
4464             break;
4465           }
4466       fail1:
4467         delete_insns_since (last);
4468         break;
4469
4470       case FLOOR_DIV_EXPR:
4471       case FLOOR_MOD_EXPR:
4472       /* We will come here only for signed operations.  */
4473         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4474           {
4475             unsigned HOST_WIDE_INT mh, ml;
4476             int pre_shift, lgup, post_shift;
4477             HOST_WIDE_INT d = INTVAL (op1);
4478
4479             if (d > 0)
4480               {
4481                 /* We could just as easily deal with negative constants here,
4482                    but it does not seem worth the trouble for GCC 2.6.  */
4483                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4484                   {
4485                     pre_shift = floor_log2 (d);
4486                     if (rem_flag)
4487                       {
4488                         unsigned HOST_WIDE_INT mask
4489                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4490                         remainder = expand_binop
4491                           (compute_mode, and_optab, op0,
4492                            gen_int_mode (mask, compute_mode),
4493                            remainder, 0, OPTAB_LIB_WIDEN);
4494                         if (remainder)
4495                           return gen_lowpart (mode, remainder);
4496                       }
4497                     quotient = expand_shift
4498                       (RSHIFT_EXPR, compute_mode, op0,
4499                        pre_shift, tquotient, 0);
4500                   }
4501                 else
4502                   {
4503                     rtx t1, t2, t3, t4;
4504
4505                     mh = choose_multiplier (d, size, size - 1,
4506                                             &ml, &post_shift, &lgup);
4507                     gcc_assert (!mh);
4508
4509                     if (post_shift < BITS_PER_WORD
4510                         && size - 1 < BITS_PER_WORD)
4511                       {
4512                         t1 = expand_shift
4513                           (RSHIFT_EXPR, compute_mode, op0,
4514                            size - 1, NULL_RTX, 0);
4515                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4516                                            NULL_RTX, 0, OPTAB_WIDEN);
4517                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4518                                       + shift_cost (speed, compute_mode, size - 1)
4519                                       + 2 * add_cost (speed, compute_mode));
4520                         t3 = expmed_mult_highpart
4521                           (compute_mode, t2, gen_int_mode (ml, compute_mode),
4522                            NULL_RTX, 1, max_cost - extra_cost);
4523                         if (t3 != 0)
4524                           {
4525                             t4 = expand_shift
4526                               (RSHIFT_EXPR, compute_mode, t3,
4527                                post_shift, NULL_RTX, 1);
4528                             quotient = expand_binop (compute_mode, xor_optab,
4529                                                      t4, t1, tquotient, 0,
4530                                                      OPTAB_WIDEN);
4531                           }
4532                       }
4533                   }
4534               }
4535             else
4536               {
4537                 rtx nsign, t1, t2, t3, t4;
4538                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4539                                                   op0, constm1_rtx), NULL_RTX);
4540                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4541                                    0, OPTAB_WIDEN);
4542                 nsign = expand_shift
4543                   (RSHIFT_EXPR, compute_mode, t2,
4544                    size - 1, NULL_RTX, 0);
4545                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4546                                     NULL_RTX);
4547                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4548                                     NULL_RTX, 0);
4549                 if (t4)
4550                   {
4551                     rtx t5;
4552                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4553                                       NULL_RTX, 0);
4554                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4555                                                             t4, t5),
4556                                               tquotient);
4557                   }
4558               }
4559           }
4560
4561         if (quotient != 0)
4562           break;
4563         delete_insns_since (last);
4564
4565         /* Try using an instruction that produces both the quotient and
4566            remainder, using truncation.  We can easily compensate the quotient
4567            or remainder to get floor rounding, once we have the remainder.
4568            Notice that we compute also the final remainder value here,
4569            and return the result right away.  */
4570         if (target == 0 || GET_MODE (target) != compute_mode)
4571           target = gen_reg_rtx (compute_mode);
4572
4573         if (rem_flag)
4574           {
4575             remainder
4576               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4577             quotient = gen_reg_rtx (compute_mode);
4578           }
4579         else
4580           {
4581             quotient
4582               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4583             remainder = gen_reg_rtx (compute_mode);
4584           }
4585
4586         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4587                                  quotient, remainder, 0))
4588           {
4589             /* This could be computed with a branch-less sequence.
4590                Save that for later.  */
4591             rtx tem;
4592             rtx_code_label *label = gen_label_rtx ();
4593             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4594             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4595                                 NULL_RTX, 0, OPTAB_WIDEN);
4596             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4597             expand_dec (quotient, const1_rtx);
4598             expand_inc (remainder, op1);
4599             emit_label (label);
4600             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4601           }
4602
4603         /* No luck with division elimination or divmod.  Have to do it
4604            by conditionally adjusting op0 *and* the result.  */
4605         {
4606           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4607           rtx adjusted_op0;
4608           rtx tem;
4609
4610           quotient = gen_reg_rtx (compute_mode);
4611           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4612           label1 = gen_label_rtx ();
4613           label2 = gen_label_rtx ();
4614           label3 = gen_label_rtx ();
4615           label4 = gen_label_rtx ();
4616           label5 = gen_label_rtx ();
4617           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4618           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4619           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4620                               quotient, 0, OPTAB_LIB_WIDEN);
4621           if (tem != quotient)
4622             emit_move_insn (quotient, tem);
4623           emit_jump_insn (targetm.gen_jump (label5));
4624           emit_barrier ();
4625           emit_label (label1);
4626           expand_inc (adjusted_op0, const1_rtx);
4627           emit_jump_insn (targetm.gen_jump (label4));
4628           emit_barrier ();
4629           emit_label (label2);
4630           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4631           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4632                               quotient, 0, OPTAB_LIB_WIDEN);
4633           if (tem != quotient)
4634             emit_move_insn (quotient, tem);
4635           emit_jump_insn (targetm.gen_jump (label5));
4636           emit_barrier ();
4637           emit_label (label3);
4638           expand_dec (adjusted_op0, const1_rtx);
4639           emit_label (label4);
4640           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4641                               quotient, 0, OPTAB_LIB_WIDEN);
4642           if (tem != quotient)
4643             emit_move_insn (quotient, tem);
4644           expand_dec (quotient, const1_rtx);
4645           emit_label (label5);
4646         }
4647         break;
4648
4649       case CEIL_DIV_EXPR:
4650       case CEIL_MOD_EXPR:
4651         if (unsignedp)
4652           {
4653             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4654               {
4655                 rtx t1, t2, t3;
4656                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4657                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4658                                    floor_log2 (d), tquotient, 1);
4659                 t2 = expand_binop (compute_mode, and_optab, op0,
4660                                    gen_int_mode (d - 1, compute_mode),
4661                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4662                 t3 = gen_reg_rtx (compute_mode);
4663                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4664                                       compute_mode, 1, 1);
4665                 if (t3 == 0)
4666                   {
4667                     rtx_code_label *lab;
4668                     lab = gen_label_rtx ();
4669                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4670                     expand_inc (t1, const1_rtx);
4671                     emit_label (lab);
4672                     quotient = t1;
4673                   }
4674                 else
4675                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4676                                                           t1, t3),
4677                                             tquotient);
4678                 break;
4679               }
4680
4681             /* Try using an instruction that produces both the quotient and
4682                remainder, using truncation.  We can easily compensate the
4683                quotient or remainder to get ceiling rounding, once we have the
4684                remainder.  Notice that we compute also the final remainder
4685                value here, and return the result right away.  */
4686             if (target == 0 || GET_MODE (target) != compute_mode)
4687               target = gen_reg_rtx (compute_mode);
4688
4689             if (rem_flag)
4690               {
4691                 remainder = (REG_P (target)
4692                              ? target : gen_reg_rtx (compute_mode));
4693                 quotient = gen_reg_rtx (compute_mode);
4694               }
4695             else
4696               {
4697                 quotient = (REG_P (target)
4698                             ? target : gen_reg_rtx (compute_mode));
4699                 remainder = gen_reg_rtx (compute_mode);
4700               }
4701
4702             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4703                                      remainder, 1))
4704               {
4705                 /* This could be computed with a branch-less sequence.
4706                    Save that for later.  */
4707                 rtx_code_label *label = gen_label_rtx ();
4708                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4709                                  compute_mode, label);
4710                 expand_inc (quotient, const1_rtx);
4711                 expand_dec (remainder, op1);
4712                 emit_label (label);
4713                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4714               }
4715
4716             /* No luck with division elimination or divmod.  Have to do it
4717                by conditionally adjusting op0 *and* the result.  */
4718             {
4719               rtx_code_label *label1, *label2;
4720               rtx adjusted_op0, tem;
4721
4722               quotient = gen_reg_rtx (compute_mode);
4723               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4724               label1 = gen_label_rtx ();
4725               label2 = gen_label_rtx ();
4726               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4727                                compute_mode, label1);
4728               emit_move_insn  (quotient, const0_rtx);
4729               emit_jump_insn (targetm.gen_jump (label2));
4730               emit_barrier ();
4731               emit_label (label1);
4732               expand_dec (adjusted_op0, const1_rtx);
4733               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4734                                   quotient, 1, OPTAB_LIB_WIDEN);
4735               if (tem != quotient)
4736                 emit_move_insn (quotient, tem);
4737               expand_inc (quotient, const1_rtx);
4738               emit_label (label2);
4739             }
4740           }
4741         else /* signed */
4742           {
4743             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4744                 && INTVAL (op1) >= 0)
4745               {
4746                 /* This is extremely similar to the code for the unsigned case
4747                    above.  For 2.7 we should merge these variants, but for
4748                    2.6.1 I don't want to touch the code for unsigned since that
4749                    get used in C.  The signed case will only be used by other
4750                    languages (Ada).  */
4751
4752                 rtx t1, t2, t3;
4753                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4754                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4755                                    floor_log2 (d), tquotient, 0);
4756                 t2 = expand_binop (compute_mode, and_optab, op0,
4757                                    gen_int_mode (d - 1, compute_mode),
4758                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4759                 t3 = gen_reg_rtx (compute_mode);
4760                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4761                                       compute_mode, 1, 1);
4762                 if (t3 == 0)
4763                   {
4764                     rtx_code_label *lab;
4765                     lab = gen_label_rtx ();
4766                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4767                     expand_inc (t1, const1_rtx);
4768                     emit_label (lab);
4769                     quotient = t1;
4770                   }
4771                 else
4772                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4773                                                           t1, t3),
4774                                             tquotient);
4775                 break;
4776               }
4777
4778             /* Try using an instruction that produces both the quotient and
4779                remainder, using truncation.  We can easily compensate the
4780                quotient or remainder to get ceiling rounding, once we have the
4781                remainder.  Notice that we compute also the final remainder
4782                value here, and return the result right away.  */
4783             if (target == 0 || GET_MODE (target) != compute_mode)
4784               target = gen_reg_rtx (compute_mode);
4785             if (rem_flag)
4786               {
4787                 remainder= (REG_P (target)
4788                             ? target : gen_reg_rtx (compute_mode));
4789                 quotient = gen_reg_rtx (compute_mode);
4790               }
4791             else
4792               {
4793                 quotient = (REG_P (target)
4794                             ? target : gen_reg_rtx (compute_mode));
4795                 remainder = gen_reg_rtx (compute_mode);
4796               }
4797
4798             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4799                                      remainder, 0))
4800               {
4801                 /* This could be computed with a branch-less sequence.
4802                    Save that for later.  */
4803                 rtx tem;
4804                 rtx_code_label *label = gen_label_rtx ();
4805                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4806                                  compute_mode, label);
4807                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4808                                     NULL_RTX, 0, OPTAB_WIDEN);
4809                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4810                 expand_inc (quotient, const1_rtx);
4811                 expand_dec (remainder, op1);
4812                 emit_label (label);
4813                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4814               }
4815
4816             /* No luck with division elimination or divmod.  Have to do it
4817                by conditionally adjusting op0 *and* the result.  */
4818             {
4819               rtx_code_label *label1, *label2, *label3, *label4, *label5;
4820               rtx adjusted_op0;
4821               rtx tem;
4822
4823               quotient = gen_reg_rtx (compute_mode);
4824               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4825               label1 = gen_label_rtx ();
4826               label2 = gen_label_rtx ();
4827               label3 = gen_label_rtx ();
4828               label4 = gen_label_rtx ();
4829               label5 = gen_label_rtx ();
4830               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4831               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4832                                compute_mode, label1);
4833               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4834                                   quotient, 0, OPTAB_LIB_WIDEN);
4835               if (tem != quotient)
4836                 emit_move_insn (quotient, tem);
4837               emit_jump_insn (targetm.gen_jump (label5));
4838               emit_barrier ();
4839               emit_label (label1);
4840               expand_dec (adjusted_op0, const1_rtx);
4841               emit_jump_insn (targetm.gen_jump (label4));
4842               emit_barrier ();
4843               emit_label (label2);
4844               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4845                                compute_mode, label3);
4846               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4847                                   quotient, 0, OPTAB_LIB_WIDEN);
4848               if (tem != quotient)
4849                 emit_move_insn (quotient, tem);
4850               emit_jump_insn (targetm.gen_jump (label5));
4851               emit_barrier ();
4852               emit_label (label3);
4853               expand_inc (adjusted_op0, const1_rtx);
4854               emit_label (label4);
4855               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4856                                   quotient, 0, OPTAB_LIB_WIDEN);
4857               if (tem != quotient)
4858                 emit_move_insn (quotient, tem);
4859               expand_inc (quotient, const1_rtx);
4860               emit_label (label5);
4861             }
4862           }
4863         break;
4864
4865       case EXACT_DIV_EXPR:
4866         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4867           {
4868             HOST_WIDE_INT d = INTVAL (op1);
4869             unsigned HOST_WIDE_INT ml;
4870             int pre_shift;
4871             rtx t1;
4872
4873             pre_shift = ctz_or_zero (d);
4874             ml = invert_mod2n (d >> pre_shift, size);
4875             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4876                                pre_shift, NULL_RTX, unsignedp);
4877             quotient = expand_mult (compute_mode, t1,
4878                                     gen_int_mode (ml, compute_mode),
4879                                     NULL_RTX, 1);
4880
4881             insn = get_last_insn ();
4882             set_dst_reg_note (insn, REG_EQUAL,
4883                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4884                                               compute_mode, op0, op1),
4885                               quotient);
4886           }
4887         break;
4888
4889       case ROUND_DIV_EXPR:
4890       case ROUND_MOD_EXPR:
4891         if (unsignedp)
4892           {
4893             rtx tem;
4894             rtx_code_label *label;
4895             label = gen_label_rtx ();
4896             quotient = gen_reg_rtx (compute_mode);
4897             remainder = gen_reg_rtx (compute_mode);
4898             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4899               {
4900                 rtx tem;
4901                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4902                                          quotient, 1, OPTAB_LIB_WIDEN);
4903                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4904                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4905                                           remainder, 1, OPTAB_LIB_WIDEN);
4906               }
4907             tem = plus_constant (compute_mode, op1, -1);
4908             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4909             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4910             expand_inc (quotient, const1_rtx);
4911             expand_dec (remainder, op1);
4912             emit_label (label);
4913           }
4914         else
4915           {
4916             rtx abs_rem, abs_op1, tem, mask;
4917             rtx_code_label *label;
4918             label = gen_label_rtx ();
4919             quotient = gen_reg_rtx (compute_mode);
4920             remainder = gen_reg_rtx (compute_mode);
4921             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4922               {
4923                 rtx tem;
4924                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4925                                          quotient, 0, OPTAB_LIB_WIDEN);
4926                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4927                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4928                                           remainder, 0, OPTAB_LIB_WIDEN);
4929               }
4930             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4931             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4932             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4933                                 1, NULL_RTX, 1);
4934             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4935             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4936                                 NULL_RTX, 0, OPTAB_WIDEN);
4937             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4938                                  size - 1, NULL_RTX, 0);
4939             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4940                                 NULL_RTX, 0, OPTAB_WIDEN);
4941             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4942                                 NULL_RTX, 0, OPTAB_WIDEN);
4943             expand_inc (quotient, tem);
4944             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4945                                 NULL_RTX, 0, OPTAB_WIDEN);
4946             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4947                                 NULL_RTX, 0, OPTAB_WIDEN);
4948             expand_dec (remainder, tem);
4949             emit_label (label);
4950           }
4951         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4952
4953       default:
4954         gcc_unreachable ();
4955       }
4956
4957   if (quotient == 0)
4958     {
4959       if (target && GET_MODE (target) != compute_mode)
4960         target = 0;
4961
4962       if (rem_flag)
4963         {
4964           /* Try to produce the remainder without producing the quotient.
4965              If we seem to have a divmod pattern that does not require widening,
4966              don't try widening here.  We should really have a WIDEN argument
4967              to expand_twoval_binop, since what we'd really like to do here is
4968              1) try a mod insn in compute_mode
4969              2) try a divmod insn in compute_mode
4970              3) try a div insn in compute_mode and multiply-subtract to get
4971                 remainder
4972              4) try the same things with widening allowed.  */
4973           remainder
4974             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4975                                  op0, op1, target,
4976                                  unsignedp,
4977                                  ((optab_handler (optab2, compute_mode)
4978                                    != CODE_FOR_nothing)
4979                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4980           if (remainder == 0)
4981             {
4982               /* No luck there.  Can we do remainder and divide at once
4983                  without a library call?  */
4984               remainder = gen_reg_rtx (compute_mode);
4985               if (! expand_twoval_binop ((unsignedp
4986                                           ? udivmod_optab
4987                                           : sdivmod_optab),
4988                                          op0, op1,
4989                                          NULL_RTX, remainder, unsignedp))
4990                 remainder = 0;
4991             }
4992
4993           if (remainder)
4994             return gen_lowpart (mode, remainder);
4995         }
4996
4997       /* Produce the quotient.  Try a quotient insn, but not a library call.
4998          If we have a divmod in this mode, use it in preference to widening
4999          the div (for this test we assume it will not fail). Note that optab2
5000          is set to the one of the two optabs that the call below will use.  */
5001       quotient
5002         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5003                              op0, op1, rem_flag ? NULL_RTX : target,
5004                              unsignedp,
5005                              ((optab_handler (optab2, compute_mode)
5006                                != CODE_FOR_nothing)
5007                               ? OPTAB_DIRECT : OPTAB_WIDEN));
5008
5009       if (quotient == 0)
5010         {
5011           /* No luck there.  Try a quotient-and-remainder insn,
5012              keeping the quotient alone.  */
5013           quotient = gen_reg_rtx (compute_mode);
5014           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5015                                      op0, op1,
5016                                      quotient, NULL_RTX, unsignedp))
5017             {
5018               quotient = 0;
5019               if (! rem_flag)
5020                 /* Still no luck.  If we are not computing the remainder,
5021                    use a library call for the quotient.  */
5022                 quotient = sign_expand_binop (compute_mode,
5023                                               udiv_optab, sdiv_optab,
5024                                               op0, op1, target,
5025                                               unsignedp, OPTAB_LIB_WIDEN);
5026             }
5027         }
5028     }
5029
5030   if (rem_flag)
5031     {
5032       if (target && GET_MODE (target) != compute_mode)
5033         target = 0;
5034
5035       if (quotient == 0)
5036         {
5037           /* No divide instruction either.  Use library for remainder.  */
5038           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5039                                          op0, op1, target,
5040                                          unsignedp, OPTAB_LIB_WIDEN);
5041           /* No remainder function.  Try a quotient-and-remainder
5042              function, keeping the remainder.  */
5043           if (!remainder)
5044             {
5045               remainder = gen_reg_rtx (compute_mode);
5046               if (!expand_twoval_binop_libfunc
5047                   (unsignedp ? udivmod_optab : sdivmod_optab,
5048                    op0, op1,
5049                    NULL_RTX, remainder,
5050                    unsignedp ? UMOD : MOD))
5051                 remainder = NULL_RTX;
5052             }
5053         }
5054       else
5055         {
5056           /* We divided.  Now finish doing X - Y * (X / Y).  */
5057           remainder = expand_mult (compute_mode, quotient, op1,
5058                                    NULL_RTX, unsignedp);
5059           remainder = expand_binop (compute_mode, sub_optab, op0,
5060                                     remainder, target, unsignedp,
5061                                     OPTAB_LIB_WIDEN);
5062         }
5063     }
5064
5065   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5066 }
5067 \f
5068 /* Return a tree node with data type TYPE, describing the value of X.
5069    Usually this is an VAR_DECL, if there is no obvious better choice.
5070    X may be an expression, however we only support those expressions
5071    generated by loop.c.  */
5072
5073 tree
5074 make_tree (tree type, rtx x)
5075 {
5076   tree t;
5077
5078   switch (GET_CODE (x))
5079     {
5080     case CONST_INT:
5081     case CONST_WIDE_INT:
5082       t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
5083       return t;
5084
5085     case CONST_DOUBLE:
5086       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5087       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5088         t = wide_int_to_tree (type,
5089                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5090                                                     HOST_BITS_PER_WIDE_INT * 2));
5091       else
5092         t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5093
5094       return t;
5095
5096     case CONST_VECTOR:
5097       {
5098         int units = CONST_VECTOR_NUNITS (x);
5099         tree itype = TREE_TYPE (type);
5100         tree *elts;
5101         int i;
5102
5103         /* Build a tree with vector elements.  */
5104         elts = XALLOCAVEC (tree, units);
5105         for (i = units - 1; i >= 0; --i)
5106           {
5107             rtx elt = CONST_VECTOR_ELT (x, i);
5108             elts[i] = make_tree (itype, elt);
5109           }
5110
5111         return build_vector (type, elts);
5112       }
5113
5114     case PLUS:
5115       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5116                           make_tree (type, XEXP (x, 1)));
5117
5118     case MINUS:
5119       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5120                           make_tree (type, XEXP (x, 1)));
5121
5122     case NEG:
5123       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5124
5125     case MULT:
5126       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5127                           make_tree (type, XEXP (x, 1)));
5128
5129     case ASHIFT:
5130       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5131                           make_tree (type, XEXP (x, 1)));
5132
5133     case LSHIFTRT:
5134       t = unsigned_type_for (type);
5135       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5136                                          make_tree (t, XEXP (x, 0)),
5137                                          make_tree (type, XEXP (x, 1))));
5138
5139     case ASHIFTRT:
5140       t = signed_type_for (type);
5141       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5142                                          make_tree (t, XEXP (x, 0)),
5143                                          make_tree (type, XEXP (x, 1))));
5144
5145     case DIV:
5146       if (TREE_CODE (type) != REAL_TYPE)
5147         t = signed_type_for (type);
5148       else
5149         t = type;
5150
5151       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5152                                          make_tree (t, XEXP (x, 0)),
5153                                          make_tree (t, XEXP (x, 1))));
5154     case UDIV:
5155       t = unsigned_type_for (type);
5156       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5157                                          make_tree (t, XEXP (x, 0)),
5158                                          make_tree (t, XEXP (x, 1))));
5159
5160     case SIGN_EXTEND:
5161     case ZERO_EXTEND:
5162       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5163                                           GET_CODE (x) == ZERO_EXTEND);
5164       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5165
5166     case CONST:
5167       return make_tree (type, XEXP (x, 0));
5168
5169     case SYMBOL_REF:
5170       t = SYMBOL_REF_DECL (x);
5171       if (t)
5172         return fold_convert (type, build_fold_addr_expr (t));
5173       /* fall through.  */
5174
5175     default:
5176       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5177
5178       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5179          address mode to pointer mode.  */
5180       if (POINTER_TYPE_P (type))
5181         x = convert_memory_address_addr_space
5182               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5183
5184       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5185          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5186       t->decl_with_rtl.rtl = x;
5187
5188       return t;
5189     }
5190 }
5191 \f
5192 /* Expand OP0 & OP1 in MODE and return an rtx holding the result.  When
5193    both operands have VOIDmode the AND is first tried through
5194    simplify_binary_operation; otherwise, or if that yields 0, it goes
5195    through and_optab with TARGET as the suggested destination.  A nonzero
5196    TARGET ends up holding the value and is returned; else the result is.  */
5197
5198 rtx
5199 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5200 {
5201   bool both_modeless = (GET_MODE (op0) == VOIDmode
5202                         && GET_MODE (op1) == VOIDmode);
5203   rtx result
5204     = both_modeless ? simplify_binary_operation (AND, mode, op0, op1) : 0;
5205
5206   if (!result)
5207     result = expand_binop (mode, and_optab, op0, op1, target, 0,
5208                            OPTAB_LIB_WIDEN);
5209   if (target && result != target)
5210     emit_move_insn (target, result);
5211   return target ? target : result;
5212 }
5213
5214 /* Helper function for emit_store_flag.

        Try to emit the store-flag insn ICODE computing X CODE Y and return
        an rtx holding the result, or NULL_RTX on failure.  On failure every
        insn emitted since entry is deleted again.

        X and Y are first run through prepare_operand (as operands 2 and 3
        of ICODE) with MODE, COMPARE_MODE and UNSIGNEDP.  The mode of the
        insn's own result is targetm.cstore_mode (ICODE).  TARGET_MODE is the
        mode wanted for the returned value; VOIDmode means "use the insn's
        result mode".  If TARGET is null a new pseudo of TARGET_MODE is
        created.  NORMALIZEP is 0 to leave the insn's raw result alone, or
        1 / -1 to convert a true result to that value.  */
5215 rtx
5216 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5217              machine_mode mode, machine_mode compare_mode,
5218              int unsignedp, rtx x, rtx y, int normalizep,
5219              machine_mode target_mode)
5220 {
5221   struct expand_operand ops[4];
5222   rtx op0, comparison, subtarget;
5223   rtx_insn *last;
5224   machine_mode result_mode = targetm.cstore_mode (icode);
5225
       /* Remember where we are so that a failed attempt can be undone.  */
5226   last = get_last_insn ();
5227   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5228   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5229   if (!x || !y)
5230     {
5231       delete_insns_since (last);
5232       return NULL_RTX;
5233     }
5234
5235   if (target_mode == VOIDmode)
5236     target_mode = result_mode;
5237   if (!target)
5238     target = gen_reg_rtx (target_mode);
5239
5240   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5241
       /* Operand 0 is the result, operand 1 the comparison rtx, operands 2
          and 3 the values compared.  When optimizing, TARGET is not offered
          as the output operand.  */
5242   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5243   create_fixed_operand (&ops[1], comparison);
5244   create_fixed_operand (&ops[2], x);
5245   create_fixed_operand (&ops[3], y);
5246   if (!maybe_expand_insn (icode, 4, ops))
5247     {
5248       delete_insns_since (last);
5249       return NULL_RTX;
5250     }
5251   subtarget = ops[0].value;
5252
5253   /* If we are converting to a wider mode, first convert to
5254      TARGET_MODE, then normalize.  This produces better combining
5255      opportunities on machines that have a SIGN_EXTRACT when we are
5256      testing a single bit.  This mostly benefits the 68k.
5257
5258      If STORE_FLAG_VALUE does not have the sign bit set when
5259      interpreted in MODE, we can do this conversion as unsigned, which
5260      is usually more efficient.  */
5261   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5262     {
5263       convert_move (target, subtarget,
5264                     val_signbit_known_clear_p (result_mode,
5265                                                STORE_FLAG_VALUE));
5266       op0 = target;
           /* From here on the value being normalized lives in TARGET_MODE.  */
5267       result_mode = target_mode;
5268     }
5269   else
5270     op0 = subtarget;
5271
5272   /* If we want to keep subexpressions around, don't reuse our last
5273      target.  */
5274   if (optimize)
5275     subtarget = 0;
5276
5277   /* Now normalize to the proper value in MODE.  Sometimes we don't
5278      have to do anything.  */
5279   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5280     ;
5281   /* STORE_FLAG_VALUE might be the most negative number, so write
5282      the comparison this way to avoid a compiler-time warning.  */
5283   else if (- normalizep == STORE_FLAG_VALUE)
5284     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5285
5286   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5287      it hard to use a value of just the sign bit due to ANSI integer
5288      constant typing rules.  */
5289   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
         /* Move the sign bit down to bit 0: an unsigned shift when a 0/1
            result is wanted, a signed one otherwise.  */
5290     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5291                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5292                         normalizep == 1);
5293   else
5294     {
5295       gcc_assert (STORE_FLAG_VALUE & 1);
5296
           /* Keep only the low bit, then negate it if -1 is wanted.  */
5297       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5298       if (normalizep == -1)
5299         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5300     }
5301
5302   /* If we were converting to a smaller mode, do the conversion now.  */
5303   if (target_mode != result_mode)
5304     {
5305       convert_move (target, op0, 0);
5306       return target;
5307     }
5308   else
5309     return op0;
5310 }
5311
5312
5313 /* A subroutine of emit_store_flag only including "tricks" that do not
5314    need a recursive call.  These are kept separate to avoid infinite
5315    loops.

        TARGET, CODE, OP0, OP1, MODE, UNSIGNEDP and NORMALIZEP are as for
        emit_store_flag; TARGET_MODE is the mode wanted for the result, or
        VOIDmode.  If MODE is VOIDmode the mode of OP0 (after a possible
        operand exchange) is used.  Return an rtx holding the result, or 0
        if none of the strategies tried here applies.

        The strategies, in order: move a constant operand into OP1 and turn
        some comparisons against 1 or -1 into comparisons against zero;
        reduce a double-word integer comparison against 0 or -1 to a
        single-word one (this case does go back through emit_store_flag, on
        word_mode operands); compute A < 0 or A >= 0 by shifting the sign
        bit; finally look for a cstore pattern in MODE or a wider mode.  */
5316
5317 static rtx
5318 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5319                    machine_mode mode, int unsignedp, int normalizep,
5320                    machine_mode target_mode)
5321 {
5322   rtx subtarget;
5323   enum insn_code icode;
5324   machine_mode compare_mode;
5325   enum mode_class mclass;
5327
5328   if (unsignedp)
5329     code = unsigned_condition (code);
5331
5332   /* If one operand is constant, make it the second one.  Only do this
5333      if the other operand is not constant as well.  */
5334
5335   if (swap_commutative_operands_p (op0, op1))
5336     {
5337       std::swap (op0, op1);
5338       code = swap_condition (code);
5339     }
5340
5341   if (mode == VOIDmode)
5342     mode = GET_MODE (op0);
5343
5344   /* For some comparisons with 1 and -1, we can convert this to
5345      comparisons with zero.  This will often produce more opportunities for
5346      store-flag insns.  */
5347
5348   switch (code)
5349     {
5350     case LT:
5351       if (op1 == const1_rtx)
5352         op1 = const0_rtx, code = LE;
5353       break;
5354     case LE:
5355       if (op1 == constm1_rtx)
5356         op1 = const0_rtx, code = LT;
5357       break;
5358     case GE:
5359       if (op1 == const1_rtx)
5360         op1 = const0_rtx, code = GT;
5361       break;
5362     case GT:
5363       if (op1 == constm1_rtx)
5364         op1 = const0_rtx, code = GE;
5365       break;
5366     case GEU:
5367       if (op1 == const1_rtx)
5368         op1 = const0_rtx, code = NE;
5369       break;
5370     case LTU:
5371       if (op1 == const1_rtx)
5372         op1 = const0_rtx, code = EQ;
5373       break;
5374     default:
5375       break;
5376     }
5377
5378   /* If we are comparing a double-word integer with zero or -1, we can
5379      convert the comparison into one involving a single word.  */
5380   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5381       && GET_MODE_CLASS (mode) == MODE_INT
5382       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5383     {
5384       rtx tem;
5385       if ((code == EQ || code == NE)
5386           && (op1 == const0_rtx || op1 == constm1_rtx))
5387         {
5388           rtx op00, op01;
5389
5390           /* Do a logical OR or AND of the two words and compare the
5391              result.  */
5392           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5393           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5394           tem = expand_binop (word_mode,
5395                               op1 == const0_rtx ? ior_optab : and_optab,
5396                               op00, op01, NULL_RTX, unsignedp,
5397                               OPTAB_DIRECT);
5398
5399           if (tem != 0)
5400             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5401                                    unsignedp, normalizep);
5402         }
5403       else if ((code == LT || code == GE) && op1 == const0_rtx)
5404         {
5405           rtx op0h;
5406
5407           /* If testing the sign bit, can just test on high word.  */
5408           op0h = simplify_gen_subreg (word_mode, op0, mode,
5409                                       subreg_highpart_offset (word_mode,
5410                                                               mode));
5411           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5412                                  unsignedp, normalizep);
5413         }
5414       else
5415         tem = NULL_RTX;
5416
5417       if (tem)
5418         {
               /* The single-word result may still need converting to the
                  mode the caller asked for.  */
5419           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5420             return tem;
5421           if (!target)
5422             target = gen_reg_rtx (target_mode);
5423
5424           convert_move (target, tem,
5425                         !val_signbit_known_set_p (word_mode,
5426                                                   (normalizep ? normalizep
5427                                                    : STORE_FLAG_VALUE)));
5428           return target;
5429         }
5430     }
5431
5432   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5433      complement of A (for GE) and shifting the sign bit to the low bit.  */
5434   if (op1 == const0_rtx && (code == LT || code == GE)
5435       && GET_MODE_CLASS (mode) == MODE_INT
5436       && (normalizep || STORE_FLAG_VALUE == 1
5437           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5438     {
5439       subtarget = target;
5440
5441       if (!target)
5442         target_mode = mode;
5443
5444       /* If the result is to be wider than OP0, it is best to convert it
5445          first.  If it is to be narrower, it is *incorrect* to convert it
5446          first.  */
5447       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5448         {
5449           op0 = convert_modes (target_mode, mode, op0, 0);
5450           mode = target_mode;
5451         }
5452
5453       if (target_mode != mode)
5454         subtarget = 0;
5455
5456       if (code == GE)
5457         op0 = expand_unop (mode, one_cmpl_optab, op0,
5458                            ((STORE_FLAG_VALUE == 1 || normalizep)
5459                             ? 0 : subtarget), 0);
5460
5461       if (STORE_FLAG_VALUE == 1 || normalizep)
5462         /* If we are supposed to produce a 0/1 value, we want to do
5463            a logical shift from the sign bit to the low-order bit; for
5464            a -1/0 value, we do an arithmetic shift.  */
5465         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5466                             GET_MODE_BITSIZE (mode) - 1,
5467                             subtarget, normalizep != -1);
5468
5469       if (mode != target_mode)
5470         op0 = convert_modes (target_mode, mode, op0, 0);
5471
5472       return op0;
5473     }
5474
       /* Look for a cstore pattern, starting with MODE and trying
          successively wider modes.  Only the first mode that has a pattern
          is attempted.  */
5475   mclass = GET_MODE_CLASS (mode);
5476   for (compare_mode = mode; compare_mode != VOIDmode;
5477        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5478     {
5479      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5480      icode = optab_handler (cstore_optab, optab_mode);
5481      if (icode != CODE_FOR_nothing)
5482         {
5483           do_pending_stack_adjust ();
5484           rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5485                                  unsignedp, op0, op1, normalizep, target_mode);
5486           if (tem)
5487             return tem;
5488
5489           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5490             {
                   /* Retry with the operands exchanged.  The swapped code must
                      be derived from CODE as it is now: OP0 and OP1 may have
                      been exchanged above (with CODE swapped to match), so a
                      value computed before that point would describe the
                      mirror image of the requested comparison.  */
5491               tem = emit_cstore (target, icode, swap_condition (code), mode,
5492                                  compare_mode, unsignedp, op1, op0,
                                      normalizep, target_mode);
5493               if (tem)
5494                 return tem;
5495             }
5496           break;
5497         }
5498     }
5499
5500   return 0;
5501 }
5502
5503 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5504    and storing in TARGET.  Normally return TARGET.
5505    Return 0 if that cannot be done.
5506
5507    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5508    it is VOIDmode, they cannot both be CONST_INT.
5509
5510    UNSIGNEDP is for the case where we have to widen the operands
5511    to perform the operation.  It says to use zero-extension.
5512
5513    NORMALIZEP is 1 if we should convert the result to be either zero
5514    or one.  Normalize is -1 if we should convert the result to be
5515    either zero or -1.  If NORMALIZEP is zero, the result will be left
5516    "raw" out of the scc insn.

        TARGET may be null.  The rtx returned is not always TARGET itself,
        so callers must use the return value.  When both operands are
        constant nothing is emitted and NULL_RTX is returned.

        emit_store_flag_1 is tried first.  If it fails and branches are not
        free, further tricks are attempted: for floating-point modes the
        reversed comparison or a split into two comparisons; for integer
        modes XOR/subtract against zero, the reversed comparison, and
        sign-bit computations for comparisons against zero.  Insns emitted
        by an attempt that does not pan out are deleted again.  */
5517
5518 rtx
5519 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5520                  machine_mode mode, int unsignedp, int normalizep)
5521 {
5522   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5523   enum rtx_code rcode;
5524   rtx subtarget;
5525   rtx tem, trueval;
5526   rtx_insn *last;
5527
5528   /* If we compare constants, we shouldn't use a store-flag operation,
5529      but a constant load.  We can get there via the vanilla route that
5530      usually generates a compare-branch sequence, but will in this case
5531      fold the comparison to a constant, and thus elide the branch.  */
5532   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5533     return NULL_RTX;
5534
5535   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5536                            target_mode);
5537   if (tem)
5538     return tem;
5539
5540   /* If we reached here, we can't do this with a scc insn, however there
5541      are some comparisons that can be done in other ways.  Don't do any
5542      of these cases if branches are very cheap.  */
5543   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5544     return 0;
5545
5546   /* See what we need to return.  We can only return a 1, -1, or the
5547      sign bit.  */
5548
5549   if (normalizep == 0)
5550     {
5551       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5552         normalizep = STORE_FLAG_VALUE;
5553
5554       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5555         ;
5556       else
5557         return 0;
5558     }
5559
       /* Everything emitted after this point can be discarded with
          delete_insns_since (last).  */
5560   last = get_last_insn ();
5561
5562   /* If optimizing, use different pseudo registers for each insn, instead
5563      of reusing the same pseudo.  This leads to better CSE, but slows
5564      down the compiler, since there are more pseudos.  */
5565   subtarget = (!optimize
5566                && (target_mode == mode)) ? target : NULL_RTX;
       /* The value that stands for "true" in the result.  */
5567   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5568
5569   /* For floating-point comparisons, try the reverse comparison or try
5570      changing the "orderedness" of the comparison.  */
5571   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5572     {
5573       enum rtx_code first_code;
5574       bool and_them;
5575
5576       rcode = reverse_condition_maybe_unordered (code);
5577       if (can_compare_p (rcode, mode, ccp_store_flag)
5578           && (code == ORDERED || code == UNORDERED
5579               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5580               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5581         {
               /* An addition can undo the reversal when the raw flag and
                  the wanted value have opposite signs (1 vs. -1);
                  otherwise an XOR with TRUEVAL is used.  */
5582           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5583                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5584
5585           /* For the reverse comparison, use either an addition or a XOR.  */
5586           if (want_add
5587               && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5588                            optimize_insn_for_speed_p ()) == 0)
5589             {
5590               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5591                                        STORE_FLAG_VALUE, target_mode);
5592               if (tem)
5593                 return expand_binop (target_mode, add_optab, tem,
5594                                      gen_int_mode (normalizep, target_mode),
5595                                      target, 0, OPTAB_WIDEN);
5596             }
5597           else if (!want_add
5598                    && rtx_cost (trueval, mode, XOR, 1,
5599                                 optimize_insn_for_speed_p ()) == 0)
5600             {
5601               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5602                                        normalizep, target_mode);
5603               if (tem)
5604                 return expand_binop (target_mode, xor_optab, tem, trueval,
5605                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5606             }
5607         }
5608
5609       delete_insns_since (last);
5610
5611       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5612       if (code == ORDERED || code == UNORDERED)
5613         return 0;
5614
           /* Split CODE into FIRST_CODE and a second comparison, which
              replaces CODE.  AND_THEM is used below to choose how the two
              results are combined.  */
5615       and_them = split_comparison (code, mode, &first_code, &code);
5616
5617       /* If there are no NaNs, the first comparison should always fall through.
5618          Effectively change the comparison to the other one.  */
5619       if (!HONOR_NANS (mode))
5620         {
5621           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5622           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5623                                     target_mode);
5624         }
5625
5626       if (!HAVE_conditional_move)
5627         return 0;
5628
5629       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5630          conditional move.  */
5631       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5632                                normalizep, target_mode);
5633       if (tem == 0)
5634         return 0;
5635
5636       if (and_them)
5637         tem = emit_conditional_move (target, code, op0, op1, mode,
5638                                      tem, const0_rtx, GET_MODE (tem), 0);
5639       else
5640         tem = emit_conditional_move (target, code, op0, op1, mode,
5641                                      trueval, tem, GET_MODE (tem), 0);
5642
5643       if (tem == 0)
5644         delete_insns_since (last);
5645       return tem;
5646     }
5647
5648   /* The remaining tricks only apply to integer comparisons.  */
5649
5650   if (GET_MODE_CLASS (mode) != MODE_INT)
5651     return 0;
5652
5653   /* If this is an equality comparison of integers, we can try to exclusive-or
5654      (or subtract) the two operands and use a recursive call to try the
5655      comparison with zero.  Don't do any of these cases if branches are
5656      very cheap.  */
5657
5658   if ((code == EQ || code == NE) && op1 != const0_rtx)
5659     {
5660       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5661                           OPTAB_WIDEN);
5662
5663       if (tem == 0)
5664         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5665                             OPTAB_WIDEN);
5666       if (tem != 0)
5667         tem = emit_store_flag (target, code, tem, const0_rtx,
5668                                mode, unsignedp, normalizep);
5669       if (tem != 0)
5670         return tem;
5671
5672       delete_insns_since (last);
5673     }
5674
5675   /* For integer comparisons, try the reverse comparison.  However, for
5676      small X and if we'd have anyway to extend, implementing "X != 0"
5677      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5678   rcode = reverse_condition (code);
5679   if (can_compare_p (rcode, mode, ccp_store_flag)
5680       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5681             && code == NE
5682             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5683             && op1 == const0_rtx))
5684     {
5685       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5686                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5687
5688       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5689       if (want_add
5690           && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5691                        optimize_insn_for_speed_p ()) == 0)
5692         {
5693           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5694                                    STORE_FLAG_VALUE, target_mode);
5695           if (tem != 0)
5696             tem = expand_binop (target_mode, add_optab, tem,
5697                                 gen_int_mode (normalizep, target_mode),
5698                                 target, 0, OPTAB_WIDEN);
5699         }
5700       else if (!want_add
5701                && rtx_cost (trueval, mode, XOR, 1,
5702                             optimize_insn_for_speed_p ()) == 0)
5703         {
5704           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5705                                    normalizep, target_mode);
5706           if (tem != 0)
5707             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5708                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5709         }
5710
           /* TEM is still 0 here if neither branch above was taken.  */
5711       if (tem != 0)
5712         return tem;
5713       delete_insns_since (last);
5714     }
5715
5716   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5717      the constant zero.  Reject all other comparisons at this point.  Only
5718      do LE and GT if branches are expensive since they are expensive on
5719      2-operand machines.  */
5720
5721   if (op1 != const0_rtx
5722       || (code != EQ && code != NE
5723           && (BRANCH_COST (optimize_insn_for_speed_p (),
5724                            false) <= 1 || (code != LE && code != GT))))
5725     return 0;
5726
5727   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5728      do the necessary operation below.  */
5729
5730   tem = 0;
5731
5732   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5733      the sign bit set.  */
5734
5735   if (code == LE)
5736     {
5737       /* This is destructive, so SUBTARGET can't be OP0.  */
5738       if (rtx_equal_p (subtarget, op0))
5739         subtarget = 0;
5740
5741       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5742                           OPTAB_WIDEN);
5743       if (tem)
5744         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5745                             OPTAB_WIDEN);
5746     }
5747
5748   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5749      number of bits in the mode of OP0, minus one.  */
5750
5751   if (code == GT)
5752     {
5753       if (rtx_equal_p (subtarget, op0))
5754         subtarget = 0;
5755
5756       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5757                           GET_MODE_BITSIZE (mode) - 1,
5758                           subtarget, 0);
5759       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5760                           OPTAB_WIDEN);
5761     }
5762
5763   if (code == EQ || code == NE)
5764     {
5765       /* For EQ or NE, one way to do the comparison is to apply an operation
5766          that converts the operand into a positive number if it is nonzero
5767          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5768          for NE we negate.  This puts the result in the sign bit.  Then we
5769          normalize with a shift, if needed.
5770
5771          Two operations that can do the above actions are ABS and FFS, so try
5772          them.  If that doesn't work, and MODE is smaller than a full word,
5773          we can use zero-extension to the wider mode (an unsigned conversion)
5774          as the operation.  */
5775
5776       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5777          that is compensated by the subsequent overflow when subtracting
5778          one / negating.  */
5779
5780       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5781         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5782       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5783         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5784       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5785         {
5786           tem = convert_modes (word_mode, mode, op0, 1);
               /* The rest of the computation is done in word_mode.  */
5787           mode = word_mode;
5788         }
5789
5790       if (tem != 0)
5791         {
5792           if (code == EQ)
5793             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5794                                 0, OPTAB_WIDEN);
5795           else
5796             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5797         }
5798
5799       /* If we couldn't do it that way, for NE we can "or" the two's complement
5800          of the value with itself.  For EQ, we take the one's complement of
5801          that "or", which is an extra insn, so we only handle EQ if branches
5802          are expensive.  */
5803
5804       if (tem == 0
5805           && (code == NE
5806               || BRANCH_COST (optimize_insn_for_speed_p (),
5807                               false) > 1))
5808         {
5809           if (rtx_equal_p (subtarget, op0))
5810             subtarget = 0;
5811
5812           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5813           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5814                               OPTAB_WIDEN);
5815
5816           if (tem && code == EQ)
5817             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5818         }
5819     }
5820
       /* The answer is now in the sign bit of TEM.  Unless the raw sign-bit
          form was requested, shift it down: an unsigned shift gives 0/1, a
          signed one gives 0/-1.  */
5821   if (tem && normalizep)
5822     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5823                         GET_MODE_BITSIZE (mode) - 1,
5824                         subtarget, normalizep == 1);
5825
5826   if (tem)
5827     {
           /* With no TARGET the computed rtx is returned as is; otherwise
              make sure the value ends up in TARGET, converting modes if
              they differ.  */
5828       if (!target)
5829         ;
5830       else if (GET_MODE (tem) != target_mode)
5831         {
5832           convert_move (target, tem, 0);
5833           tem = target;
5834         }
5835       else if (!subtarget)
5836         {
5837           emit_move_insn (target, tem);
5838           tem = target;
5839         }
5840     }
5841   else
5842     delete_insns_since (last);
5843
5844   return tem;
5845 }
5846
5847 /* Like emit_store_flag, but always succeeds.

        emit_store_flag is tried first.  If it returns 0, the result is
        produced with an explicit store / compare-and-jump / store sequence
        instead.  If TARGET is null a word_mode pseudo is created for the
        fallback.  The rtx returned holds the result; it is not necessarily
        the TARGET that was passed in, because a fresh pseudo is substituted
        when TARGET is not a register or is mentioned in OP0 or OP1.

        In the fallback, "true" is stored as NORMALIZEP if that is nonzero
        and as 1 otherwise; "false" is stored as 0.  */
5848
5849 rtx
5850 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5851                        machine_mode mode, int unsignedp, int normalizep)
5852 {
5853   rtx tem;
5854   rtx_code_label *label;
5855   rtx trueval, falseval;
5856
5857   /* First see if emit_store_flag can do the job.  */
5858   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5859   if (tem != 0)
5860     return tem;
5861
5862   if (!target)
5863     target = gen_reg_rtx (word_mode);
5864
5865   /* If this failed, we have to do this with set/compare/jump/set code.
5866      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5867   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5868   if (code == NE
5869       && GET_MODE_CLASS (mode) == MODE_INT
5870       && REG_P (target)
5871       && op0 == target
5872       && op1 == const0_rtx)
5873     {
           /* TARGET already holds the value being tested: skip the store
              when it is zero, otherwise overwrite it with TRUEVAL.  */
5874       label = gen_label_rtx ();
5875       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5876                                NULL_RTX, NULL, label, -1);
5877       emit_move_insn (target, trueval);
5878       emit_label (label);
5879       return target;
5880     }
5881
       /* TARGET is written before the comparison is emitted, so it must be
          a register that neither operand mentions.  */
5882   if (!REG_P (target)
5883       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5884     target = gen_reg_rtx (GET_MODE (target));
5885
5886   /* Jump in the right direction if the target cannot implement CODE
5887      but can jump on its reverse condition.  */
5888   falseval = const0_rtx;
5889   if (! can_compare_p (code, mode, ccp_jump)
5890       && (! FLOAT_MODE_P (mode)
5891           || code == ORDERED || code == UNORDERED
5892           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5893           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5894     {
5895       enum rtx_code rcode;
5896       if (FLOAT_MODE_P (mode))
5897         rcode = reverse_condition_maybe_unordered (code);
5898       else
5899         rcode = reverse_condition (code);
5900
5901       /* Canonicalize to UNORDERED for the libcall.  */
5902       if (can_compare_p (rcode, mode, ccp_jump)
5903           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5904         {
               /* Test the reversed condition instead, with the two stored
                  values exchanged to compensate.  */
5905           falseval = trueval;
5906           trueval = const0_rtx;
5907           code = rcode;
5908         }
5909     }
5910
       /* Store TRUEVAL, jump past the second store if the comparison holds,
          otherwise fall through and overwrite TARGET with FALSEVAL.  */
5911   emit_move_insn (target, trueval);
5912   label = gen_label_rtx ();
5913   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
5914                            label, -1);
5915
5916   emit_move_insn (target, falseval);
5917   emit_label (label);
5918
5919   return target;
5920 }
5921 \f
5922 /* Compare ARG1 with ARG2 in MODE and jump to LABEL when ARG1 OP ARG2 holds;
5923    the comparison is unsigned exactly when OP is LTU, LEU, GTU or GEU.  */
5924
5925 static void
5926 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5927                  rtx_code_label *label)
5928 {
5929   bool below_or_eq_u = (op == LTU || op == LEU);
5930   bool above_or_eq_u = (op == GTU || op == GEU);
5931   do_compare_rtx_and_jump (arg1, arg2, op, below_or_eq_u || above_or_eq_u,
5932                            mode, NULL_RTX, NULL, label, -1);
5933 }