gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2016 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "backend.h"
  26 #include "target.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "predict.h"
  30 #include "tm_p.h"
  31 #include "expmed.h"
  32 #include "optabs.h"
  33 #include "emit-rtl.h"
  34 #include "diagnostic-core.h"
  35 #include "fold-const.h"
  36 #include "stor-layout.h"
  37 #include "dojump.h"
  38 #include "explow.h"
  39 #include "expr.h"
  40 #include "langhooks.h"
  41
/* Statically allocated default instance of struct target_expmed.  */
struct target_expmed default_target_expmed;
#if SWITCHABLE_TARGET
/* Pointer to a struct target_expmed, initialized to the address of the
   default instance.  Only defined when SWITCHABLE_TARGET is nonzero.  */
struct target_expmed *this_target_expmed = &default_target_expmed;
#endif
  46
/* Forward declarations of functions that are local to this file.  */

/* Bit-field store helpers.  */
static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   rtx, bool);
static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
                                     unsigned HOST_WIDE_INT,
                                     rtx, bool);
static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   rtx, bool);
/* Bit-field extraction helpers.  */
static rtx extract_fixed_bit_field (machine_mode, rtx,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
                                      unsigned HOST_WIDE_INT,
                                      unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, int, bool);
/* Comparison/jump and power-of-two division helpers.  */
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  72
  73 /* Return a constant integer mask value of mode MODE with BITSIZE ones
  74    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
  75    The mask is truncated if necessary to the width of mode MODE.  The
  76    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
  77
  78 static inline rtx
  79 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
  80 {
  81   return immed_wide_int_const
  82     (wi::shifted_mask (bitpos, bitsize, complement,
  83                        GET_MODE_PRECISION (mode)), mode);
  84 }
  85
  86 /* Test whether a value is zero of a power of two.  */
  87 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  88   (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
  89
  90 struct init_expmed_rtl
  91 {
  92   rtx reg;
  93   rtx plus;
  94   rtx neg;
  95   rtx mult;
  96   rtx sdiv;
  97   rtx udiv;
  98   rtx sdiv_32;
  99   rtx smod_32;
 100   rtx wide_mult;
 101   rtx wide_lshr;
 102   rtx wide_trunc;
 103   rtx shift;
 104   rtx shift_mult;
 105   rtx shift_add;
 106   rtx shift_sub0;
 107   rtx shift_sub1;
 108   rtx zext;
 109   rtx trunc;
 110
 111   rtx pow2[MAX_BITS_PER_WORD];
 112   rtx cint[MAX_BITS_PER_WORD];
 113 };
 114
 115 static void
 116 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
 117                       machine_mode from_mode, bool speed)
 118 {
 119   int to_size, from_size;
 120   rtx which;
 121
 122   to_size = GET_MODE_PRECISION (to_mode);
 123   from_size = GET_MODE_PRECISION (from_mode);
 124
 125   /* Most partial integers have a precision less than the "full"
 126      integer it requires for storage.  In case one doesn't, for
 127      comparison purposes here, reduce the bit size by one in that
 128      case.  */
 129   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
 130       && exact_log2 (to_size) != -1)
 131     to_size --;
 132   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
 133       && exact_log2 (from_size) != -1)
 134     from_size --;
 135
 136   /* Assume cost of zero-extend and sign-extend is the same.  */
 137   which = (to_size < from_size ? all->trunc : all->zext);
 138
 139   PUT_MODE (all->reg, from_mode);
 140   set_convert_cost (to_mode, from_mode, speed,
 141                     set_src_cost (which, to_mode, speed));
 142 }
 143
 144 static void
 145 init_expmed_one_mode (struct init_expmed_rtl *all,
 146                       machine_mode mode, int speed)
 147 {
 148   int m, n, mode_bitsize;
 149   machine_mode mode_from;
 150
 151   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 152
 153   PUT_MODE (all->reg, mode);
 154   PUT_MODE (all->plus, mode);
 155   PUT_MODE (all->neg, mode);
 156   PUT_MODE (all->mult, mode);
 157   PUT_MODE (all->sdiv, mode);
 158   PUT_MODE (all->udiv, mode);
 159   PUT_MODE (all->sdiv_32, mode);
 160   PUT_MODE (all->smod_32, mode);
 161   PUT_MODE (all->wide_trunc, mode);
 162   PUT_MODE (all->shift, mode);
 163   PUT_MODE (all->shift_mult, mode);
 164   PUT_MODE (all->shift_add, mode);
 165   PUT_MODE (all->shift_sub0, mode);
 166   PUT_MODE (all->shift_sub1, mode);
 167   PUT_MODE (all->zext, mode);
 168   PUT_MODE (all->trunc, mode);
 169
 170   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
 171   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
 172   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
 173   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
 174   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
 175
 176   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
 177                                      <= 2 * add_cost (speed, mode)));
 178   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
 179                                      <= 4 * add_cost (speed, mode)));
 180
 181   set_shift_cost (speed, mode, 0, 0);
 182   {
 183     int cost = add_cost (speed, mode);
 184     set_shiftadd_cost (speed, mode, 0, cost);
 185     set_shiftsub0_cost (speed, mode, 0, cost);
 186     set_shiftsub1_cost (speed, mode, 0, cost);
 187   }
 188
 189   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 190   for (m = 1; m < n; m++)
 191     {
 192       XEXP (all->shift, 1) = all->cint[m];
 193       XEXP (all->shift_mult, 1) = all->pow2[m];
 194
 195       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
 196       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
 197                                                        speed));
 198       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
 199                                                         speed));
 200       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
 201                                                         speed));
 202     }
 203
 204   if (SCALAR_INT_MODE_P (mode))
 205     {
 206       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 207            mode_from = (machine_mode)(mode_from + 1))
 208         init_expmed_one_conv (all, mode, mode_from, speed);
 209     }
 210   if (GET_MODE_CLASS (mode) == MODE_INT)
 211     {
 212       machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
 213       if (wider_mode != VOIDmode)
 214         {
 215           PUT_MODE (all->zext, wider_mode);
 216           PUT_MODE (all->wide_mult, wider_mode);
 217           PUT_MODE (all->wide_lshr, wider_mode);
 218           XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
 219
 220           set_mul_widen_cost (speed, wider_mode,
 221                               set_src_cost (all->wide_mult, wider_mode, speed));
 222           set_mul_highpart_cost (speed, mode,
 223                                  set_src_cost (all->wide_trunc, mode, speed));
 224         }
 225     }
 226 }
 227
 228 void
 229 init_expmed (void)
 230 {
 231   struct init_expmed_rtl all;
 232   machine_mode mode = QImode;
 233   int m, speed;
 234
 235   memset (&all, 0, sizeof all);
 236   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 237     {
 238       all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
 239       all.cint[m] = GEN_INT (m);
 240     }
 241
 242   /* Avoid using hard regs in ways which may be unsupported.  */
 243   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
 244   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
 245   all.neg = gen_rtx_NEG (mode, all.reg);
 246   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
 247   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
 248   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
 249   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
 250   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
 251   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
 252   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
 253   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
 254   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
 255   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
 256   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
 257   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
 258   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
 259   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
 260   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
 261
 262   for (speed = 0; speed < 2; speed++)
 263     {
 264       crtl->maybe_hot_insn_p = speed;
 265       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
 266
 267       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 268            mode = (machine_mode)(mode + 1))
 269         init_expmed_one_mode (&all, mode, speed);
 270
 271       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 272         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 273              mode = (machine_mode)(mode + 1))
 274           init_expmed_one_mode (&all, mode, speed);
 275
 276       if (MIN_MODE_VECTOR_INT != VOIDmode)
 277         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 278              mode = (machine_mode)(mode + 1))
 279           init_expmed_one_mode (&all, mode, speed);
 280     }
 281
 282   if (alg_hash_used_p ())
 283     {
 284       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 285       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 286     }
 287   else
 288     set_alg_hash_used_p (true);
 289   default_rtl_profile ();
 290
 291   ggc_free (all.trunc);
 292   ggc_free (all.shift_sub1);
 293   ggc_free (all.shift_sub0);
 294   ggc_free (all.shift_add);
 295   ggc_free (all.shift_mult);
 296   ggc_free (all.shift);
 297   ggc_free (all.wide_trunc);
 298   ggc_free (all.wide_lshr);
 299   ggc_free (all.wide_mult);
 300   ggc_free (all.zext);
 301   ggc_free (all.smod_32);
 302   ggc_free (all.sdiv_32);
 303   ggc_free (all.udiv);
 304   ggc_free (all.sdiv);
 305   ggc_free (all.mult);
 306   ggc_free (all.neg);
 307   ggc_free (all.plus);
 308   ggc_free (all.reg);
 309 }
 310
 311 /* Return an rtx representing minus the value of X.
 312    MODE is the intended mode of the result,
 313    useful if X is a CONST_INT.  */
 314
 315 rtx
 316 negate_rtx (machine_mode mode, rtx x)
 317 {
 318   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 319
 320   if (result == 0)
 321     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 322
 323   return result;
 324 }
 325
 326 /* Whether reverse storage order is supported on the target.  */
 327 static int reverse_storage_order_supported = -1;
 328
 329 /* Check whether reverse storage order is supported on the target.  */
 330
 331 static void
 332 check_reverse_storage_order_support (void)
 333 {
 334   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 335     {
 336       reverse_storage_order_supported = 0;
 337       sorry ("reverse scalar storage order");
 338     }
 339   else
 340     reverse_storage_order_supported = 1;
 341 }
 342
 343 /* Whether reverse FP storage order is supported on the target.  */
 344 static int reverse_float_storage_order_supported = -1;
 345
 346 /* Check whether reverse FP storage order is supported on the target.  */
 347
 348 static void
 349 check_reverse_float_storage_order_support (void)
 350 {
 351   if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 352     {
 353       reverse_float_storage_order_supported = 0;
 354       sorry ("reverse floating-point scalar storage order");
 355     }
 356   else
 357     reverse_float_storage_order_supported = 1;
 358 }
 359
 360 /* Return an rtx representing value of X with reverse storage order.
 361    MODE is the intended mode of the result,
 362    useful if X is a CONST_INT.  */
 363
 364 rtx
 365 flip_storage_order (enum machine_mode mode, rtx x)
 366 {
 367   enum machine_mode int_mode;
 368   rtx result;
 369
 370   if (mode == QImode)
 371     return x;
 372
 373   if (COMPLEX_MODE_P (mode))
 374     {
 375       rtx real = read_complex_part (x, false);
 376       rtx imag = read_complex_part (x, true);
 377
 378       real = flip_storage_order (GET_MODE_INNER (mode), real);
 379       imag = flip_storage_order (GET_MODE_INNER (mode), imag);
 380
 381       return gen_rtx_CONCAT (mode, real, imag);
 382     }
 383
 384   if (__builtin_expect (reverse_storage_order_supported < 0, 0))
 385     check_reverse_storage_order_support ();
 386
 387   if (SCALAR_INT_MODE_P (mode))
 388     int_mode = mode;
 389   else
 390     {
 391       if (FLOAT_MODE_P (mode)
 392           && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
 393         check_reverse_float_storage_order_support ();
 394
 395       int_mode = mode_for_size (GET_MODE_PRECISION (mode), MODE_INT, 0);
 396       if (int_mode == BLKmode)
 397         {
 398           sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
 399           return x;
 400         }
 401       x = gen_lowpart (int_mode, x);
 402     }
 403
 404   result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
 405   if (result == 0)
 406     result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
 407
 408   if (int_mode != mode)
 409     result = gen_lowpart (mode, result);
 410
 411   return result;
 412 }
 413
 414 /* Adjust bitfield memory MEM so that it points to the first unit of mode
 415    MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
 416    If MODE is BLKmode, return a reference to every byte in the bitfield.
 417    Set *NEW_BITNUM to the bit position of the field within the new memory.  */
 418
 419 static rtx
 420 narrow_bit_field_mem (rtx mem, machine_mode mode,
 421                       unsigned HOST_WIDE_INT bitsize,
 422                       unsigned HOST_WIDE_INT bitnum,
 423                       unsigned HOST_WIDE_INT *new_bitnum)
 424 {
 425   if (mode == BLKmode)
 426     {
 427       *new_bitnum = bitnum % BITS_PER_UNIT;
 428       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 429       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 430                             / BITS_PER_UNIT);
 431       return adjust_bitfield_address_size (mem, mode, offset, size);
 432     }
 433   else
 434     {
 435       unsigned int unit = GET_MODE_BITSIZE (mode);
 436       *new_bitnum = bitnum % unit;
 437       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 438       return adjust_bitfield_address (mem, mode, offset);
 439     }
 440 }
 441
 442 /* The caller wants to perform insertion or extraction PATTERN on a
 443    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 444    BITREGION_START and BITREGION_END are as for store_bit_field
 445    and FIELDMODE is the natural mode of the field.
 446
 447    Search for a mode that is compatible with the memory access
 448    restrictions and (where applicable) with a register insertion or
 449    extraction.  Return the new memory on success, storing the adjusted
 450    bit position in *NEW_BITNUM.  Return null otherwise.  */
 451
 452 static rtx
 453 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 454                               rtx op0, HOST_WIDE_INT bitsize,
 455                               HOST_WIDE_INT bitnum,
 456                               unsigned HOST_WIDE_INT bitregion_start,
 457                               unsigned HOST_WIDE_INT bitregion_end,
 458                               machine_mode fieldmode,
 459                               unsigned HOST_WIDE_INT *new_bitnum)
 460 {
 461   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 462                                 bitregion_end, MEM_ALIGN (op0),
 463                                 MEM_VOLATILE_P (op0));
 464   machine_mode best_mode;
 465   if (iter.next_mode (&best_mode))
 466     {
 467       /* We can use a memory in BEST_MODE.  See whether this is true for
 468          any wider modes.  All other things being equal, we prefer to
 469          use the widest mode possible because it tends to expose more
 470          CSE opportunities.  */
 471       if (!iter.prefer_smaller_modes ())
 472         {
 473           /* Limit the search to the mode required by the corresponding
 474              register insertion or extraction instruction, if any.  */
 475           machine_mode limit_mode = word_mode;
 476           extraction_insn insn;
 477           if (get_best_reg_extraction_insn (&insn, pattern,
 478                                             GET_MODE_BITSIZE (best_mode),
 479                                             fieldmode))
 480             limit_mode = insn.field_mode;
 481
 482           machine_mode wider_mode;
 483           while (iter.next_mode (&wider_mode)
 484                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 485             best_mode = wider_mode;
 486         }
 487       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 488                                    new_bitnum);
 489     }
 490   return NULL_RTX;
 491 }
 492
 493 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 494    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 495    offset is then BITNUM / BITS_PER_UNIT.  */
 496
 497 static bool
 498 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
 499                      unsigned HOST_WIDE_INT bitsize,
 500                      machine_mode struct_mode)
 501 {
 502   if (BYTES_BIG_ENDIAN)
 503     return (bitnum % BITS_PER_UNIT == 0
 504             && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
 505                 || (bitnum + bitsize) % BITS_PER_WORD == 0));
 506   else
 507     return bitnum % BITS_PER_WORD == 0;
 508 }
 509
 510 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 511    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 512    Return false if the access would touch memory outside the range
 513    BITREGION_START to BITREGION_END for conformance to the C++ memory
 514    model.  */
 515
 516 static bool
 517 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 518                             unsigned HOST_WIDE_INT bitnum,
 519                             machine_mode fieldmode,
 520                             unsigned HOST_WIDE_INT bitregion_start,
 521                             unsigned HOST_WIDE_INT bitregion_end)
 522 {
 523   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 524
 525   /* -fstrict-volatile-bitfields must be enabled and we must have a
 526      volatile MEM.  */
 527   if (!MEM_P (op0)
 528       || !MEM_VOLATILE_P (op0)
 529       || flag_strict_volatile_bitfields <= 0)
 530     return false;
 531
 532   /* Non-integral modes likely only happen with packed structures.
 533      Punt.  */
 534   if (!SCALAR_INT_MODE_P (fieldmode))
 535     return false;
 536
 537   /* The bit size must not be larger than the field mode, and
 538      the field mode must not be larger than a word.  */
 539   if (bitsize > modesize || modesize > BITS_PER_WORD)
 540     return false;
 541
 542   /* Check for cases of unaligned fields that must be split.  */
 543   if (bitnum % modesize + bitsize > modesize)
 544     return false;
 545
 546   /* The memory must be sufficiently aligned for a MODESIZE access.
 547      This condition guarantees, that the memory access will not
 548      touch anything after the end of the structure.  */
 549   if (MEM_ALIGN (op0) < modesize)
 550     return false;
 551
 552   /* Check for cases where the C++ memory model applies.  */
 553   if (bitregion_end != 0
 554       && (bitnum - bitnum % modesize < bitregion_start
 555           || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
 556     return false;
 557
 558   return true;
 559 }
 560
 561 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
 562    bit number BITNUM can be treated as a simple value of mode MODE.  */
 563
 564 static bool
 565 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 566                        unsigned HOST_WIDE_INT bitnum, machine_mode mode)
 567 {
 568   return (MEM_P (op0)
 569           && bitnum % BITS_PER_UNIT == 0
 570           && bitsize == GET_MODE_BITSIZE (mode)
 571           && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
 572               || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
 573                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 574 }
 575 \f
 576 /* Try to use instruction INSV to store VALUE into a field of OP0.
 577    BITSIZE and BITNUM are as for store_bit_field.  */
 578
 579 static bool
 580 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 581                             unsigned HOST_WIDE_INT bitsize,
 582                             unsigned HOST_WIDE_INT bitnum,
 583                             rtx value)
 584 {
 585   struct expand_operand ops[4];
 586   rtx value1;
 587   rtx xop0 = op0;
 588   rtx_insn *last = get_last_insn ();
 589   bool copy_back = false;
 590
 591   machine_mode op_mode = insv->field_mode;
 592   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 593   if (bitsize == 0 || bitsize > unit)
 594     return false;
 595
 596   if (MEM_P (xop0))
 597     /* Get a reference to the first byte of the field.  */
 598     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 599                                  &bitnum);
 600   else
 601     {
 602       /* Convert from counting within OP0 to counting in OP_MODE.  */
 603       if (BYTES_BIG_ENDIAN)
 604         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 605
 606       /* If xop0 is a register, we need it in OP_MODE
 607          to make it acceptable to the format of insv.  */
 608       if (GET_CODE (xop0) == SUBREG)
 609         /* We can't just change the mode, because this might clobber op0,
 610            and we will need the original value of op0 if insv fails.  */
 611         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 612       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 613         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 614     }
 615
 616   /* If the destination is a paradoxical subreg such that we need a
 617      truncate to the inner mode, perform the insertion on a temporary and
 618      truncate the result to the original destination.  Note that we can't
 619      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 620      X) 0)) is (reg:N X).  */
 621   if (GET_CODE (xop0) == SUBREG
 622       && REG_P (SUBREG_REG (xop0))
 623       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 624                                          op_mode))
 625     {
 626       rtx tem = gen_reg_rtx (op_mode);
 627       emit_move_insn (tem, xop0);
 628       xop0 = tem;
 629       copy_back = true;
 630     }
 631
 632   /* There are similar overflow check at the start of store_bit_field_1,
 633      but that only check the situation where the field lies completely
 634      outside the register, while there do have situation where the field
 635      lies partialy in the register, we need to adjust bitsize for this
 636      partial overflow situation.  Without this fix, pr48335-2.c on big-endian
 637      will broken on those arch support bit insert instruction, like arm, aarch64
 638      etc.  */
 639   if (bitsize + bitnum > unit && bitnum < unit)
 640     {
 641       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
 642                "destination object, data truncated into %wu-bit",
 643                bitsize, unit - bitnum);
 644       bitsize = unit - bitnum;
 645     }
 646
 647   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 648      "backwards" from the size of the unit we are inserting into.
 649      Otherwise, we count bits from the most significant on a
 650      BYTES/BITS_BIG_ENDIAN machine.  */
 651
 652   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 653     bitnum = unit - bitsize - bitnum;
 654
 655   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 656   value1 = value;
 657   if (GET_MODE (value) != op_mode)
 658     {
 659       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 660         {
 661           rtx tmp;
 662           /* Optimization: Don't bother really extending VALUE
 663              if it has all the bits we will actually use.  However,
 664              if we must narrow it, be sure we do it correctly.  */
 665
 666           if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 667             {
 668               tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 669               if (! tmp)
 670                 tmp = simplify_gen_subreg (op_mode,
 671                                            force_reg (GET_MODE (value),
 672                                                       value1),
 673                                            GET_MODE (value), 0);
 674             }
 675           else
 676             {
 677               tmp = gen_lowpart_if_possible (op_mode, value1);
 678               if (! tmp)
 679                 tmp = gen_lowpart (op_mode, force_reg (GET_MODE (value),
 680                                                        value1));
 681             }
 682           value1 = tmp;
 683         }
 684       else if (CONST_INT_P (value))
 685         value1 = gen_int_mode (INTVAL (value), op_mode);
 686       else
 687         /* Parse phase is supposed to make VALUE's data type
 688            match that of the component reference, which is a type
 689            at least as wide as the field; so VALUE should have
 690            a mode that corresponds to that type.  */
 691         gcc_assert (CONSTANT_P (value));
 692     }
 693
 694   create_fixed_operand (&ops[0], xop0);
 695   create_integer_operand (&ops[1], bitsize);
 696   create_integer_operand (&ops[2], bitnum);
 697   create_input_operand (&ops[3], value1, op_mode);
 698   if (maybe_expand_insn (insv->icode, 4, ops))
 699     {
 700       if (copy_back)
 701         convert_move (op0, xop0, true);
 702       return true;
 703     }
 704   delete_insns_since (last);
 705   return false;
 706 }
 707
 708 /* A subroutine of store_bit_field, with the same arguments.  Return true
 709    if the operation could be implemented.
 710
 711    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 712    no other way of implementing the operation.  If FALLBACK_P is false,
 713    return false instead.  */
 714
 715 static bool
 716 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 717                    unsigned HOST_WIDE_INT bitnum,
 718                    unsigned HOST_WIDE_INT bitregion_start,
 719                    unsigned HOST_WIDE_INT bitregion_end,
 720                    machine_mode fieldmode,
 721                    rtx value, bool reverse, bool fallback_p)
 722 {
 723   rtx op0 = str_rtx;
 724   rtx orig_value;
 725
 726   while (GET_CODE (op0) == SUBREG)
 727     {
 728       /* The following line once was done only if WORDS_BIG_ENDIAN,
 729          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 730          meaningful at a much higher level; when structures are copied
 731          between memory and regs, the higher-numbered regs
 732          always get higher addresses.  */
 733       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 734       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 735       int byte_offset = 0;
 736
 737       /* Paradoxical subregs need special handling on big-endian machines.  */
 738       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 739         {
 740           int difference = inner_mode_size - outer_mode_size;
 741
 742           if (WORDS_BIG_ENDIAN)
 743             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 744           if (BYTES_BIG_ENDIAN)
 745             byte_offset += difference % UNITS_PER_WORD;
 746         }
 747       else
 748         byte_offset = SUBREG_BYTE (op0);
 749
 750       bitnum += byte_offset * BITS_PER_UNIT;
 751       op0 = SUBREG_REG (op0);
 752     }
 753
 754   /* No action is needed if the target is a register and if the field
 755      lies completely outside that register.  This can occur if the source
 756      code contains an out-of-bounds access to a small array.  */
 757   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 758     return true;
 759
 760   /* Use vec_set patterns for inserting parts of vectors whenever
 761      available.  */
 762   if (VECTOR_MODE_P (GET_MODE (op0))
 763       && !MEM_P (op0)
 764       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 765       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 766       && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
 767       && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
 768     {
 769       struct expand_operand ops[3];
 770       machine_mode outermode = GET_MODE (op0);
 771       machine_mode innermode = GET_MODE_INNER (outermode);
 772       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 773       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 774
 775       create_fixed_operand (&ops[0], op0);
 776       create_input_operand (&ops[1], value, innermode);
 777       create_integer_operand (&ops[2], pos);
 778       if (maybe_expand_insn (icode, 3, ops))
 779         return true;
 780     }
 781
 782   /* If the target is a register, overwriting the entire object, or storing
 783      a full-word or multi-word field can be done with just a SUBREG.  */
 784   if (!MEM_P (op0)
 785       && bitsize == GET_MODE_BITSIZE (fieldmode)
 786       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 787           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 788     {
 789       /* Use the subreg machinery either to narrow OP0 to the required
 790          words or to cope with mode punning between equal-sized modes.
 791          In the latter case, use subreg on the rhs side, not lhs.  */
 792       rtx sub;
 793
 794       if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
 795         {
 796           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 797           if (sub)
 798             {
 799               if (reverse)
 800                 sub = flip_storage_order (GET_MODE (op0), sub);
 801               emit_move_insn (op0, sub);
 802               return true;
 803             }
 804         }
 805       else
 806         {
 807           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 808                                      bitnum / BITS_PER_UNIT);
 809           if (sub)
 810             {
 811               if (reverse)
 812                 value = flip_storage_order (fieldmode, value);
 813               emit_move_insn (sub, value);
 814               return true;
 815             }
 816         }
 817     }
 818
 819   /* If the target is memory, storing any naturally aligned field can be
 820      done with a simple store.  For targets that support fast unaligned
 821      memory, any naturally sized, unit aligned field can be done directly.  */
 822   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
 823     {
 824       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 825       if (reverse)
 826         value = flip_storage_order (fieldmode, value);
 827       emit_move_insn (op0, value);
 828       return true;
 829     }
 830
 831   /* Make sure we are playing with integral modes.  Pun with subregs
 832      if we aren't.  This must come after the entire register case above,
 833      since that case is valid for any mode.  The following cases are only
 834      valid for integral modes.  */
 835   {
 836     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 837     if (imode != GET_MODE (op0))
 838       {
 839         if (MEM_P (op0))
 840           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
 841         else
 842           {
 843             gcc_assert (imode != BLKmode);
 844             op0 = gen_lowpart (imode, op0);
 845           }
 846       }
 847   }
 848
 849   /* Storing an lsb-aligned field in a register
 850      can be done with a movstrict instruction.  */
 851
 852   if (!MEM_P (op0)
 853       && !reverse
 854       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 855       && bitsize == GET_MODE_BITSIZE (fieldmode)
 856       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 857     {
 858       struct expand_operand ops[2];
 859       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 860       rtx arg0 = op0;
 861       unsigned HOST_WIDE_INT subreg_off;
 862
 863       if (GET_CODE (arg0) == SUBREG)
 864         {
 865           /* Else we've got some float mode source being extracted into
 866              a different float mode destination -- this combination of
 867              subregs results in Severe Tire Damage.  */
 868           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 869                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 870                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 871           arg0 = SUBREG_REG (arg0);
 872         }
 873
 874       subreg_off = bitnum / BITS_PER_UNIT;
 875       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 876         {
 877           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 878
 879           create_fixed_operand (&ops[0], arg0);
 880           /* Shrink the source operand to FIELDMODE.  */
 881           create_convert_operand_to (&ops[1], value, fieldmode, false);
 882           if (maybe_expand_insn (icode, 2, ops))
 883             return true;
 884         }
 885     }
 886
 887   /* Handle fields bigger than a word.  */
 888
 889   if (bitsize > BITS_PER_WORD)
 890     {
 891       /* Here we transfer the words of the field
 892          in the order least significant first.
 893          This is because the most significant word is the one which may
 894          be less than full.
 895          However, only do that if the value is not BLKmode.  */
 896
 897       const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 898       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 899       unsigned int i;
 900       rtx_insn *last;
 901
 902       /* This is the mode we must force value to, so that there will be enough
 903          subwords to extract.  Note that fieldmode will often (always?) be
 904          VOIDmode, because that is what store_field uses to indicate that this
 905          is a bit field, but passing VOIDmode to operand_subword_force
 906          is not allowed.  */
 907       fieldmode = GET_MODE (value);
 908       if (fieldmode == VOIDmode)
 909         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 910
 911       last = get_last_insn ();
 912       for (i = 0; i < nwords; i++)
 913         {
 914           /* If I is 0, use the low-order word in both field and target;
 915              if I is 1, use the next to lowest word; and so on.  */
 916           unsigned int wordnum = (backwards
 917                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 918                                   - i - 1
 919                                   : i);
 920           unsigned int bit_offset = (backwards ^ reverse
 921                                      ? MAX ((int) bitsize - ((int) i + 1)
 922                                             * BITS_PER_WORD,
 923                                             0)
 924                                      : (int) i * BITS_PER_WORD);
 925           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 926           unsigned HOST_WIDE_INT new_bitsize =
 927             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 928
 929           /* If the remaining chunk doesn't have full wordsize we have
 930              to make sure that for big-endian machines the higher order
 931              bits are used.  */
 932           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 933             value_word = simplify_expand_binop (word_mode, lshr_optab,
 934                                                 value_word,
 935                                                 GEN_INT (BITS_PER_WORD
 936                                                          - new_bitsize),
 937                                                 NULL_RTX, true,
 938                                                 OPTAB_LIB_WIDEN);
 939
 940           if (!store_bit_field_1 (op0, new_bitsize,
 941                                   bitnum + bit_offset,
 942                                   bitregion_start, bitregion_end,
 943                                   word_mode,
 944                                   value_word, reverse, fallback_p))
 945             {
 946               delete_insns_since (last);
 947               return false;
 948             }
 949         }
 950       return true;
 951     }
 952
 953   /* If VALUE has a floating-point or complex mode, access it as an
 954      integer of the corresponding size.  This can occur on a machine
 955      with 64 bit registers that uses SFmode for float.  It can also
 956      occur for unaligned float or complex fields.  */
 957   orig_value = value;
 958   if (GET_MODE (value) != VOIDmode
 959       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 960       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 961     {
 962       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 963       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 964     }
 965
 966   /* If OP0 is a multi-word register, narrow it to the affected word.
 967      If the region spans two words, defer to store_split_bit_field.  */
 968   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 969     {
 970       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
 971         {
 972           if (!fallback_p)
 973             return false;
 974
 975           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 976                                  bitregion_end, value, reverse);
 977           return true;
 978         }
 979       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
 980                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 981       gcc_assert (op0);
 982       bitnum %= BITS_PER_WORD;
 983     }
 984
 985   /* From here on we can assume that the field to be stored in fits
 986      within a word.  If the destination is a register, it too fits
 987      in a word.  */
 988
 989   extraction_insn insv;
 990   if (!MEM_P (op0)
 991       && !reverse
 992       && get_best_reg_extraction_insn (&insv, EP_insv,
 993                                        GET_MODE_BITSIZE (GET_MODE (op0)),
 994                                        fieldmode)
 995       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 996     return true;
 997
 998   /* If OP0 is a memory, try copying it to a register and seeing if a
 999      cheap register alternative is available.  */
1000   if (MEM_P (op0) && !reverse)
1001     {
1002       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1003                                         fieldmode)
1004           && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1005         return true;
1006
1007       rtx_insn *last = get_last_insn ();
1008
1009       /* Try loading part of OP0 into a register, inserting the bitfield
1010          into that, and then copying the result back to OP0.  */
1011       unsigned HOST_WIDE_INT bitpos;
1012       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1013                                                bitregion_start, bitregion_end,
1014                                                fieldmode, &bitpos);
1015       if (xop0)
1016         {
1017           rtx tempreg = copy_to_reg (xop0);
1018           if (store_bit_field_1 (tempreg, bitsize, bitpos,
1019                                  bitregion_start, bitregion_end,
1020                                  fieldmode, orig_value, reverse, false))
1021             {
1022               emit_move_insn (xop0, tempreg);
1023               return true;
1024             }
1025           delete_insns_since (last);
1026         }
1027     }
1028
1029   if (!fallback_p)
1030     return false;
1031
1032   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
1033                          bitregion_end, value, reverse);
1034   return true;
1035 }
1036
1037 /* Generate code to store value from rtx VALUE
1038    into a bit-field within structure STR_RTX
1039    containing BITSIZE bits starting at bit BITNUM.
1040
1041    BITREGION_START is bitpos of the first bitfield in this region.
1042    BITREGION_END is the bitpos of the ending bitfield in this region.
1043    These two fields are 0, if the C++ memory model does not apply,
1044    or we are not interested in keeping track of bitfield regions.
1045
1046    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1047
1048    If REVERSE is true, the store is to be done in reverse order.  */
1049
1050 void
1051 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1052                  unsigned HOST_WIDE_INT bitnum,
1053                  unsigned HOST_WIDE_INT bitregion_start,
1054                  unsigned HOST_WIDE_INT bitregion_end,
1055                  machine_mode fieldmode,
1056                  rtx value, bool reverse)
1057 {
1058   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1059   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
1060                                   bitregion_start, bitregion_end))
1061     {
1062       /* Storing of a full word can be done with a simple store.
1063          We know here that the field can be accessed with one single
1064          instruction.  For targets that support unaligned memory,
1065          an unaligned access may be necessary.  */
1066       if (bitsize == GET_MODE_BITSIZE (fieldmode))
1067         {
1068           str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
1069                                              bitnum / BITS_PER_UNIT);
1070           if (reverse)
1071             value = flip_storage_order (fieldmode, value);
1072           gcc_assert (bitnum % BITS_PER_UNIT == 0);
1073           emit_move_insn (str_rtx, value);
1074         }
1075       else
1076         {
1077           rtx temp;
1078
1079           str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
1080                                           &bitnum);
1081           gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
1082           temp = copy_to_reg (str_rtx);
1083           if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1084                                   fieldmode, value, reverse, true))
1085             gcc_unreachable ();
1086
1087           emit_move_insn (str_rtx, temp);
1088         }
1089
1090       return;
1091     }
1092
1093   /* Under the C++0x memory model, we must not touch bits outside the
1094      bit region.  Adjust the address to start at the beginning of the
1095      bit region.  */
1096   if (MEM_P (str_rtx) && bitregion_start > 0)
1097     {
1098       machine_mode bestmode;
1099       HOST_WIDE_INT offset, size;
1100
1101       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1102
1103       offset = bitregion_start / BITS_PER_UNIT;
1104       bitnum -= bitregion_start;
1105       size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1106       bitregion_end -= bitregion_start;
1107       bitregion_start = 0;
1108       bestmode = get_best_mode (bitsize, bitnum,
1109                                 bitregion_start, bitregion_end,
1110                                 MEM_ALIGN (str_rtx), VOIDmode,
1111                                 MEM_VOLATILE_P (str_rtx));
1112       str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1113     }
1114
1115   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1116                           bitregion_start, bitregion_end,
1117                           fieldmode, value, reverse, true))
1118     gcc_unreachable ();
1119 }
1120 \f
1121 /* Use shifts and boolean operations to store VALUE into a bit field of
1122    width BITSIZE in OP0, starting at bit BITNUM.
1123
1124    If REVERSE is true, the store is to be done in reverse order.  */
1125
1126 static void
1127 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1128                        unsigned HOST_WIDE_INT bitnum,
1129                        unsigned HOST_WIDE_INT bitregion_start,
1130                        unsigned HOST_WIDE_INT bitregion_end,
1131                        rtx value, bool reverse)
1132 {
1133   /* There is a case not handled here:
1134      a structure with a known alignment of just a halfword
1135      and a field split across two aligned halfwords within the structure.
1136      Or likewise a structure with a known alignment of just a byte
1137      and a field split across two bytes.
1138      Such cases are not supposed to be able to occur.  */
1139
1140   if (MEM_P (op0))
1141     {
1142       machine_mode mode = GET_MODE (op0);
1143       if (GET_MODE_BITSIZE (mode) == 0
1144           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1145         mode = word_mode;
1146       mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1147                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1148
1149       if (mode == VOIDmode)
1150         {
1151           /* The only way this should occur is if the field spans word
1152              boundaries.  */
1153           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1154                                  bitregion_end, value, reverse);
1155           return;
1156         }
1157
1158       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1159     }
1160
1161   store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
1162 }
1163
1164 /* Helper function for store_fixed_bit_field, stores
1165    the bit field always using the MODE of OP0.  */
1166
1167 static void
1168 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1169                          unsigned HOST_WIDE_INT bitnum,
1170                          rtx value, bool reverse)
1171 {
1172   machine_mode mode;
1173   rtx temp;
1174   int all_zero = 0;
1175   int all_one = 0;
1176
1177   mode = GET_MODE (op0);
1178   gcc_assert (SCALAR_INT_MODE_P (mode));
1179
1180   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1181      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1182
1183   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1184     /* BITNUM is the distance between our msb
1185        and that of the containing datum.
1186        Convert it to the distance from the lsb.  */
1187     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1188
1189   /* Now BITNUM is always the distance between our lsb
1190      and that of OP0.  */
1191
1192   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1193      we must first convert its mode to MODE.  */
1194
1195   if (CONST_INT_P (value))
1196     {
1197       unsigned HOST_WIDE_INT v = UINTVAL (value);
1198
1199       if (bitsize < HOST_BITS_PER_WIDE_INT)
1200         v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1201
1202       if (v == 0)
1203         all_zero = 1;
1204       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1205                 && v == (HOST_WIDE_INT_1U << bitsize) - 1)
1206                || (bitsize == HOST_BITS_PER_WIDE_INT
1207                    && v == HOST_WIDE_INT_M1U))
1208         all_one = 1;
1209
1210       value = lshift_value (mode, v, bitnum);
1211     }
1212   else
1213     {
1214       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1215                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1216
1217       if (GET_MODE (value) != mode)
1218         value = convert_to_mode (mode, value, 1);
1219
1220       if (must_and)
1221         value = expand_binop (mode, and_optab, value,
1222                               mask_rtx (mode, 0, bitsize, 0),
1223                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1224       if (bitnum > 0)
1225         value = expand_shift (LSHIFT_EXPR, mode, value,
1226                               bitnum, NULL_RTX, 1);
1227     }
1228
1229   if (reverse)
1230     value = flip_storage_order (mode, value);
1231
1232   /* Now clear the chosen bits in OP0,
1233      except that if VALUE is -1 we need not bother.  */
1234   /* We keep the intermediates in registers to allow CSE to combine
1235      consecutive bitfield assignments.  */
1236
1237   temp = force_reg (mode, op0);
1238
1239   if (! all_one)
1240     {
1241       rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1242       if (reverse)
1243         mask = flip_storage_order (mode, mask);
1244       temp = expand_binop (mode, and_optab, temp, mask,
1245                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1246       temp = force_reg (mode, temp);
1247     }
1248
1249   /* Now logical-or VALUE into OP0, unless it is zero.  */
1250
1251   if (! all_zero)
1252     {
1253       temp = expand_binop (mode, ior_optab, temp, value,
1254                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1255       temp = force_reg (mode, temp);
1256     }
1257
1258   if (op0 != temp)
1259     {
1260       op0 = copy_rtx (op0);
1261       emit_move_insn (op0, temp);
1262     }
1263 }
1264 \f
1265 /* Store a bit field that is split across multiple accessible memory objects.
1266
1267    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1268    BITSIZE is the field width; BITPOS the position of its first bit
1269    (within the word).
1270    VALUE is the value to store.
1271
1272    If REVERSE is true, the store is to be done in reverse order.
1273
1274    This does not yet handle fields wider than BITS_PER_WORD.  */
1275
1276 static void
1277 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1278                        unsigned HOST_WIDE_INT bitpos,
1279                        unsigned HOST_WIDE_INT bitregion_start,
1280                        unsigned HOST_WIDE_INT bitregion_end,
1281                        rtx value, bool reverse)
1282 {
1283   unsigned int unit, total_bits, bitsdone = 0;
1284
1285   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1286      much at a time.  */
1287   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1288     unit = BITS_PER_WORD;
1289   else
1290     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1291
1292   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1293      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1294      again, and we will mutually recurse forever.  */
1295   if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1296     unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1297
1298   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1299      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1300      that VALUE might be a floating-point constant.  */
1301   if (CONSTANT_P (value) && !CONST_INT_P (value))
1302     {
1303       rtx word = gen_lowpart_common (word_mode, value);
1304
1305       if (word && (value != word))
1306         value = word;
1307       else
1308         value = gen_lowpart_common (word_mode,
1309                                     force_reg (GET_MODE (value) != VOIDmode
1310                                                ? GET_MODE (value)
1311                                                : word_mode, value));
1312     }
1313
1314   total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1315
1316   while (bitsdone < bitsize)
1317     {
1318       unsigned HOST_WIDE_INT thissize;
1319       unsigned HOST_WIDE_INT thispos;
1320       unsigned HOST_WIDE_INT offset;
1321       rtx part, word;
1322
1323       offset = (bitpos + bitsdone) / unit;
1324       thispos = (bitpos + bitsdone) % unit;
1325
1326       /* When region of bytes we can touch is restricted, decrease
1327          UNIT close to the end of the region as needed.  If op0 is a REG
1328          or SUBREG of REG, don't do this, as there can't be data races
1329          on a register and we can expand shorter code in some cases.  */
1330       if (bitregion_end
1331           && unit > BITS_PER_UNIT
1332           && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1333           && !REG_P (op0)
1334           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1335         {
1336           unit = unit / 2;
1337           continue;
1338         }
1339
1340       /* THISSIZE must not overrun a word boundary.  Otherwise,
1341          store_fixed_bit_field will call us again, and we will mutually
1342          recurse forever.  */
1343       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1344       thissize = MIN (thissize, unit - thispos);
1345
1346       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1347         {
1348           /* Fetch successively less significant portions.  */
1349           if (CONST_INT_P (value))
1350             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1351                              >> (bitsize - bitsdone - thissize))
1352                             & ((HOST_WIDE_INT_1 << thissize) - 1));
1353           /* Likewise, but the source is little-endian.  */
1354           else if (reverse)
1355             part = extract_fixed_bit_field (word_mode, value, thissize,
1356                                             bitsize - bitsdone - thissize,
1357                                             NULL_RTX, 1, false);
1358           else
1359             {
1360               int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1361               /* The args are chosen so that the last part includes the
1362                  lsb.  Give extract_bit_field the value it needs (with
1363                  endianness compensation) to fetch the piece we want.  */
1364               part = extract_fixed_bit_field (word_mode, value, thissize,
1365                                               total_bits - bitsize + bitsdone,
1366                                               NULL_RTX, 1, false);
1367             }
1368         }
1369       else
1370         {
1371           /* Fetch successively more significant portions.  */
1372           if (CONST_INT_P (value))
1373             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1374                              >> bitsdone)
1375                             & ((HOST_WIDE_INT_1 << thissize) - 1));
1376           /* Likewise, but the source is big-endian.  */
1377           else if (reverse)
1378             part = extract_fixed_bit_field (word_mode, value, thissize,
1379                                             total_bits - bitsdone - thissize,
1380                                             NULL_RTX, 1, false);
1381           else
1382             part = extract_fixed_bit_field (word_mode, value, thissize,
1383                                             bitsdone, NULL_RTX, 1, false);
1384         }
1385
1386       /* If OP0 is a register, then handle OFFSET here.  */
1387       if (SUBREG_P (op0) || REG_P (op0))
1388         {
1389           machine_mode op0_mode = GET_MODE (op0);
1390           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1391             word = offset ? const0_rtx : op0;
1392           else
1393             word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1394                                           GET_MODE (op0));
1395           offset &= BITS_PER_WORD / unit - 1;
1396         }
1397       else
1398         word = op0;
1399
1400       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1401          it is just an out-of-bounds access.  Ignore it.  */
1402       if (word != const0_rtx)
1403         store_fixed_bit_field (word, thissize, offset * unit + thispos,
1404                                bitregion_start, bitregion_end, part,
1405                                reverse);
1406       bitsdone += thissize;
1407     }
1408 }
1409 \f
1410 /* A subroutine of extract_bit_field_1 that converts return value X
1411    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1412    to extract_bit_field.  */
1413
1414 static rtx
1415 convert_extracted_bit_field (rtx x, machine_mode mode,
1416                              machine_mode tmode, bool unsignedp)
1417 {
1418   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1419     return x;
1420
1421   /* If the x mode is not a scalar integral, first convert to the
1422      integer mode of that size and then access it as a floating-point
1423      value via a SUBREG.  */
1424   if (!SCALAR_INT_MODE_P (tmode))
1425     {
1426       machine_mode smode;
1427
1428       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1429       x = convert_to_mode (smode, x, unsignedp);
1430       x = force_reg (smode, x);
1431       return gen_lowpart (tmode, x);
1432     }
1433
1434   return convert_to_mode (tmode, x, unsignedp);
1435 }
1436
1437 /* Try to use an ext(z)v pattern to extract a field from OP0.
1438    Return the extracted value on success, otherwise return null.
1439    EXT_MODE is the mode of the extraction and the other arguments
1440    are as for extract_bit_field.  */
1441
1442 static rtx
1443 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1444                               unsigned HOST_WIDE_INT bitsize,
1445                               unsigned HOST_WIDE_INT bitnum,
1446                               int unsignedp, rtx target,
1447                               machine_mode mode, machine_mode tmode)
1448 {
1449   struct expand_operand ops[4];
1450   rtx spec_target = target;
1451   rtx spec_target_subreg = 0;
1452   machine_mode ext_mode = extv->field_mode;
1453   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1454
1455   if (bitsize == 0 || unit < bitsize)
1456     return NULL_RTX;
1457
1458   if (MEM_P (op0))
1459     /* Get a reference to the first byte of the field.  */
1460     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1461                                 &bitnum);
1462   else
1463     {
1464       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1465       if (BYTES_BIG_ENDIAN)
1466         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1467
1468       /* If op0 is a register, we need it in EXT_MODE to make it
1469          acceptable to the format of ext(z)v.  */
1470       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1471         return NULL_RTX;
1472       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1473         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1474     }
1475
1476   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1477      "backwards" from the size of the unit we are extracting from.
1478      Otherwise, we count bits from the most significant on a
1479      BYTES/BITS_BIG_ENDIAN machine.  */
1480
1481   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1482     bitnum = unit - bitsize - bitnum;
1483
1484   if (target == 0)
1485     target = spec_target = gen_reg_rtx (tmode);
1486
1487   if (GET_MODE (target) != ext_mode)
1488     {
1489       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1490          between the mode of the extraction (word_mode) and the target
1491          mode.  Instead, create a temporary and use convert_move to set
1492          the target.  */
1493       if (REG_P (target)
1494           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1495         {
1496           target = gen_lowpart (ext_mode, target);
1497           if (GET_MODE_PRECISION (ext_mode)
1498               > GET_MODE_PRECISION (GET_MODE (spec_target)))
1499             spec_target_subreg = target;
1500         }
1501       else
1502         target = gen_reg_rtx (ext_mode);
1503     }
1504
1505   create_output_operand (&ops[0], target, ext_mode);
1506   create_fixed_operand (&ops[1], op0);
1507   create_integer_operand (&ops[2], bitsize);
1508   create_integer_operand (&ops[3], bitnum);
1509   if (maybe_expand_insn (extv->icode, 4, ops))
1510     {
1511       target = ops[0].value;
1512       if (target == spec_target)
1513         return target;
1514       if (target == spec_target_subreg)
1515         return spec_target;
1516       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1517     }
1518   return NULL_RTX;
1519 }
1520
1521 /* A subroutine of extract_bit_field, with the same arguments.
1522    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1523    if we can find no other means of implementing the operation.
 1524    If FALLBACK_P is false, return NULL instead.  */
1525
1526 static rtx
1527 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1528                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1529                      machine_mode mode, machine_mode tmode,
1530                      bool reverse, bool fallback_p)
1531 {
1532   rtx op0 = str_rtx;
1533   machine_mode int_mode;
1534   machine_mode mode1;
1535
1536   if (tmode == VOIDmode)
1537     tmode = mode;
1538
1539   while (GET_CODE (op0) == SUBREG)
1540     {
1541       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1542       op0 = SUBREG_REG (op0);
1543     }
1544
1545   /* If we have an out-of-bounds access to a register, just return an
1546      uninitialized register of the required mode.  This can occur if the
1547      source code contains an out-of-bounds access to a small array.  */
1548   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1549     return gen_reg_rtx (tmode);
1550
1551   if (REG_P (op0)
1552       && mode == GET_MODE (op0)
1553       && bitnum == 0
1554       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1555     {
1556       if (reverse)
1557         op0 = flip_storage_order (mode, op0);
1558       /* We're trying to extract a full register from itself.  */
1559       return op0;
1560     }
1561
1562   /* See if we can get a better vector mode before extracting.  */
1563   if (VECTOR_MODE_P (GET_MODE (op0))
1564       && !MEM_P (op0)
1565       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1566     {
1567       machine_mode new_mode;
1568
1569       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1570         new_mode = MIN_MODE_VECTOR_FLOAT;
1571       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1572         new_mode = MIN_MODE_VECTOR_FRACT;
1573       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1574         new_mode = MIN_MODE_VECTOR_UFRACT;
1575       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1576         new_mode = MIN_MODE_VECTOR_ACCUM;
1577       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1578         new_mode = MIN_MODE_VECTOR_UACCUM;
1579       else
1580         new_mode = MIN_MODE_VECTOR_INT;
1581
1582       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1583         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1584             && targetm.vector_mode_supported_p (new_mode))
1585           break;
1586       if (new_mode != VOIDmode)
1587         op0 = gen_lowpart (new_mode, op0);
1588     }
1589
1590   /* Use vec_extract patterns for extracting parts of vectors whenever
1591      available.  */
1592   if (VECTOR_MODE_P (GET_MODE (op0))
1593       && !MEM_P (op0)
1594       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1595       && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1596           == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
1597     {
1598       struct expand_operand ops[3];
1599       machine_mode outermode = GET_MODE (op0);
1600       machine_mode innermode = GET_MODE_INNER (outermode);
1601       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1602       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1603
1604       create_output_operand (&ops[0], target, innermode);
1605       create_input_operand (&ops[1], op0, outermode);
1606       create_integer_operand (&ops[2], pos);
1607       if (maybe_expand_insn (icode, 3, ops))
1608         {
1609           target = ops[0].value;
1610           if (GET_MODE (target) != mode)
1611             return gen_lowpart (tmode, target);
1612           return target;
1613         }
1614     }
1615
1616   /* Make sure we are playing with integral modes.  Pun with subregs
1617      if we aren't.  */
1618   {
1619     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1620     if (imode != GET_MODE (op0))
1621       {
1622         if (MEM_P (op0))
1623           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1624         else if (imode != BLKmode)
1625           {
1626             op0 = gen_lowpart (imode, op0);
1627
1628             /* If we got a SUBREG, force it into a register since we
1629                aren't going to be able to do another SUBREG on it.  */
1630             if (GET_CODE (op0) == SUBREG)
1631               op0 = force_reg (imode, op0);
1632           }
1633         else
1634           {
1635             HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1636             rtx mem = assign_stack_temp (GET_MODE (op0), size);
1637             emit_move_insn (mem, op0);
1638             op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1639           }
1640       }
1641   }
1642
1643   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1644      If that's wrong, the solution is to test for it and set TARGET to 0
1645      if needed.  */
1646
1647   /* Get the mode of the field to use for atomic access or subreg
1648      conversion.  */
1649   mode1 = mode;
1650   if (SCALAR_INT_MODE_P (tmode))
1651     {
1652       machine_mode try_mode = mode_for_size (bitsize,
1653                                                   GET_MODE_CLASS (tmode), 0);
1654       if (try_mode != BLKmode)
1655         mode1 = try_mode;
1656     }
1657   gcc_assert (mode1 != BLKmode);
1658
1659   /* Extraction of a full MODE1 value can be done with a subreg as long
1660      as the least significant bit of the value is the least significant
1661      bit of either OP0 or a word of OP0.  */
1662   if (!MEM_P (op0)
1663       && !reverse
1664       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1665       && bitsize == GET_MODE_BITSIZE (mode1)
1666       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1667     {
1668       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1669                                      bitnum / BITS_PER_UNIT);
1670       if (sub)
1671         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1672     }
1673
1674   /* Extraction of a full MODE1 value can be done with a load as long as
1675      the field is on a byte boundary and is sufficiently aligned.  */
1676   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1677     {
1678       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1679       if (reverse)
1680         op0 = flip_storage_order (mode1, op0);
1681       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1682     }
1683
1684   /* Handle fields bigger than a word.  */
1685
1686   if (bitsize > BITS_PER_WORD)
1687     {
1688       /* Here we transfer the words of the field
1689          in the order least significant first.
1690          This is because the most significant word is the one which may
1691          be less than full.  */
1692
1693       const bool backwards = WORDS_BIG_ENDIAN;
1694       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1695       unsigned int i;
1696       rtx_insn *last;
1697
1698       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1699         target = gen_reg_rtx (mode);
1700
1701       /* In case we're about to clobber a base register or something
1702          (see gcc.c-torture/execute/20040625-1.c).   */
1703       if (reg_mentioned_p (target, str_rtx))
1704         target = gen_reg_rtx (mode);
1705
1706       /* Indicate for flow that the entire target reg is being set.  */
1707       emit_clobber (target);
1708
1709       last = get_last_insn ();
1710       for (i = 0; i < nwords; i++)
1711         {
1712           /* If I is 0, use the low-order word in both field and target;
1713              if I is 1, use the next to lowest word; and so on.  */
1714           /* Word number in TARGET to use.  */
1715           unsigned int wordnum
1716             = (backwards
1717                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1718                : i);
1719           /* Offset from start of field in OP0.  */
1720           unsigned int bit_offset = (backwards ^ reverse
1721                                      ? MAX ((int) bitsize - ((int) i + 1)
1722                                             * BITS_PER_WORD,
1723                                             0)
1724                                      : (int) i * BITS_PER_WORD);
1725           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1726           rtx result_part
1727             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1728                                              bitsize - i * BITS_PER_WORD),
1729                                    bitnum + bit_offset, 1, target_part,
1730                                    mode, word_mode, reverse, fallback_p);
1731
1732           gcc_assert (target_part);
1733           if (!result_part)
1734             {
1735               delete_insns_since (last);
1736               return NULL;
1737             }
1738
1739           if (result_part != target_part)
1740             emit_move_insn (target_part, result_part);
1741         }
1742
1743       if (unsignedp)
1744         {
1745           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1746              need to be zero'd out.  */
1747           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1748             {
1749               unsigned int i, total_words;
1750
1751               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1752               for (i = nwords; i < total_words; i++)
1753                 emit_move_insn
1754                   (operand_subword (target,
1755                                     backwards ? total_words - i - 1 : i,
1756                                     1, VOIDmode),
1757                    const0_rtx);
1758             }
1759           return target;
1760         }
1761
1762       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1763       target = expand_shift (LSHIFT_EXPR, mode, target,
1764                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1765       return expand_shift (RSHIFT_EXPR, mode, target,
1766                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1767     }
1768
1769   /* If OP0 is a multi-word register, narrow it to the affected word.
1770      If the region spans two words, defer to extract_split_bit_field.  */
1771   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1772     {
1773       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1774         {
1775           if (!fallback_p)
1776             return NULL_RTX;
1777           target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1778                                             reverse);
1779           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1780         }
1781       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1782                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1783       bitnum %= BITS_PER_WORD;
1784     }
1785
1786   /* From here on we know the desired field is smaller than a word.
1787      If OP0 is a register, it too fits within a word.  */
1788   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1789   extraction_insn extv;
1790   if (!MEM_P (op0)
1791       && !reverse
1792       /* ??? We could limit the structure size to the part of OP0 that
1793          contains the field, with appropriate checks for endianness
1794          and TRULY_NOOP_TRUNCATION.  */
1795       && get_best_reg_extraction_insn (&extv, pattern,
1796                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1797                                        tmode))
1798     {
1799       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1800                                                  unsignedp, target, mode,
1801                                                  tmode);
1802       if (result)
1803         return result;
1804     }
1805
1806   /* If OP0 is a memory, try copying it to a register and seeing if a
1807      cheap register alternative is available.  */
1808   if (MEM_P (op0) & !reverse)
1809     {
1810       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1811                                         tmode))
1812         {
1813           rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1814                                                      bitnum, unsignedp,
1815                                                      target, mode,
1816                                                      tmode);
1817           if (result)
1818             return result;
1819         }
1820
1821       rtx_insn *last = get_last_insn ();
1822
1823       /* Try loading part of OP0 into a register and extracting the
1824          bitfield from that.  */
1825       unsigned HOST_WIDE_INT bitpos;
1826       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1827                                                0, 0, tmode, &bitpos);
1828       if (xop0)
1829         {
1830           xop0 = copy_to_reg (xop0);
1831           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1832                                             unsignedp, target,
1833                                             mode, tmode, reverse, false);
1834           if (result)
1835             return result;
1836           delete_insns_since (last);
1837         }
1838     }
1839
1840   if (!fallback_p)
1841     return NULL;
1842
1843   /* Find a correspondingly-sized integer field, so we can apply
1844      shifts and masks to it.  */
1845   int_mode = int_mode_for_mode (tmode);
1846   if (int_mode == BLKmode)
1847     int_mode = int_mode_for_mode (mode);
1848   /* Should probably push op0 out to memory and then do a load.  */
1849   gcc_assert (int_mode != BLKmode);
1850
1851   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target,
1852                                     unsignedp, reverse);
1853
1854   /* Complex values must be reversed piecewise, so we need to undo the global
1855      reversal, convert to the complex mode and reverse again.  */
1856   if (reverse && COMPLEX_MODE_P (tmode))
1857     {
1858       target = flip_storage_order (int_mode, target);
1859       target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1860       target = flip_storage_order (tmode, target);
1861     }
1862   else
1863     target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1864
1865   return target;
1866 }
1867
1868 /* Emit code that reads the bit-field of BITSIZE bits starting at bit
1869    BITNUM of STR_RTX, and return the rtx holding the extracted value.
1870    TARGET, when nonzero, is only a suggestion for where to put the value;
1871    the caller must always use the returned rtx.
1872
1873    STR_RTX is the structure containing the field (a REG or MEM).
1874    UNSIGNEDP is nonzero if this is an unsigned bit field.
1875    MODE is the natural mode of the field value once extracted.
1876    TMODE is the mode the caller would like the value to have;
1877    but the value may be returned with type MODE instead.
1878
1879    If REVERSE is true, the extraction is to be done in reverse order.
1880
1881    If a TARGET is specified and we can store in it at no extra cost,
1882    we do so, and return TARGET.
1883    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1884    if they are equally easy.  */
1885
1886 rtx
1887 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1888                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1889                    machine_mode mode, machine_mode tmode, bool reverse)
1890 {
1891   /* Pick the access mode to test against -fstrict-volatile-bitfields:
1892      STR_RTX's own mode if it has a size, else TARGET's, else TMODE.  */
1893   machine_mode mode1 = tmode;
1894   if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1895     mode1 = GET_MODE (str_rtx);
1896   else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1897     mode1 = GET_MODE (target);
1898
1899   if (!strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1900     return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1901                                 target, mode, tmode, reverse, true);
1902
1903   /* A field that fills MODE1 is a plain load.  The field is known to be
1904      accessible with a single instruction; on targets that support
1905      unaligned memory the access may be unaligned.  */
1906   if (bitsize == GET_MODE_BITSIZE (mode1))
1907     {
1908       rtx loaded = adjust_bitfield_address (str_rtx, mode1,
1909                                             bitnum / BITS_PER_UNIT);
1910       if (reverse)
1911         loaded = flip_storage_order (mode1, loaded);
1912       gcc_assert (bitnum % BITS_PER_UNIT == 0);
1913       return convert_extracted_bit_field (loaded, mode, tmode, unsignedp);
1914     }
1915
1916   /* Otherwise narrow the reference to MODE1, copy it into a register and
1917      let the generic code extract the field from that register.  */
1918   rtx narrowed = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1919                                        &bitnum);
1920   gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1921   rtx in_reg = copy_to_reg (narrowed);
1922
1923   return extract_bit_field_1 (in_reg, bitsize, bitnum, unsignedp,
1924                               target, mode, tmode, reverse, true);
1925 }
1926 \f
1927 /* Extract the BITSIZE-bit field found at bit BITNUM of OP0 by means of
1928    shifts and boolean operations, and return the rtx holding it.
1929
1930    A nonzero UNSIGNEDP means the field is unsigned (no sign extension).
1931    A true REVERSE means the extraction is to be done in reverse order.
1932
1933    TARGET, if nonzero, is where we try to put the value, in which case
1934    TARGET is returned; this is not guaranteed, though.  When TARGET is
1935    not used, the value lives in a pseudo-reg of mode TMODE.  */
1936
1937 static rtx
1938 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1939                          unsigned HOST_WIDE_INT bitsize,
1940                          unsigned HOST_WIDE_INT bitnum, rtx target,
1941                          int unsignedp, bool reverse)
1942 {
1943   /* Anything that is not a MEM is extracted from OP0 as it stands.  */
1944   if (!MEM_P (op0))
1945     return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1946                                       target, unsignedp, reverse);
1947
1948   machine_mode best
1949     = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1950                      MEM_VOLATILE_P (op0));
1951
1952   /* The only way this should fail is if the field spans word boundaries.  */
1953   if (best == VOIDmode)
1954     return extract_split_bit_field (op0, bitsize, bitnum, unsignedp, reverse);
1955
1956   /* Narrow the MEM to BEST; BITNUM is passed by address for adjustment.  */
1957   rtx narrowed = narrow_bit_field_mem (op0, best, bitsize, bitnum, &bitnum);
1958   return extract_fixed_bit_field_1 (tmode, narrowed, bitsize, bitnum,
1959                                     target, unsignedp, reverse);
1960 }
1961
1962 /* Helper function for extract_fixed_bit_field: extract the field with shifts
1963    and masks, always using the MODE of OP0 (which must be a scalar integer).  */
1964
1965 static rtx
1966 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1967                            unsigned HOST_WIDE_INT bitsize,
1968                            unsigned HOST_WIDE_INT bitnum, rtx target,
1969                            int unsignedp, bool reverse)
1970 {
1971   machine_mode mode = GET_MODE (op0);
1972   gcc_assert (SCALAR_INT_MODE_P (mode));
1973
1974   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1975      for invalid input, such as extract equivalent of f5 from
1976      gcc.dg/pr48335-2.c.  */
1977
1978   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1979     /* BITNUM is the distance between our msb and that of OP0.
1980        Convert it to the distance from the lsb.  */
1981     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1982
1983   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1984      We have reduced the big-endian case to the little-endian case.  */
1985   if (reverse)
1986     op0 = flip_storage_order (mode, op0);
1987   /* Unsigned field: shift it down to the lsb, then mask off the bits above.  */
1988   if (unsignedp)
1989     {
1990       if (bitnum)
1991         {
1992           /* If the field does not already start at the lsb,
1993              shift it so it does.  */
1994           /* Maybe propagate the target for the shift.  */
1995           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1996           if (tmode != mode)
1997             subtarget = 0;
1998           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1999         }
2000       /* Convert the value to the desired mode.  */
2001       if (mode != tmode)
2002         op0 = convert_to_mode (tmode, op0, 1);
2003
2004       /* Unless the msb of the field used to be the msb when we shifted,
2005          mask out the upper bits.  */
2006
2007       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2008         return expand_binop (GET_MODE (op0), and_optab, op0,
2009                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
2010                              target, 1, OPTAB_LIB_WIDEN);
2011       return op0;
2012     }
2013
2014   /* To extract a signed bit-field, first shift its msb to the msb of the word,
2015      then arithmetic-shift its lsb to the lsb of the word.  */
2016   op0 = force_reg (mode, op0);
2017
2018   /* Find the narrowest integer mode that contains the field.  */
2019
2020   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
2021        mode = GET_MODE_WIDER_MODE (mode))
2022     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
2023       {
2024         op0 = convert_to_mode (mode, op0, 0);
2025         break;
2026       }
2027   /* TARGET is dropped when the shifts are done in a mode other than TMODE.  */
2028   if (mode != tmode)
2029     target = 0;
2030
2031   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2032     {
2033       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2034       /* Maybe propagate the target for the shift.  */
2035       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2036       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2037     }
2038
2039   return expand_shift (RSHIFT_EXPR, mode, op0,
2040                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2041 }
2042
2043 /* Return a constant integer rtx of mode MODE (CONST_INT or CONST_DOUBLE)
2044    with the value VALUE << BITPOS, computed via wi::lshift.  */
2045
2046 static rtx
2047 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2048               int bitpos)
2049 {
2050   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2051 }
2052 \f
2053 /* Extract a bit field that is split across two words
2054    and return an RTX for the result, computed in word_mode.
2055
2056    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2057    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2058    UNSIGNEDP is 1 if we should zero-extend the contents; else sign-extend.
2059
2060    If REVERSE is true, the extraction is to be done in reverse order.  */
2061
2062 static rtx
2063 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2064                          unsigned HOST_WIDE_INT bitpos, int unsignedp,
2065                          bool reverse)
2066 {
2067   unsigned int unit;
2068   unsigned int bitsdone = 0;
2069   rtx result = NULL_RTX;
2070   int first = 1;
2071
2072   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2073      much at a time.  */
2074   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2075     unit = BITS_PER_WORD;
2076   else
2077     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2078   /* Gather the field one piece at a time until BITSIZE bits are done.  */
2079   while (bitsdone < bitsize)
2080     {
2081       unsigned HOST_WIDE_INT thissize;
2082       rtx part, word;
2083       unsigned HOST_WIDE_INT thispos;
2084       unsigned HOST_WIDE_INT offset;
2085
2086       offset = (bitpos + bitsdone) / unit;
2087       thispos = (bitpos + bitsdone) % unit;
2088
2089       /* THISSIZE must not overrun a word boundary.  Otherwise,
2090          extract_fixed_bit_field will call us again, and we will mutually
2091          recurse forever.  */
2092       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2093       thissize = MIN (thissize, unit - thispos);
2094
2095       /* If OP0 is a register, then handle OFFSET here.  */
2096       if (SUBREG_P (op0) || REG_P (op0))
2097         {
2098           word = operand_subword_force (op0, offset, GET_MODE (op0));
2099           offset = 0;
2100         }
2101       else
2102         word = op0;
2103
2104       /* Extract the parts in bit-counting order,
2105          whose meaning is determined by BYTES_BIG_ENDIAN and REVERSE.
2106          OFFSET is in UNITs, and UNIT is in bits.  */
2107       part = extract_fixed_bit_field (word_mode, word, thissize,
2108                                       offset * unit + thispos, 0, 1, reverse);
2109       bitsdone += thissize;
2110
2111       /* Shift this part into place for the result.  */
2112       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2113         {
2114           if (bitsize != bitsdone)
2115             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2116                                  bitsize - bitsdone, 0, 1);
2117         }
2118       else
2119         {
2120           if (bitsdone != thissize)
2121             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2122                                  bitsdone - thissize, 0, 1);
2123         }
2124
2125       if (first)
2126         result = part;
2127       else
2128         /* Combine the parts with bitwise or.  This works
2129            because we extracted each part as an unsigned bit field.  */
2130         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2131                                OPTAB_LIB_WIDEN);
2132
2133       first = 0;
2134     }
2135
2136   /* Unsigned bit field: we are done.  */
2137   if (unsignedp)
2138     return result;
2139   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2140   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2141                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2142   return expand_shift (RSHIFT_EXPR, word_mode, result,
2143                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2144 }
2145 \f
2146 /* Compute the low bits of SRC, reinterpreted in mode MODE, as an rvalue
2147    with the bit pattern preserved.  SRC_MODE is the mode of SRC; when it is
2148    narrower than MODE the upper bits of the result are zero.  Return null
2149    if the layout of either mode is unknown (as for CC modes) or if the
2150    extraction would involve unprofitable mode punning; otherwise return
2151    the value.
2152
2153    This is different from gen_lowpart* in these respects:
2154
2155      - the returned value must always be considered an rvalue
2156
2157      - when MODE is wider than SRC_MODE, the extraction involves
2158        a zero extension
2159
2160      - when MODE is smaller than SRC_MODE, the extraction involves
2161        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2162
2163    In other words, this routine performs a computation, whereas the
2164    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2165    operations.  */
2166
2167 rtx
2168 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2169 {
2170   /* Identical modes need no conversion at all.  */
2171   if (mode == src_mode)
2172     return src;
2173
2174   if (CONSTANT_P (src))
2175     {
2176       /* Call simplify_subreg directly: simplify_gen_subreg is unusable
2177          here because, if simplify_subreg fails, it will happily create
2178          (subreg (symbol_ref)) or similar invalid SUBREGs.  */
2179       unsigned int off = subreg_lowpart_offset (mode, src_mode);
2180       rtx folded = simplify_subreg (mode, src, src_mode, off);
2181       if (folded)
2182         return folded;
2183
2184       /* Folding failed; give up on modeless or un-SUBREG-able constants.  */
2185       bool punnable = (GET_MODE (src) != VOIDmode
2186                        && validate_subreg (mode, src_mode, src, off));
2187       if (!punnable)
2188         return NULL_RTX;
2189       return gen_rtx_SUBREG (mode, force_reg (GET_MODE (src), src), off);
2190     }
2191
2192   /* CC modes have no known layout.  */
2193   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2194     return NULL_RTX;
2195
2196   /* Equal-sized tieable modes may be punned directly.  */
2197   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2198       && MODES_TIEABLE_P (mode, src_mode))
2199     {
2200       rtx punned = gen_lowpart_common (mode, src);
2201       if (punned)
2202         return punned;
2203     }
2204
2205   /* Otherwise go through the modes int_mode_for_mode gives for SRC_MODE and
2206      MODE; both must exist and be tieable with the modes they stand in for.  */
2207   machine_mode src_int_mode = int_mode_for_mode (src_mode);
2208   machine_mode int_mode = int_mode_for_mode (mode);
2209   if (src_int_mode == BLKmode
2210       || int_mode == BLKmode
2211       || !MODES_TIEABLE_P (src_int_mode, src_mode)
2212       || !MODES_TIEABLE_P (int_mode, mode))
2213     return NULL_RTX;
2214
2215   rtx as_int = gen_lowpart (src_int_mode, src);
2216   as_int = convert_modes (int_mode, src_int_mode, as_int, true);
2217   return gen_lowpart (mode, as_int);
2218 }
2219 \f
2220 /* Increment TARGET by INC, leaving the sum in TARGET.  */
2221
2222 void
2223 expand_inc (rtx target, rtx inc)
2224 {
2225   const machine_mode mode = GET_MODE (target);
2226   rtx sum
2227     = expand_binop (mode, add_optab, target, inc, target, 0, OPTAB_LIB_WIDEN);
2228   if (target != sum)
2229     emit_move_insn (target, sum);
2230 }
2231
2232 /* Decrement TARGET by DEC, leaving the difference in TARGET.  */
2233
2234 void
2235 expand_dec (rtx target, rtx dec)
2236 {
2237   const machine_mode mode = GET_MODE (target);
2238   rtx diff
2239     = expand_binop (mode, sub_optab, target, dec, target, 0, OPTAB_LIB_WIDEN);
2240   if (target != diff)
2241     emit_move_insn (target, diff);
2242 }
2243 \f
2244 /* Output a shift instruction for expression code CODE,
2245    with SHIFTED being the rtx for the value to shift,
2246    and AMOUNT the rtx for the amount to shift by.
2247    Store the result in the rtx TARGET, if that is convenient.
2248    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2249    Return the rtx for where the value is.  */
2250
2251 static rtx
2252 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2253                 rtx amount, rtx target, int unsignedp)
2254 {
2255   rtx op1, temp = 0;
2256   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2257   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2258   optab lshift_optab = ashl_optab;
2259   optab rshift_arith_optab = ashr_optab;
2260   optab rshift_uns_optab = lshr_optab;
2261   optab lrotate_optab = rotl_optab;
2262   optab rrotate_optab = rotr_optab;
2263   machine_mode op1_mode;
2264   machine_mode scalar_mode = mode;
2265   int attempt;
2266   bool speed = optimize_insn_for_speed_p ();
2267
2268   if (VECTOR_MODE_P (mode))
2269     scalar_mode = GET_MODE_INNER (mode);
2270   op1 = amount;
2271   op1_mode = GET_MODE (op1);
2272
2273   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2274      shift amount is a vector, use the vector/vector shift patterns.  */
2275   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2276     {
2277       lshift_optab = vashl_optab;
2278       rshift_arith_optab = vashr_optab;
2279       rshift_uns_optab = vlshr_optab;
2280       lrotate_optab = vrotl_optab;
2281       rrotate_optab = vrotr_optab;
2282     }
2283
2284   /* Previously detected shift-counts computed by NEGATE_EXPR
2285      and shifted in the other direction; but that does not work
2286      on all machines.  */
2287
2288   if (SHIFT_COUNT_TRUNCATED)
2289     {
2290       if (CONST_INT_P (op1)
2291           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2292               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2293         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2294                        % GET_MODE_BITSIZE (scalar_mode));
2295       else if (GET_CODE (op1) == SUBREG
2296                && subreg_lowpart_p (op1)
2297                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2298                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2299         op1 = SUBREG_REG (op1);
2300     }
2301
2302   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2303      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2304      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2305      amount instead.  */
2306   if (rotate
2307       && CONST_INT_P (op1)
2308       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2309                    GET_MODE_BITSIZE (scalar_mode) - 1))
2310     {
2311       op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2312       left = !left;
2313       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2314     }
2315
2316   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2317      Note that this is not the case for bigger values.  For instance a rotation
2318      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2319      0x04030201 (bswapsi).  */
2320   if (rotate
2321       && CONST_INT_P (op1)
2322       && INTVAL (op1) == BITS_PER_UNIT
2323       && GET_MODE_SIZE (scalar_mode) == 2
2324       && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2325     return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2326                                   unsignedp);
2327
2328   if (op1 == const0_rtx)
2329     return shifted;
2330
2331   /* Check whether its cheaper to implement a left shift by a constant
2332      bit count by a sequence of additions.  */
2333   if (code == LSHIFT_EXPR
2334       && CONST_INT_P (op1)
2335       && INTVAL (op1) > 0
2336       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2337       && INTVAL (op1) < MAX_BITS_PER_WORD
2338       && (shift_cost (speed, mode, INTVAL (op1))
2339           > INTVAL (op1) * add_cost (speed, mode))
2340       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2341     {
2342       int i;
2343       for (i = 0; i < INTVAL (op1); i++)
2344         {
2345           temp = force_reg (mode, shifted);
2346           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2347                                   unsignedp, OPTAB_LIB_WIDEN);
2348         }
2349       return shifted;
2350     }
2351
2352   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2353     {
2354       enum optab_methods methods;
2355
2356       if (attempt == 0)
2357         methods = OPTAB_DIRECT;
2358       else if (attempt == 1)
2359         methods = OPTAB_WIDEN;
2360       else
2361         methods = OPTAB_LIB_WIDEN;
2362
2363       if (rotate)
2364         {
2365           /* Widening does not work for rotation.  */
2366           if (methods == OPTAB_WIDEN)
2367             continue;
2368           else if (methods == OPTAB_LIB_WIDEN)
2369             {
2370               /* If we have been unable to open-code this by a rotation,
2371                  do it as the IOR of two shifts.  I.e., to rotate A
2372                  by N bits, compute
2373                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2374                  where C is the bitsize of A.
2375
2376                  It is theoretically possible that the target machine might
2377                  not be able to perform either shift and hence we would
2378                  be making two libcalls rather than just the one for the
2379                  shift (similarly if IOR could not be done).  We will allow
2380                  this extremely unlikely lossage to avoid complicating the
2381                  code below.  */
2382
2383               rtx subtarget = target == shifted ? 0 : target;
2384               rtx new_amount, other_amount;
2385               rtx temp1;
2386
2387               new_amount = op1;
2388               if (op1 == const0_rtx)
2389                 return shifted;
2390               else if (CONST_INT_P (op1))
2391                 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2392                                         - INTVAL (op1));
2393               else
2394                 {
2395                   other_amount
2396                     = simplify_gen_unary (NEG, GET_MODE (op1),
2397                                           op1, GET_MODE (op1));
2398                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2399                   other_amount
2400                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2401                                            gen_int_mode (mask, GET_MODE (op1)));
2402                 }
2403
2404               shifted = force_reg (mode, shifted);
2405
2406               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2407                                      mode, shifted, new_amount, 0, 1);
2408               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2409                                       mode, shifted, other_amount,
2410                                       subtarget, 1);
2411               return expand_binop (mode, ior_optab, temp, temp1, target,
2412                                    unsignedp, methods);
2413             }
2414
2415           temp = expand_binop (mode,
2416                                left ? lrotate_optab : rrotate_optab,
2417                                shifted, op1, target, unsignedp, methods);
2418         }
2419       else if (unsignedp)
2420         temp = expand_binop (mode,
2421                              left ? lshift_optab : rshift_uns_optab,
2422                              shifted, op1, target, unsignedp, methods);
2423
2424       /* Do arithmetic shifts.
2425          Also, if we are going to widen the operand, we can just as well
2426          use an arithmetic right-shift instead of a logical one.  */
2427       if (temp == 0 && ! rotate
2428           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2429         {
2430           enum optab_methods methods1 = methods;
2431
2432           /* If trying to widen a log shift to an arithmetic shift,
2433              don't accept an arithmetic shift of the same size.  */
2434           if (unsignedp)
2435             methods1 = OPTAB_MUST_WIDEN;
2436
2437           /* Arithmetic shift */
2438
2439           temp = expand_binop (mode,
2440                                left ? lshift_optab : rshift_arith_optab,
2441                                shifted, op1, target, unsignedp, methods1);
2442         }
2443
2444       /* We used to try extzv here for logical right shifts, but that was
2445          only useful for one machine, the VAX, and caused poor code
2446          generation there for lshrdi3, so the code was deleted and a
2447          define_expand for lshrsi3 was added to vax.md.  */
2448     }
2449
2450   gcc_assert (temp);
2451   return temp;
2452 }
2453
2454 /* Emit RTL for the shift or rotate operation CODE in mode MODE.
2455    SHIFTED is the rtx of the value being shifted and AMOUNT is a
2456    plain integer bit count, which is wrapped with GEN_INT here.
2457    TARGET, if convenient, receives the result.  A nonzero UNSIGNEDP
2458    requests a logical shift; zero requests an arithmetic one.
2459    The rtx holding the result is returned.  */
2460
2461 rtx
2462 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2463               int amount, rtx target, int unsignedp)
2464 {
2465   rtx amount_rtx = GEN_INT (amount);
2466   return expand_shift_1 (code, mode, shifted, amount_rtx, target, unsignedp);
2467 }
2468
2469 /* Emit RTL for the shift or rotate operation CODE in mode MODE when
2470    the bit count is given as a tree.  SHIFTED is the rtx of the value
2471    being shifted; AMOUNT is expanded to an rtx with expand_normal.
2472    TARGET, if convenient, receives the result.  A nonzero UNSIGNEDP
2473    requests a logical shift; zero requests an arithmetic one.
2474    The rtx holding the result is returned.  */
2475
2476 rtx
2477 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2478                        tree amount, rtx target, int unsignedp)
2479 {
2480   rtx amount_rtx = expand_normal (amount);
2481   return expand_shift_1 (code, mode, shifted, amount_rtx, target, unsignedp);
2482 }
2483
2484 \f
2485 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2486                         const struct mult_cost *, machine_mode mode);
2487 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2488                               const struct algorithm *, enum mult_variant);
2489 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2490 static rtx extract_high_half (machine_mode, rtx);
2491 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2492 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2493                                        int, int);
2494 /* Compute the best algorithm for multiplying by T and store it in
2495    *ALG_OUT.  The algorithm must cost less than COST_LIMIT.
2496    On failure ALG_OUT->cost is left at COST_LIMIT plus one (in both
2497    cost and latency) and all other fields of *ALG_OUT are undefined.
2498    MODE is the machine mode of the multiplication.

        The search is recursive: each candidate step calls synth_mult on a
        derived multiplier with the remaining budget.  Results, including
        failures, are remembered in the table reached via
        alg_hash_entry_ptr so that a repeated query can jump straight to
        the recorded operation.  */
2499
2500 static void
2501 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2502             const struct mult_cost *cost_limit, machine_mode mode)
2503 {
2504   int m;
2505   struct algorithm *alg_in, *best_alg;
2506   struct mult_cost best_cost;
2507   struct mult_cost new_limit;
2508   int op_cost, op_latency;
2509   unsigned HOST_WIDE_INT orig_t = t;
2510   unsigned HOST_WIDE_INT q;
2511   int maxm, hash_index;
2512   bool cache_hit = false;
2513   enum alg_code cache_alg = alg_zero;
2514   bool speed = optimize_insn_for_speed_p ();
2515   machine_mode imode;
2516   struct alg_hash_entry *entry_ptr;
2517
2518   /* Indicate that no algorithm is yet found.  If no algorithm
2519      is found, this value will be returned and indicate failure.  */
2520   alg_out->cost.cost = cost_limit->cost + 1;
2521   alg_out->cost.latency = cost_limit->latency + 1;
2522
       /* A negative cost budget, or a zero cost budget with no latency
          left, cannot be met by anything below.  */
2523   if (cost_limit->cost < 0
2524       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2525     return;
2526
2527   /* Be prepared for vector modes.  */
2528   imode = GET_MODE_INNER (mode);
2529
       /* Upper bound (exclusive) on the shift counts considered below.  */
2530   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2531
2532   /* Restrict the bits of "t" to the multiplication's mode.  */
2533   t &= GET_MODE_MASK (imode);
2534
2535   /* t == 1 can be done in zero cost.  */
2536   if (t == 1)
2537     {
2538       alg_out->ops = 1;
2539       alg_out->cost.cost = 0;
2540       alg_out->cost.latency = 0;
2541       alg_out->op[0] = alg_m;
2542       return;
2543     }
2544
2545   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2546      fail now.  */
2547   if (t == 0)
2548     {
2549       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2550         return;
2551       else
2552         {
2553           alg_out->ops = 1;
2554           alg_out->cost.cost = zero_cost (speed);
2555           alg_out->cost.latency = zero_cost (speed);
2556           alg_out->op[0] = alg_zero;
2557           return;
2558         }
2559     }
2560
2561   /* We'll be needing a couple extra algorithm structures now.  */
2562
2563   alg_in = XALLOCA (struct algorithm);
2564   best_alg = XALLOCA (struct algorithm);
2565   best_cost = *cost_limit;
2566
2567   /* Compute the hash index.  */
2568   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2569
2570   /* See if we already know what to do for T.  */
2571   entry_ptr = alg_hash_entry_ptr (hash_index);
2572   if (entry_ptr->t == t
2573       && entry_ptr->mode == mode
2575       && entry_ptr->speed == speed
2576       && entry_ptr->alg != alg_unknown)
2577     {
2578       cache_alg = entry_ptr->alg;
2579
2580       if (cache_alg == alg_impossible)
2581         {
2582           /* The cache tells us that it's impossible to synthesize
2583              multiplication by T within entry_ptr->cost.  */
2584           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2585             /* COST_LIMIT is at least as restrictive as the one
2586                recorded in the hash table, in which case we have no
2587                hope of synthesizing a multiplication.  Just
2588                return.  */
2589             return;
2590
2591           /* If we get here, COST_LIMIT is less restrictive than the
2592              one recorded in the hash table, so we may be able to
2593              synthesize a multiplication.  Proceed as if we didn't
2594              have the cache entry.  */
2595         }
2596       else
2597         {
2598           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2599             /* The cached algorithm shows that this multiplication
2600                requires more cost than COST_LIMIT.  Just return.  This
2601                way, we don't clobber this cache entry with
2602                alg_impossible but retain useful information.  */
2603             return;
2604
2605           cache_hit = true;
2606
               /* Jump directly to the section that handles the cached
                  operation.  With CACHE_HIT set, each section goes to
                  `done' instead of falling through to the next one.  */
2607           switch (cache_alg)
2608             {
2609             case alg_shift:
2610               goto do_alg_shift;
2611
2612             case alg_add_t_m2:
2613             case alg_sub_t_m2:
2614               goto do_alg_addsub_t_m2;
2615
2616             case alg_add_factor:
2617             case alg_sub_factor:
2618               goto do_alg_addsub_factor;
2619
2620             case alg_add_t2_m:
2621               goto do_alg_add_t2_m;
2622
2623             case alg_sub_t2_m:
2624               goto do_alg_sub_t2_m;
2625
2626             default:
2627               gcc_unreachable ();
2628             }
2629         }
2630     }
2631
2632   /* If we have a group of zero bits at the low-order part of T, try
2633      multiplying by the remaining bits and then doing a shift.  */
2634
2635   if ((t & 1) == 0)
2636     {
2637     do_alg_shift:
2638       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2639       if (m < maxm)
2640         {
2641           q = t >> m;
2642           /* The function expand_shift will choose between a shift and
2643              a sequence of additions, so the observed cost is given as
2644              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2645           op_cost = m * add_cost (speed, mode);
2646           if (shift_cost (speed, mode, m) < op_cost)
2647             op_cost = shift_cost (speed, mode, m);
2648           new_limit.cost = best_cost.cost - op_cost;
2649           new_limit.latency = best_cost.latency - op_cost;
2650           synth_mult (alg_in, q, &new_limit, mode);
2651
2652           alg_in->cost.cost += op_cost;
2653           alg_in->cost.latency += op_cost;
2654           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2655             {
                   /* After the swap BEST_ALG holds the sub-algorithm just
                      found; this step is recorded in the slot after its
                      last operation.  The same pattern is used by every
                      candidate below.  */
2656               best_cost = alg_in->cost;
2657               std::swap (alg_in, best_alg);
2658               best_alg->log[best_alg->ops] = m;
2659               best_alg->op[best_alg->ops] = alg_shift;
2660             }
2661
2662           /* See if treating ORIG_T as a signed number yields a better
2663              sequence.  Try this sequence only for a negative ORIG_T
2664              as it would be useless for a non-negative ORIG_T.  */
2665           if ((HOST_WIDE_INT) orig_t < 0)
2666             {
2667               /* Shift ORIG_T as follows because a right shift of a
2668                  negative-valued signed type is implementation
2669                  defined.  */
2670               q = ~(~orig_t >> m);
2671               /* The function expand_shift will choose between a shift
2672                  and a sequence of additions, so the observed cost is
2673                  given as MIN (m * add_cost(speed, mode),
2674                  shift_cost(speed, mode, m)).  */
2675               op_cost = m * add_cost (speed, mode);
2676               if (shift_cost (speed, mode, m) < op_cost)
2677                 op_cost = shift_cost (speed, mode, m);
2678               new_limit.cost = best_cost.cost - op_cost;
2679               new_limit.latency = best_cost.latency - op_cost;
2680               synth_mult (alg_in, q, &new_limit, mode);
2681
2682               alg_in->cost.cost += op_cost;
2683               alg_in->cost.latency += op_cost;
2684               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2685                 {
2686                   best_cost = alg_in->cost;
2687                   std::swap (alg_in, best_alg);
2688                   best_alg->log[best_alg->ops] = m;
2689                   best_alg->op[best_alg->ops] = alg_shift;
2690                 }
2691             }
2692         }
2693       if (cache_hit)
2694         goto done;
2695     }
2696
2697   /* If we have an odd number, add or subtract one.  */
2698   if ((t & 1) != 0)
2699     {
2700       unsigned HOST_WIDE_INT w;
2701
2702     do_alg_addsub_t_m2:
           /* Find the lowest zero bit of T; W ends up as that bit, or as
              zero when every bit of T is set.  */
2703       for (w = 1; (w & t) != 0; w <<= 1)
2704         ;
2705       /* If T was -1, then W will be zero after the loop.  This is another
2706          case where T ends with ...111.  Handling this with (T + 1) and
2707          subtract 1 produces slightly better code and results in algorithm
2708          selection much faster than treating it like the ...0111 case
2709          below.  */
2710       if (w == 0
2711           || (w > 2
2712               /* Reject the case where t is 3.
2713                  Thus we prefer addition in that case.  */
2714               && t != 3))
2715         {
2716           /* T ends with ...111.  Multiply by (T + 1) and subtract the
                  multiplicand once (alg_sub_t_m2 with a shift count of 0).  */
2717
2718           op_cost = add_cost (speed, mode);
2719           new_limit.cost = best_cost.cost - op_cost;
2720           new_limit.latency = best_cost.latency - op_cost;
2721           synth_mult (alg_in, t + 1, &new_limit, mode);
2722
2723           alg_in->cost.cost += op_cost;
2724           alg_in->cost.latency += op_cost;
2725           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2726             {
2727               best_cost = alg_in->cost;
2728               std::swap (alg_in, best_alg);
2729               best_alg->log[best_alg->ops] = 0;
2730               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2731             }
2732         }
2733       else
2734         {
2735           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add the
                  multiplicand once (alg_add_t_m2 with a shift count of 0).  */
2736
2737           op_cost = add_cost (speed, mode);
2738           new_limit.cost = best_cost.cost - op_cost;
2739           new_limit.latency = best_cost.latency - op_cost;
2740           synth_mult (alg_in, t - 1, &new_limit, mode);
2741
2742           alg_in->cost.cost += op_cost;
2743           alg_in->cost.latency += op_cost;
2744           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2745             {
2746               best_cost = alg_in->cost;
2747               std::swap (alg_in, best_alg);
2748               best_alg->log[best_alg->ops] = 0;
2749               best_alg->op[best_alg->ops] = alg_add_t_m2;
2750             }
2751         }
2752
2753       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2754          quickly with a - a * n for some appropriate constant n.  */
2755       m = exact_log2 (-orig_t + 1);
2756       if (m >= 0 && m < maxm)
2757         {
2758           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2759           /* If the target has a cheap shift-and-subtract insn use
2760              that in preference to a shift insn followed by a sub insn.
2761              Assume that the shift-and-sub is "atomic" with a latency
2762              equal to its cost, otherwise assume that on superscalar
2763              hardware the shift may be executed concurrently with the
2764              earlier steps in the algorithm.  */
2765           if (shiftsub1_cost (speed, mode, m) <= op_cost)
2766             {
2767               op_cost = shiftsub1_cost (speed, mode, m);
2768               op_latency = op_cost;
2769             }
2770           else
2771             op_latency = add_cost (speed, mode);
2772
2773           new_limit.cost = best_cost.cost - op_cost;
2774           new_limit.latency = best_cost.latency - op_latency;
2775           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2776                       &new_limit, mode);
2777
2778           alg_in->cost.cost += op_cost;
2779           alg_in->cost.latency += op_latency;
2780           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2781             {
2782               best_cost = alg_in->cost;
2783               std::swap (alg_in, best_alg);
2784               best_alg->log[best_alg->ops] = m;
2785               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2786             }
2787         }
2788
2789       if (cache_hit)
2790         goto done;
2791     }
2792
2793   /* Look for factors of t of the form
2794      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2795      If we find such a factor, we can multiply by t using an algorithm that
2796      multiplies by q, shift the result by m and add/subtract it to itself.
2797
2798      We search for large factors first and loop down, even if large factors
2799      are less probable than small; if we find a large factor we will find a
2800      good sequence quickly, and therefore be able to prune (by decreasing
2801      COST_LIMIT) the search.  */
2802
2803  do_alg_addsub_factor:
2804   for (m = floor_log2 (t - 1); m >= 2; m--)
2805     {
2806       unsigned HOST_WIDE_INT d;
2807
           /* On a cache hit only the factor form that was cached (add or
              subtract) is retried.  */
2808       d = (HOST_WIDE_INT_1U << m) + 1;
2809       if (t % d == 0 && t > d && m < maxm
2810           && (!cache_hit || cache_alg == alg_add_factor))
2811         {
2812           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2813           if (shiftadd_cost (speed, mode, m) <= op_cost)
2814             op_cost = shiftadd_cost (speed, mode, m);
2815
2816           op_latency = op_cost;
2817
2818
2819           new_limit.cost = best_cost.cost - op_cost;
2820           new_limit.latency = best_cost.latency - op_latency;
2821           synth_mult (alg_in, t / d, &new_limit, mode);
2822
2823           alg_in->cost.cost += op_cost;
2824           alg_in->cost.latency += op_latency;
2825           if (alg_in->cost.latency < op_cost)
2826             alg_in->cost.latency = op_cost;
2827           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2828             {
2829               best_cost = alg_in->cost;
2830               std::swap (alg_in, best_alg);
2831               best_alg->log[best_alg->ops] = m;
2832               best_alg->op[best_alg->ops] = alg_add_factor;
2833             }
2834           /* Other factors will have been taken care of in the recursion.  */
2835           break;
2836         }
2837
2838       d = (HOST_WIDE_INT_1U << m) - 1;
2839       if (t % d == 0 && t > d && m < maxm
2840           && (!cache_hit || cache_alg == alg_sub_factor))
2841         {
2842           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2843           if (shiftsub0_cost (speed, mode, m) <= op_cost)
2844             op_cost = shiftsub0_cost (speed, mode, m);
2845
2846           op_latency = op_cost;
2847
2848           new_limit.cost = best_cost.cost - op_cost;
2849           new_limit.latency = best_cost.latency - op_latency;
2850           synth_mult (alg_in, t / d, &new_limit, mode);
2851
2852           alg_in->cost.cost += op_cost;
2853           alg_in->cost.latency += op_latency;
2854           if (alg_in->cost.latency < op_cost)
2855             alg_in->cost.latency = op_cost;
2856           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2857             {
2858               best_cost = alg_in->cost;
2859               std::swap (alg_in, best_alg);
2860               best_alg->log[best_alg->ops] = m;
2861               best_alg->op[best_alg->ops] = alg_sub_factor;
2862             }
2863           break;
2864         }
2865     }
2866   if (cache_hit)
2867     goto done;
2868
2869   /* Try shift-and-add (load effective address) instructions,
2870      i.e. do a*3, a*5, a*9.  */
2871   if ((t & 1) != 0)
2872     {
2873     do_alg_add_t2_m:
           /* Q becomes the lowest set bit of T - 1, so M is the number of
              low zero bits of T - 1.  */
2874       q = t - 1;
2875       q = q & -q;
2876       m = exact_log2 (q);
2877       if (m >= 0 && m < maxm)
2878         {
2879           op_cost = shiftadd_cost (speed, mode, m);
2880           new_limit.cost = best_cost.cost - op_cost;
2881           new_limit.latency = best_cost.latency - op_cost;
2882           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2883
2884           alg_in->cost.cost += op_cost;
2885           alg_in->cost.latency += op_cost;
2886           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2887             {
2888               best_cost = alg_in->cost;
2889               std::swap (alg_in, best_alg);
2890               best_alg->log[best_alg->ops] = m;
2891               best_alg->op[best_alg->ops] = alg_add_t2_m;
2892             }
2893         }
2894       if (cache_hit)
2895         goto done;
2896
2897     do_alg_sub_t2_m:
           /* Likewise for T + 1, using a shift-and-subtract.  */
2898       q = t + 1;
2899       q = q & -q;
2900       m = exact_log2 (q);
2901       if (m >= 0 && m < maxm)
2902         {
2903           op_cost = shiftsub0_cost (speed, mode, m);
2904           new_limit.cost = best_cost.cost - op_cost;
2905           new_limit.latency = best_cost.latency - op_cost;
2906           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2907
2908           alg_in->cost.cost += op_cost;
2909           alg_in->cost.latency += op_cost;
2910           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2911             {
2912               best_cost = alg_in->cost;
2913               std::swap (alg_in, best_alg);
2914               best_alg->log[best_alg->ops] = m;
2915               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2916             }
2917         }
2918       if (cache_hit)
2919         goto done;
2920     }
2921
2922  done:
2923   /* If best_cost has not decreased, we have not found any algorithm.  */
2924   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2925     {
2926       /* We failed to find an algorithm.  Record alg_impossible for
2927          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2928          we are asked to find an algorithm for T within the same or
2929          lower COST_LIMIT, we can immediately return to the
2930          caller.  */
2931       entry_ptr->t = t;
2932       entry_ptr->mode = mode;
2933       entry_ptr->speed = speed;
2934       entry_ptr->alg = alg_impossible;
2935       entry_ptr->cost = *cost_limit;
2936       return;
2937     }
2938
2939   /* Cache the result.  */
2940   if (!cache_hit)
2941     {
2942       entry_ptr->t = t;
2943       entry_ptr->mode = mode;
2944       entry_ptr->speed = speed;
2945       entry_ptr->alg = best_alg->op[best_alg->ops];
2946       entry_ptr->cost.cost = best_cost.cost;
2947       entry_ptr->cost.latency = best_cost.latency;
2948     }
2949
2950   /* If we are getting a too long sequence for `struct algorithm'
2951      to record, make this search fail.  */
2952   if (best_alg->ops == MAX_BITS_PER_WORD)
2953     return;
2954
2955   /* Copy the algorithm from temporary space to the space at alg_out.
2956      We avoid using structure assignment because the majority of
2957      best_alg is normally undefined, and this is a critical function.  */
2958   alg_out->ops = best_alg->ops + 1;
2959   alg_out->cost = best_cost;
2960   memcpy (alg_out->op, best_alg->op,
2961           alg_out->ops * sizeof *alg_out->op);
2962   memcpy (alg_out->log, best_alg->log,
2963           alg_out->ops * sizeof *alg_out->log);
2964 }
2965 \f
2966 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2967    Try three variations:
2968
2969        - a shift/add sequence based on VAL itself
2970        - a shift/add sequence based on -VAL, followed by a negation
2971        - a shift/add sequence based on VAL - 1, followed by an addition.
2972
2973    Return true if the cheapest of these costs less than MULT_COST,
2974    describing the algorithm in *ALG and final fixup in *VARIANT.

        A negative MULT_COST fails immediately; otherwise MULT_COST is
        first clamped to the cost of 2 * bits additions.  -VAL and VAL - 1
        are formed in unsigned arithmetic so that no signed overflow occurs
        when VAL is the most negative HOST_WIDE_INT.  */
2975
2976 bool
2977 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2978                      struct algorithm *alg, enum mult_variant *variant,
2979                      int mult_cost)
2980 {
2981   struct algorithm alg2;
2982   struct mult_cost limit;
2983   int op_cost;
2984   bool speed = optimize_insn_for_speed_p ();
2985
2986   /* Fail quickly for impossible bounds.  */
2987   if (mult_cost < 0)
2988     return false;
2989
2990   /* Ensure that mult_cost provides a reasonable upper bound.
2991      Any constant multiplication can be performed with less
2992      than 2 * bits additions.  */
2993   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2994   if (mult_cost > op_cost)
2995     mult_cost = op_cost;
2996
       /* First candidate: synthesize VAL directly.  */
2997   *variant = basic_variant;
2998   limit.cost = mult_cost;
2999   limit.latency = mult_cost;
3000   synth_mult (alg, val, &limit, mode);
3001
3002   /* This works only if the inverted value actually fits in an
3003      `unsigned int' */
3004   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3005     {
           /* Second candidate: synthesize -VAL and negate.  The budget is
              whichever is tighter, the best result so far or MULT_COST,
              less the cost of the negation.  */
3006       op_cost = neg_cost (speed, mode);
3007       if (MULT_COST_LESS (&alg->cost, mult_cost))
3008         {
3009           limit.cost = alg->cost.cost - op_cost;
3010           limit.latency = alg->cost.latency - op_cost;
3011         }
3012       else
3013         {
3014           limit.cost = mult_cost - op_cost;
3015           limit.latency = mult_cost - op_cost;
3016         }
3017
           /* Negate as unsigned: synth_mult takes an unsigned multiplier and
              signed negation of the most negative value is undefined.  */
3018       synth_mult (&alg2, -(unsigned HOST_WIDE_INT) val, &limit, mode);
3019       alg2.cost.cost += op_cost;
3020       alg2.cost.latency += op_cost;
3021       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3022         *alg = alg2, *variant = negate_variant;
3023     }
3024
3025   /* This proves very useful for division-by-constant.  */
       /* Third candidate: synthesize VAL - 1 and add the multiplicand.  */
3026   op_cost = add_cost (speed, mode);
3027   if (MULT_COST_LESS (&alg->cost, mult_cost))
3028     {
3029       limit.cost = alg->cost.cost - op_cost;
3030       limit.latency = alg->cost.latency - op_cost;
3031     }
3032   else
3033     {
3034       limit.cost = mult_cost - op_cost;
3035       limit.latency = mult_cost - op_cost;
3036     }
3037
       /* Subtract as unsigned to avoid signed overflow for the most
          negative VAL.  */
3038   synth_mult (&alg2, (unsigned HOST_WIDE_INT) val - 1, &limit, mode);
3039   alg2.cost.cost += op_cost;
3040   alg2.cost.latency += op_cost;
3041   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3042     *alg = alg2, *variant = add_variant;
3043
3044   return MULT_COST_LESS (&alg->cost, mult_cost);
3045 }
3046
3047 /* A subroutine of expand_mult, used for constant multiplications.
3048    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3049    convenient.  Use the shift/add sequence described by ALG and apply
3050    the final fixup specified by VARIANT.  */
3051
3052 static rtx
3053 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3054                    rtx target, const struct algorithm *alg,
3055                    enum mult_variant variant)
3056 {
3057   HOST_WIDE_INT val_so_far;
3058   rtx_insn *insn;
3059   rtx accum, tem;
3060   int opno;
3061   machine_mode nmode;
3062
3063   /* Avoid referencing memory over and over and invalid sharing
3064      on SUBREGs.  */
3065   op0 = force_reg (mode, op0);
3066
3067   /* ACCUM starts out either as OP0 or as a zero, depending on
3068      the first operation.  */
3069
3070   if (alg->op[0] == alg_zero)
3071     {
3072       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3073       val_so_far = 0;
3074     }
3075   else if (alg->op[0] == alg_m)
3076     {
3077       accum = copy_to_mode_reg (mode, op0);
3078       val_so_far = 1;
3079     }
3080   else
3081     gcc_unreachable ();
3082
3083   for (opno = 1; opno < alg->ops; opno++)
3084     {
3085       int log = alg->log[opno];
3086       rtx shift_subtarget = optimize ? 0 : accum;
3087       rtx add_target
3088         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3089            && !optimize)
3090           ? target : 0;
3091       rtx accum_target = optimize ? 0 : accum;
3092       rtx accum_inner;
3093
3094       switch (alg->op[opno])
3095         {
3096         case alg_shift:
3097           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3098           /* REG_EQUAL note will be attached to the following insn.  */
3099           emit_move_insn (accum, tem);
3100           val_so_far <<= log;
3101           break;
3102
3103         case alg_add_t_m2:
3104           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3105           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3106                                  add_target ? add_target : accum_target);
3107           val_so_far += HOST_WIDE_INT_1 << log;
3108           break;
3109
3110         case alg_sub_t_m2:
3111           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3112           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3113                                  add_target ? add_target : accum_target);
3114           val_so_far -= HOST_WIDE_INT_1 << log;
3115           break;
3116
3117         case alg_add_t2_m:
3118           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3119                                 log, shift_subtarget, 0);
3120           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3121                                  add_target ? add_target : accum_target);
3122           val_so_far = (val_so_far << log) + 1;
3123           break;
3124
3125         case alg_sub_t2_m:
3126           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3127                                 log, shift_subtarget, 0);
3128           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3129                                  add_target ? add_target : accum_target);
3130           val_so_far = (val_so_far << log) - 1;
3131           break;
3132
3133         case alg_add_factor:
3134           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3135           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3136                                  add_target ? add_target : accum_target);
3137           val_so_far += val_so_far << log;
3138           break;
3139
3140         case alg_sub_factor:
3141           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3142           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3143                                  (add_target
3144                                   ? add_target : (optimize ? 0 : tem)));
3145           val_so_far = (val_so_far << log) - val_so_far;
3146           break;
3147
3148         default:
3149           gcc_unreachable ();
3150         }
3151
3152       if (SCALAR_INT_MODE_P (mode))
3153         {
3154           /* Write a REG_EQUAL note on the last insn so that we can cse
3155              multiplication sequences.  Note that if ACCUM is a SUBREG,
3156              we've set the inner register and must properly indicate that.  */
3157           tem = op0, nmode = mode;
3158           accum_inner = accum;
3159           if (GET_CODE (accum) == SUBREG)
3160             {
3161               accum_inner = SUBREG_REG (accum);
3162               nmode = GET_MODE (accum_inner);
3163               tem = gen_lowpart (nmode, op0);
3164             }
3165
3166           insn = get_last_insn ();
3167           set_dst_reg_note (insn, REG_EQUAL,
3168                             gen_rtx_MULT (nmode, tem,
3169                                           gen_int_mode (val_so_far, nmode)),
3170                             accum_inner);
3171         }
3172     }
3173
3174   if (variant == negate_variant)
3175     {
3176       val_so_far = -val_so_far;
3177       accum = expand_unop (mode, neg_optab, accum, target, 0);
3178     }
3179   else if (variant == add_variant)
3180     {
3181       val_so_far = val_so_far + 1;
3182       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3183     }
3184
3185   /* Compare only the bits of val and val_so_far that are significant
3186      in the result mode, to avoid sign-/zero-extension confusion.  */
3187   nmode = GET_MODE_INNER (mode);
3188   val &= GET_MODE_MASK (nmode);
3189   val_so_far &= GET_MODE_MASK (nmode);
3190   gcc_assert (val == val_so_far);
3191
3192   return accum;
3193 }
3194
3195 /* Perform a multiplication and return an rtx for the result.
3196    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3197    TARGET is a suggestion for where to store the result (an rtx).
3198
3199    We check specially for a constant integer as OP1.
3200    If you want this check for OP0 as well, then before calling
3201    you should swap the two operands if OP0 would be constant.
        (The body does swap OP0 and OP1 itself when OP0 satisfies
        CONSTANT_P.)

        UNSIGNEDP is forwarded to the shift and binop expanders; when it
        is nonzero the overflow-trapping (flag_trapv) expansion is never
        selected.  */
3202
3203 rtx
3204 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3205              int unsignedp)
3206 {
3207   enum mult_variant variant;
3208   struct algorithm algorithm;
3209   rtx scalar_op1;
3210   int max_cost;
3211   bool speed = optimize_insn_for_speed_p ();
       /* Trapping arithmetic is only used for signed scalar integer
          multiplies when flag_trapv is set.  */
3212   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3213
       /* Move a constant first operand into the OP1 position, where the
          constant checks below look for it.  */
3214   if (CONSTANT_P (op0))
3215     std::swap (op0, op1);
3216
3217   /* For vectors, there are several simplifications that can be made if
3218      all elements of the vector constant are identical.  */
3219   scalar_op1 = unwrap_const_vec_duplicate (op1);
3220
3221   if (INTEGRAL_MODE_P (mode))
3222     {
3223       rtx fake_reg;
3224       HOST_WIDE_INT coeff;
3225       bool is_neg;
3226       int mode_bitsize;
3227
           /* Multiplication by 0, 1 and -1 needs no multiply at all.  */
3228       if (op1 == CONST0_RTX (mode))
3229         return op1;
3230       if (op1 == CONST1_RTX (mode))
3231         return op0;
3232       if (op1 == CONSTM1_RTX (mode))
3233         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3234                             op0, target, 0);
3235
           /* A trapping multiply is never synthesized from shifts and adds;
              it goes straight to the smulv_optab expansion at skip_synth.  */
3236       if (do_trapv)
3237         goto skip_synth;
3238
3239       /* If mode is integer vector mode, check if the backend supports
3240          vector lshift (by scalar or vector) at all.  If not, we can't use
3241          synthesized multiply.  */
3242       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3243           && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3244           && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3245         goto skip_synth;
3246
3247       /* These are the operations that are potentially turned into
3248          a sequence of shifts and additions.  */
3249       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3250
3251       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3252          less than or equal in size to `unsigned int' this doesn't matter.
3253          If the mode is larger than `unsigned int', then synth_mult works
3254          only if the constant value exactly fits in an `unsigned int' without
3255          any truncation.  This means that multiplying by negative values does
3256          not work; results are off by 2^32 on a 32 bit machine.  */
3257       if (CONST_INT_P (scalar_op1))
3258         {
3259           coeff = INTVAL (scalar_op1);
3260           is_neg = coeff < 0;
3261         }
3262 #if TARGET_SUPPORTS_WIDE_INT
3263       else if (CONST_WIDE_INT_P (scalar_op1))
3264 #else
3265       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3266 #endif
3267         {
               /* Constants wider than a HOST_WIDE_INT are only handled when
                  they are an exact power of two.  */
3268           int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3269           /* Perfect power of 2 (other than 1, which is handled above).  */
3270           if (shift > 0)
3271             return expand_shift (LSHIFT_EXPR, mode, op0,
3272                                  shift, target, unsignedp);
3273           else
3274             goto skip_synth;
3275         }
3276       else
3277         goto skip_synth;
3278
3279       /* We used to test optimize here, on the grounds that it's better to
3280          produce a smaller program when -O is not used.  But this causes
3281          such a terrible slowdown sometimes that it seems better to always
3282          use synth_mult.  */
3283
3284       /* Special case powers of two.  */
3285       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3286           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3287         return expand_shift (LSHIFT_EXPR, mode, op0,
3288                              floor_log2 (coeff), target, unsignedp);
3289
           /* Placeholder register, used only to build a MULT rtx whose
              cost can be queried.  */
3290       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3291
3292       /* Attempt to handle multiplication of DImode values by negative
3293          coefficients, by performing the multiplication by a positive
3294          multiplier and then inverting the result.  */
3295       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3296         {
3297           /* It's safe to use -coeff even for INT_MIN, as the
3298              result is interpreted as an unsigned coefficient.
3299              Exclude cost of op0 from max_cost to match the cost
3300              calculation of the synth_mult.  */
3301           coeff = -(unsigned HOST_WIDE_INT) coeff;
3302           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3303                                     mode, speed)
3304                       - neg_cost (speed, mode));
               /* The final negation alone already uses up the budget.  */
3305           if (max_cost <= 0)
3306             goto skip_synth;
3307
3308           /* Special case powers of two.  */
3309           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3310             {
3311               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3312                                        floor_log2 (coeff), target, unsignedp);
3313               return expand_unop (mode, neg_optab, temp, target, 0);
3314             }
3315
3316           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3317                                    max_cost))
3318             {
3319               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3320                                             &algorithm, variant);
3321               return expand_unop (mode, neg_optab, temp, target, 0);
3322             }
3323           goto skip_synth;
3324         }
3325
3326       /* Exclude cost of op0 from max_cost to match the cost
3327          calculation of the synth_mult.  */
3328       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3329       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3330         return expand_mult_const (mode, op0, coeff, target,
3331                                   &algorithm, variant);
3332     }
       /* Reached when no shift/add synthesis was chosen; fall back to an
          addition (for x*2.0) or to the multiply optab.  */
3333  skip_synth:
3334
3335   /* Expand x*2.0 as x+x.  */
3336   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3337       && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3338     {
3339       op0 = force_reg (GET_MODE (op0), op0);
3340       return expand_binop (mode, add_optab, op0, op0,
3341                            target, unsignedp, OPTAB_LIB_WIDEN);
3342     }
3343
3344   /* This used to use umul_optab if unsigned, but for non-widening multiply
3345      there is no difference between signed and unsigned.  */
3346   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3347                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3348   gcc_assert (op0);
3349   return op0;
3350 }
3351
3352 /* Return a cost estimate for multiplying a register by the given
3353    COEFFicient in the given MODE and SPEED.  */
3354
3355 int
3356 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3357 {
3358   int max_cost;
3359   struct algorithm algorithm;
3360   enum mult_variant variant;
3361
3362   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3363   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3364                            mode, speed);
3365   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3366     return algorithm.cost.cost;
3367   else
3368     return max_cost;
3369 }
3370
3371 /* Perform a widening multiplication and return an rtx for the result.
3372    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3373    TARGET is a suggestion for where to store the result (an rtx).
3374    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3375    or smul_widen_optab.
3376
3377    We check specially for a constant integer as OP1, comparing the
3378    cost of a widening multiply against the cost of a sequence of shifts
3379    and adds.  */
3380
3381 rtx
3382 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3383                       int unsignedp, optab this_optab)
3384 {
3385   bool speed = optimize_insn_for_speed_p ();
3386   rtx cop1;
3387
3388   if (CONST_INT_P (op1)
3389       && GET_MODE (op0) != VOIDmode
3390       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3391                                 this_optab == umul_widen_optab))
3392       && CONST_INT_P (cop1)
3393       && (INTVAL (cop1) >= 0
3394           || HWI_COMPUTABLE_MODE_P (mode)))
3395     {
3396       HOST_WIDE_INT coeff = INTVAL (cop1);
3397       int max_cost;
3398       enum mult_variant variant;
3399       struct algorithm algorithm;
3400
3401       if (coeff == 0)
3402         return CONST0_RTX (mode);
3403
3404       /* Special case powers of two.  */
3405       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3406         {
3407           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3408           return expand_shift (LSHIFT_EXPR, mode, op0,
3409                                floor_log2 (coeff), target, unsignedp);
3410         }
3411
3412       /* Exclude cost of op0 from max_cost to match the cost
3413          calculation of the synth_mult.  */
3414       max_cost = mul_widen_cost (speed, mode);
3415       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3416                                max_cost))
3417         {
3418           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3419           return expand_mult_const (mode, op0, coeff, target,
3420                                     &algorithm, variant);
3421         }
3422     }
3423   return expand_binop (mode, this_optab, op0, op1, target,
3424                        unsignedp, OPTAB_LIB_WIDEN);
3425 }
3426 \f
3427 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3428    replace division by D, and put the least significant N bits of the result
3429    in *MULTIPLIER_PTR and return the most significant bit.
3430
3431    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3432    needed precision is in PRECISION (should be <= N).
3433
3434    PRECISION should be as small as possible so this function can choose
3435    multiplier more freely.
3436
3437    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3438    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3439
3440    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3441    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3442
3443 unsigned HOST_WIDE_INT
3444 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3445                    unsigned HOST_WIDE_INT *multiplier_ptr,
3446                    int *post_shift_ptr, int *lgup_ptr)
3447 {
3448   int lgup, post_shift;
3449   int pow, pow2;
3450
3451   /* lgup = ceil(log2(divisor)); */
3452   lgup = ceil_log2 (d);
3453
3454   gcc_assert (lgup <= n);
3455
3456   pow = n + lgup;
3457   pow2 = n + lgup - precision;
3458
3459   /* mlow = 2^(N + lgup)/d */
3460   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3461   wide_int mlow = wi::udiv_trunc (val, d);
3462
3463   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3464   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3465   wide_int mhigh = wi::udiv_trunc (val, d);
3466
3467   /* If precision == N, then mlow, mhigh exceed 2^N
3468      (but they do not exceed 2^(N+1)).  */
3469
3470   /* Reduce to lowest terms.  */
3471   for (post_shift = lgup; post_shift > 0; post_shift--)
3472     {
3473       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3474                                                        HOST_BITS_PER_WIDE_INT);
3475       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3476                                                        HOST_BITS_PER_WIDE_INT);
3477       if (ml_lo >= mh_lo)
3478         break;
3479
3480       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3481       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3482     }
3483
3484   *post_shift_ptr = post_shift;
3485   *lgup_ptr = lgup;
3486   if (n < HOST_BITS_PER_WIDE_INT)
3487     {
3488       unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3489       *multiplier_ptr = mhigh.to_uhwi () & mask;
3490       return mhigh.to_uhwi () >= mask;
3491     }
3492   else
3493     {
3494       *multiplier_ptr = mhigh.to_uhwi ();
3495       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3496     }
3497 }
3498
3499 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3500    congruent to 1 (mod 2**N).  */
3501
3502 static unsigned HOST_WIDE_INT
3503 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3504 {
3505   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3506
3507   /* The algorithm notes that the choice y = x satisfies
3508      x*y == 1 mod 2^3, since x is assumed odd.
3509      Each iteration doubles the number of bits of significance in y.  */
3510
3511   unsigned HOST_WIDE_INT mask;
3512   unsigned HOST_WIDE_INT y = x;
3513   int nbit = 3;
3514
3515   mask = (n == HOST_BITS_PER_WIDE_INT
3516           ? HOST_WIDE_INT_M1U
3517           : (HOST_WIDE_INT_1U << n) - 1);
3518
3519   while (nbit < n)
3520     {
3521       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3522       nbit *= 2;
3523     }
3524   return y;
3525 }
3526
3527 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3528    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3529    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3530    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3531    become signed.
3532
3533    The result is put in TARGET if that is convenient.
3534
3535    MODE is the mode of operation.  */
3536
3537 rtx
3538 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3539                              rtx op1, rtx target, int unsignedp)
3540 {
3541   rtx tem;
3542   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3543
3544   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3545                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3546   tem = expand_and (mode, tem, op1, NULL_RTX);
3547   adj_operand
3548     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3549                      adj_operand);
3550
3551   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3552                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3553   tem = expand_and (mode, tem, op0, NULL_RTX);
3554   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3555                           target);
3556
3557   return target;
3558 }
3559
3560 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3561
3562 static rtx
3563 extract_high_half (machine_mode mode, rtx op)
3564 {
3565   machine_mode wider_mode;
3566
3567   if (mode == word_mode)
3568     return gen_highpart (mode, op);
3569
3570   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3571
3572   wider_mode = GET_MODE_WIDER_MODE (mode);
3573   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3574                      GET_MODE_BITSIZE (mode), 0, 1);
3575   return convert_modes (mode, wider_mode, op, 0);
3576 }
3577
3578 /* Like expmed_mult_highpart, but only consider using a multiplication
3579    optab.  OP1 is an rtx for the constant operand.

        The strategies below are tried in order, each only if its estimated
        cost is below MAX_COST; the first one that expands successfully
        supplies the result.  Returns 0 if none of them does.  */
3580
3581 static rtx
3582 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3583                             rtx target, int unsignedp, int max_cost)
3584 {
       /* The constant reduced to MODE; used wherever the multiplication
          is done on MODE-sized operands.  */
3585   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3586   machine_mode wider_mode;
3587   optab moptab;
3588   rtx tem;
3589   int size;
3590   bool speed = optimize_insn_for_speed_p ();
3591
3592   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3593
3594   wider_mode = GET_MODE_WIDER_MODE (mode);
3595   size = GET_MODE_BITSIZE (mode);
3596
3597   /* Firstly, try using a multiplication insn that only generates the needed
3598      high part of the product, and in the sign flavor of unsignedp.  */
3599   if (mul_highpart_cost (speed, mode) < max_cost)
3600     {
3601       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3602       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3603                           unsignedp, OPTAB_DIRECT);
3604       if (tem)
3605         return tem;
3606     }
3607
3608   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3609      Need to adjust the result after the multiplication.  */
       /* The cost estimate charges two shifts and four additions for the
          adjustment on top of the highpart multiply.  */
3610   if (size - 1 < BITS_PER_WORD
3611       && (mul_highpart_cost (speed, mode)
3612           + 2 * shift_cost (speed, mode, size-1)
3613           + 4 * add_cost (speed, mode) < max_cost))
3614     {
3615       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3616       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3617                           unsignedp, OPTAB_DIRECT);
3618       if (tem)
3619         /* We used the wrong signedness.  Adjust the result.  */
3620         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3621                                             tem, unsignedp);
3622     }
3623
3624   /* Try widening multiplication.  */
3625   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3626   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3627       && mul_widen_cost (speed, wider_mode) < max_cost)
3628     {
3629       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3630                           unsignedp, OPTAB_WIDEN);
3631       if (tem)
3632         return extract_high_half (mode, tem);
3633     }
3634
3635   /* Try widening the mode and perform a non-widening multiplication.  */
3636   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3637       && size - 1 < BITS_PER_WORD
3638       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3639           < max_cost))
3640     {
3641       rtx_insn *insns;
3642       rtx wop0, wop1;
3643
3644       /* We need to widen the operands, for example to ensure the
3645          constant multiplier is correctly sign or zero extended.
3646          Use a sequence to clean-up any instructions emitted by
3647          the conversions if things don't work out.  */
3648       start_sequence ();
3649       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3650       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3651       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3652                           unsignedp, OPTAB_WIDEN);
3653       insns = get_insns ();
3654       end_sequence ();
3655
           /* Only on success are the collected insns actually emitted.  */
3656       if (tem)
3657         {
3658           emit_insn (insns);
3659           return extract_high_half (mode, tem);
3660         }
3661     }
3662
3663   /* Try widening multiplication of opposite signedness, and adjust.  */
3664   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3665   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3666       && size - 1 < BITS_PER_WORD
3667       && (mul_widen_cost (speed, wider_mode)
3668           + 2 * shift_cost (speed, mode, size-1)
3669           + 4 * add_cost (speed, mode) < max_cost))
3670     {
3671       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3672                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3673       if (tem != 0)
3674         {
3675           tem = extract_high_half (mode, tem);
3676           /* We used the wrong signedness.  Adjust the result.  */
3677           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3678                                               target, unsignedp);
3679         }
3680     }
3681
       /* Nothing was both available and cheap enough.  */
3682   return 0;
3683 }
3684
3685 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3686    putting the high half of the result in TARGET if that is convenient,
3687    and return where the result is.  If the operation can not be performed,
3688    0 is returned.
3689
3690    MODE is the mode of operation and result.
3691
3692    UNSIGNEDP nonzero means unsigned multiply.
3693
3694    MAX_COST is the total allowed cost for the expanded RTL.  */
3695
3696 static rtx
3697 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3698                       rtx target, int unsignedp, int max_cost)
3699 {
3700   machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3701   unsigned HOST_WIDE_INT cnst1;
3702   int extra_cost;
3703   bool sign_adjust = false;
3704   enum mult_variant variant;
3705   struct algorithm alg;
3706   rtx tem;
3707   bool speed = optimize_insn_for_speed_p ();
3708
3709   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3710   /* Only modes accepted by HWI_COMPUTABLE_MODE_P are supported; the
          constant is handled below as a single unsigned HOST_WIDE_INT.  */
3711   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3712
       /* The multiplier, reduced to the bits of MODE and treated as an
          unsigned value.  */
3713   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3714
3715   /* We can't optimize modes wider than BITS_PER_WORD.
3716      ??? We might be able to perform double-word arithmetic if
3717      mode == word_mode, however all the cost calculations in
3718      synth_mult etc. assume single-word operations.  */
3719   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3720     return expmed_mult_highpart_optab (mode, op0, op1, target,
3721                                        unsignedp, max_cost);
3722
       /* Cost charged on top of the shift/add sequence itself: one shift,
          plus one addition when a sign adjustment is needed.  */
3723   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3724
3725   /* Check whether we try to multiply by a negative constant.  */
3726   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3727     {
3728       sign_adjust = true;
3729       extra_cost += add_cost (speed, mode);
3730     }
3731
3732   /* See whether shift/add multiplication is cheap enough.  */
3733   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3734                            max_cost - extra_cost))
3735     {
3736       /* See whether the specialized multiplication optabs are
3737          cheaper than the shift/add version.  */
3738       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3739                                         alg.cost.cost + extra_cost);
3740       if (tem)
3741         return tem;
3742
           /* Do the shift/add multiplication in the wider mode and keep
              only the upper half of the product.  */
3743       tem = convert_to_mode (wider_mode, op0, unsignedp);
3744       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3745       tem = extract_high_half (mode, tem);
3746
3747       /* Adjust result for signedness.  */
           /* CNST1 was used as an unsigned value, so for a negative
              constant the high half is too large by OP0.  */
3748       if (sign_adjust)
3749         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3750
3751       return tem;
3752     }
       /* Shift/add synthesis is too expensive; rely on the optabs alone.  */
3753   return expmed_mult_highpart_optab (mode, op0, op1, target,
3754                                      unsignedp, max_cost);
3755 }
3756
3757
3758 /* Expand signed modulus of OP0 by a power of two D in mode MODE.
        Return an rtx holding the result.  A branch-free sequence is
        emitted when branches are costly and the insn is optimized for
        speed; otherwise a compare-and-jump sequence is used.  */
3759
3760 static rtx
3761 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3762 {
3763   rtx result, temp, shift;
3764   rtx_code_label *label;
3765   int logd;
3766   int prec = GET_MODE_PRECISION (mode);
3767
3768   logd = floor_log2 (d);
3769   result = gen_reg_rtx (mode);
3770
3771   /* Avoid conditional branches when they're expensive.  */
3772   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3773       && optimize_insn_for_speed_p ())
3774     {
           /* SIGNMASK is requested as -1 when OP0 < 0 and 0 otherwise; if
              the store-flag expansion fails we fall through to the
              branching code below.  */
3775       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3776                                       mode, 0, -1);
3777       if (signmask)
3778         {
               /* The low LOGD bits, i.e. D - 1.  */
3779           HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3780           signmask = force_reg (mode, signmask);
3781           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3782
3783           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3784              which instruction sequence to use.  If logical right shifts
3785              are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3786              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3787
               /* TEMP is built here only so that its cost can be queried.  */
3788           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3789           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3790               || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3791                   > COSTS_N_INSNS (2)))
3792             {
                   /* ((((OP0 ^ SIGNMASK) - SIGNMASK) & MASKLOW) ^ SIGNMASK)
                      - SIGNMASK.  */
3793               temp = expand_binop (mode, xor_optab, op0, signmask,
3794                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3795               temp = expand_binop (mode, sub_optab, temp, signmask,
3796                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3797               temp = expand_binop (mode, and_optab, temp,
3798                                    gen_int_mode (masklow, mode),
3799                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3800               temp = expand_binop (mode, xor_optab, temp, signmask,
3801                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3802               temp = expand_binop (mode, sub_optab, temp, signmask,
3803                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3804             }
3805           else
3806             {
                   /* Shift SIGNMASK right logically first, then compute
                      ((OP0 + SIGNMASK) & MASKLOW) - SIGNMASK.  */
3807               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3808                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3809               signmask = force_reg (mode, signmask);
3810
3811               temp = expand_binop (mode, add_optab, op0, signmask,
3812                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3813               temp = expand_binop (mode, and_optab, temp,
3814                                    gen_int_mode (masklow, mode),
3815                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3816               temp = expand_binop (mode, sub_optab, temp, signmask,
3817                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3818             }
3819           return temp;
3820         }
3821     }
3822
3823   /* Mask contains the mode's signbit and the significant bits of the
3824      modulus.  By including the signbit in the operation, many targets
3825      can avoid an explicit compare operation in the following comparison
3826      against zero.  */
3827   wide_int mask = wi::mask (logd, false, prec);
3828   mask = wi::set_bit (mask, prec - 1);
3829
3830   temp = expand_binop (mode, and_optab, op0,
3831                        immed_wide_int_const (mask, mode),
3832                        result, 1, OPTAB_LIB_WIDEN);
3833   if (temp != result)
3834     emit_move_insn (result, temp);
3835
       /* If the masked value is non-negative it already is the answer;
          jump over the fixup.  */
3836   label = gen_label_rtx ();
3837   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3838
       /* Negative case: subtract 1, OR in a second mask, and add 1 back.
          NOTE(review): the second mask is presumably the complement of the
          low LOGD bits (wi::mask with its negate argument set) -- confirm
          against the wide-int documentation.  */
3839   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3840                        0, OPTAB_LIB_WIDEN);
3841
3842   mask = wi::mask (logd, true, prec);
3843   temp = expand_binop (mode, ior_optab, temp,
3844                        immed_wide_int_const (mask, mode),
3845                        result, 1, OPTAB_LIB_WIDEN);
3846   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3847                        0, OPTAB_LIB_WIDEN);
3848   if (temp != result)
3849     emit_move_insn (result, temp);
3850   emit_label (label);
3851   return result;
3852 }
3853
3854 /* Expand signed division of OP0 by a power of two D in mode MODE.
3855    This routine is only called for positive values of D.  */
3856
3857 static rtx
3858 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3859 {
3860   rtx temp;
3861   rtx_code_label *label;
3862   int logd;
3863
3864   logd = floor_log2 (d);
3865
3866   if (d == 2
3867       && BRANCH_COST (optimize_insn_for_speed_p (),
3868                       false) >= 1)
3869     {
3870       temp = gen_reg_rtx (mode);
3871       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3872       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3873                            0, OPTAB_LIB_WIDEN);
3874       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3875     }
3876
3877   if (HAVE_conditional_move
3878       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3879     {
3880       rtx temp2;
3881
3882       start_sequence ();
3883       temp2 = copy_to_mode_reg (mode, op0);
3884       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3885                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3886       temp = force_reg (mode, temp);
3887
3888       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3889       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3890                                      mode, temp, temp2, mode, 0);
3891       if (temp2)
3892         {
3893           rtx_insn *seq = get_insns ();
3894           end_sequence ();
3895           emit_insn (seq);
3896           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3897         }
3898       end_sequence ();
3899     }
3900
3901   if (BRANCH_COST (optimize_insn_for_speed_p (),
3902                    false) >= 2)
3903     {
3904       int ushift = GET_MODE_BITSIZE (mode) - logd;
3905
3906       temp = gen_reg_rtx (mode);
3907       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3908       if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3909           || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3910              > COSTS_N_INSNS (1))
3911         temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3912                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3913       else
3914         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3915                              ushift, NULL_RTX, 1);
3916       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3917                            0, OPTAB_LIB_WIDEN);
3918       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3919     }
3920
3921   label = gen_label_rtx ();
3922   temp = copy_to_mode_reg (mode, op0);
3923   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3924   expand_inc (temp, gen_int_mode (d - 1, mode));
3925   emit_label (label);
3926   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3927 }
3928 \f
3929 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3930    if that is convenient, and returning where the result is.
3931    You may request either the quotient or the remainder as the result;
3932    specify REM_FLAG nonzero to get the remainder.
3933
3934    CODE is the expression code for which kind of division this is;
3935    it controls how rounding is done.  MODE is the machine mode to use.
3936    UNSIGNEDP nonzero means do unsigned division.  */
3937
3938 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3939    and then correct it by or'ing in missing high bits
3940    if result of ANDI is nonzero.
3941    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3942    This could optimize to a bfexts instruction.
3943    But C doesn't use these operations, so their optimizations are
3944    left for later.  */
3945 /* ??? For modulo, we don't actually need the highpart of the first product,
3946    the low part will do nicely.  And for small divisors, the second multiply
3947    can also be a low-part only multiply or even be completely left out.
3948    E.g. to calculate the remainder of a division by 3 with a 32 bit
3949    multiply, multiply with 0x55555556 and extract the upper two bits;
3950    the result is exact for inputs up to 0x1fffffff.
3951    The input range can be reduced by using cross-sum rules.
3952    For odd divisors >= 3, the following table gives right shift counts
3953    so that if a number is shifted by an integer multiple of the given
3954    amount, the remainder stays the same:
3955    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3956    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3957    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3958    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3959    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3960
3961    Cross-sum rules for even numbers can be derived by leaving as many bits
3962    to the right alone as the divisor has zeros to the right.
3963    E.g. if x is an unsigned 32 bit number:
3964    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3965    */
3966
3967 rtx
3968 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3969                rtx op0, rtx op1, rtx target, int unsignedp)
3970 {
3971   machine_mode compute_mode;
3972   rtx tquotient;
3973   rtx quotient = 0, remainder = 0;
3974   rtx_insn *last;
3975   int size;
3976   rtx_insn *insn;
3977   optab optab1, optab2;
3978   int op1_is_constant, op1_is_pow2 = 0;
3979   int max_cost, extra_cost;
3980   static HOST_WIDE_INT last_div_const = 0;
3981   bool speed = optimize_insn_for_speed_p ();
3982
3983   op1_is_constant = CONST_INT_P (op1);
3984   if (op1_is_constant)
3985     {
3986       unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3987       if (unsignedp)
3988         ext_op1 &= GET_MODE_MASK (mode);
3989       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3990                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3991     }
3992
3993   /*
3994      This is the structure of expand_divmod:
3995
3996      First comes code to fix up the operands so we can perform the operations
3997      correctly and efficiently.
3998
3999      Second comes a switch statement with code specific for each rounding mode.
4000      For some special operands this code emits all RTL for the desired
4001      operation, for other cases, it generates only a quotient and stores it in
4002      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4003      to indicate that it has not done anything.
4004
4005      Last comes code that finishes the operation.  If QUOTIENT is set and
4006      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4007      QUOTIENT is not set, it is computed using trunc rounding.
4008
4009      We try to generate special code for division and remainder when OP1 is a
4010      constant.  If |OP1| = 2**n we can use shifts and some other fast
4011      operations.  For other values of OP1, we compute a carefully selected
4012      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4013      by m.
4014
4015      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4016      half of the product.  Different strategies for generating the product are
4017      implemented in expmed_mult_highpart.
4018
4019      If what we actually want is the remainder, we generate that by another
4020      by-constant multiplication and a subtraction.  */
4021
4022   /* We shouldn't be called with OP1 == const1_rtx, but some of the
4023      code below will malfunction if we are, so check here and handle
4024      the special case if so.  */
4025   if (op1 == const1_rtx)
4026     return rem_flag ? const0_rtx : op0;
4027
4028     /* When dividing by -1, we could get an overflow.
4029      negv_optab can handle overflows.  */
4030   if (! unsignedp && op1 == constm1_rtx)
4031     {
4032       if (rem_flag)
4033         return const0_rtx;
4034       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4035                           ? negv_optab : neg_optab, op0, target, 0);
4036     }
4037
4038   if (target
4039       /* Don't use the function value register as a target
4040          since we have to read it as well as write it,
4041          and function-inlining gets confused by this.  */
4042       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4043           /* Don't clobber an operand while doing a multi-step calculation.  */
4044           || ((rem_flag || op1_is_constant)
4045               && (reg_mentioned_p (target, op0)
4046                   || (MEM_P (op0) && MEM_P (target))))
4047           || reg_mentioned_p (target, op1)
4048           || (MEM_P (op1) && MEM_P (target))))
4049     target = 0;
4050
4051   /* Get the mode in which to perform this computation.  Normally it will
4052      be MODE, but sometimes we can't do the desired operation in MODE.
4053      If so, pick a wider mode in which we can do the operation.  Convert
4054      to that mode at the start to avoid repeated conversions.
4055
4056      First see what operations we need.  These depend on the expression
4057      we are evaluating.  (We assume that divxx3 insns exist under the
4058      same conditions that modxx3 insns and that these insns don't normally
4059      fail.  If these assumptions are not correct, we may generate less
4060      efficient code in some cases.)
4061
4062      Then see if we find a mode in which we can open-code that operation
4063      (either a division, modulus, or shift).  Finally, check for the smallest
4064      mode for which we can do the operation with a library call.  */
4065
4066   /* We might want to refine this now that we have division-by-constant
4067      optimization.  Since expmed_mult_highpart tries so many variants, it is
4068      not straightforward to generalize this.  Maybe we should make an array
4069      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4070
4071   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
4072             ? (unsignedp ? lshr_optab : ashr_optab)
4073             : (unsignedp ? udiv_optab : sdiv_optab));
4074   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
4075             ? optab1
4076             : (unsignedp ? udivmod_optab : sdivmod_optab));
4077
4078   for (compute_mode = mode; compute_mode != VOIDmode;
4079        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4080     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4081         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4082       break;
4083
4084   if (compute_mode == VOIDmode)
4085     for (compute_mode = mode; compute_mode != VOIDmode;
4086          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4087       if (optab_libfunc (optab1, compute_mode)
4088           || optab_libfunc (optab2, compute_mode))
4089         break;
4090
4091   /* If we still couldn't find a mode, use MODE, but expand_binop will
4092      probably die.  */
4093   if (compute_mode == VOIDmode)
4094     compute_mode = mode;
4095
4096   if (target && GET_MODE (target) == compute_mode)
4097     tquotient = target;
4098   else
4099     tquotient = gen_reg_rtx (compute_mode);
4100
4101   size = GET_MODE_BITSIZE (compute_mode);
4102 #if 0
4103   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4104      (mode), and thereby get better code when OP1 is a constant.  Do that
4105      later.  It will require going over all usages of SIZE below.  */
4106   size = GET_MODE_BITSIZE (mode);
4107 #endif
4108
4109   /* Only deduct something for a REM if the last divide done was
4110      for a different constant.   Then set the constant of the last
4111      divide.  */
4112   max_cost = (unsignedp
4113               ? udiv_cost (speed, compute_mode)
4114               : sdiv_cost (speed, compute_mode));
4115   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4116                      && INTVAL (op1) == last_div_const))
4117     max_cost -= (mul_cost (speed, compute_mode)
4118                  + add_cost (speed, compute_mode));
4119
4120   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4121
4122   /* Now convert to the best mode to use.  */
4123   if (compute_mode != mode)
4124     {
4125       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4126       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4127
4128       /* convert_modes may have placed op1 into a register, so we
4129          must recompute the following.  */
4130       op1_is_constant = CONST_INT_P (op1);
4131       op1_is_pow2 = (op1_is_constant
4132                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4133                           || (! unsignedp
4134                               && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4135     }
4136
4137   /* If one of the operands is a volatile MEM, copy it into a register.  */
4138
4139   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4140     op0 = force_reg (compute_mode, op0);
4141   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4142     op1 = force_reg (compute_mode, op1);
4143
4144   /* If we need the remainder or if OP1 is constant, we need to
4145      put OP0 in a register in case it has any queued subexpressions.  */
4146   if (rem_flag || op1_is_constant)
4147     op0 = force_reg (compute_mode, op0);
4148
4149   last = get_last_insn ();
4150
4151   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4152   if (unsignedp)
4153     {
4154       if (code == FLOOR_DIV_EXPR)
4155         code = TRUNC_DIV_EXPR;
4156       if (code == FLOOR_MOD_EXPR)
4157         code = TRUNC_MOD_EXPR;
4158       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4159         code = TRUNC_DIV_EXPR;
4160     }
4161
4162   if (op1 != const0_rtx)
4163     switch (code)
4164       {
4165       case TRUNC_MOD_EXPR:
4166       case TRUNC_DIV_EXPR:
4167         if (op1_is_constant)
4168           {
4169             if (unsignedp)
4170               {
4171                 unsigned HOST_WIDE_INT mh, ml;
4172                 int pre_shift, post_shift;
4173                 int dummy;
4174                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4175                                             & GET_MODE_MASK (compute_mode));
4176
4177                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4178                   {
4179                     pre_shift = floor_log2 (d);
4180                     if (rem_flag)
4181                       {
4182                         unsigned HOST_WIDE_INT mask
4183                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4184                         remainder
4185                           = expand_binop (compute_mode, and_optab, op0,
4186                                           gen_int_mode (mask, compute_mode),
4187                                           remainder, 1,
4188                                           OPTAB_LIB_WIDEN);
4189                         if (remainder)
4190                           return gen_lowpart (mode, remainder);
4191                       }
4192                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4193                                              pre_shift, tquotient, 1);
4194                   }
4195                 else if (size <= HOST_BITS_PER_WIDE_INT)
4196                   {
4197                     if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4198                       {
4199                         /* Most significant bit of divisor is set; emit an scc
4200                            insn.  */
4201                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4202                                                           compute_mode, 1, 1);
4203                       }
4204                     else
4205                       {
4206                         /* Find a suitable multiplier and right shift count
4207                            instead of multiplying with D.  */
4208
4209                         mh = choose_multiplier (d, size, size,
4210                                                 &ml, &post_shift, &dummy);
4211
4212                         /* If the suggested multiplier is more than SIZE bits,
4213                            we can do better for even divisors, using an
4214                            initial right shift.  */
4215                         if (mh != 0 && (d & 1) == 0)
4216                           {
4217                             pre_shift = floor_log2 (d & -d);
4218                             mh = choose_multiplier (d >> pre_shift, size,
4219                                                     size - pre_shift,
4220                                                     &ml, &post_shift, &dummy);
4221                             gcc_assert (!mh);
4222                           }
4223                         else
4224                           pre_shift = 0;
4225
4226                         if (mh != 0)
4227                           {
4228                             rtx t1, t2, t3, t4;
4229
4230                             if (post_shift - 1 >= BITS_PER_WORD)
4231                               goto fail1;
4232
4233                             extra_cost
4234                               = (shift_cost (speed, compute_mode, post_shift - 1)
4235                                  + shift_cost (speed, compute_mode, 1)
4236                                  + 2 * add_cost (speed, compute_mode));
4237                             t1 = expmed_mult_highpart
4238                               (compute_mode, op0,
4239                                gen_int_mode (ml, compute_mode),
4240                                NULL_RTX, 1, max_cost - extra_cost);
4241                             if (t1 == 0)
4242                               goto fail1;
4243                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4244                                                                op0, t1),
4245                                                 NULL_RTX);
4246                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4247                                                t2, 1, NULL_RTX, 1);
4248                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4249                                                               t1, t3),
4250                                                 NULL_RTX);
4251                             quotient = expand_shift
4252                               (RSHIFT_EXPR, compute_mode, t4,
4253                                post_shift - 1, tquotient, 1);
4254                           }
4255                         else
4256                           {
4257                             rtx t1, t2;
4258
4259                             if (pre_shift >= BITS_PER_WORD
4260                                 || post_shift >= BITS_PER_WORD)
4261                               goto fail1;
4262
4263                             t1 = expand_shift
4264                               (RSHIFT_EXPR, compute_mode, op0,
4265                                pre_shift, NULL_RTX, 1);
4266                             extra_cost
4267                               = (shift_cost (speed, compute_mode, pre_shift)
4268                                  + shift_cost (speed, compute_mode, post_shift));
4269                             t2 = expmed_mult_highpart
4270                               (compute_mode, t1,
4271                                gen_int_mode (ml, compute_mode),
4272                                NULL_RTX, 1, max_cost - extra_cost);
4273                             if (t2 == 0)
4274                               goto fail1;
4275                             quotient = expand_shift
4276                               (RSHIFT_EXPR, compute_mode, t2,
4277                                post_shift, tquotient, 1);
4278                           }
4279                       }
4280                   }
4281                 else            /* Too wide mode to use tricky code */
4282                   break;
4283
4284                 insn = get_last_insn ();
4285                 if (insn != last)
4286                   set_dst_reg_note (insn, REG_EQUAL,
4287                                     gen_rtx_UDIV (compute_mode, op0, op1),
4288                                     quotient);
4289               }
4290             else                /* TRUNC_DIV, signed */
4291               {
4292                 unsigned HOST_WIDE_INT ml;
4293                 int lgup, post_shift;
4294                 rtx mlr;
4295                 HOST_WIDE_INT d = INTVAL (op1);
4296                 unsigned HOST_WIDE_INT abs_d;
4297
4298                 /* Since d might be INT_MIN, we have to cast to
4299                    unsigned HOST_WIDE_INT before negating to avoid
4300                    undefined signed overflow.  */
4301                 abs_d = (d >= 0
4302                          ? (unsigned HOST_WIDE_INT) d
4303                          : - (unsigned HOST_WIDE_INT) d);
4304
4305                 /* n rem d = n rem -d */
4306                 if (rem_flag && d < 0)
4307                   {
4308                     d = abs_d;
4309                     op1 = gen_int_mode (abs_d, compute_mode);
4310                   }
4311
4312                 if (d == 1)
4313                   quotient = op0;
4314                 else if (d == -1)
4315                   quotient = expand_unop (compute_mode, neg_optab, op0,
4316                                           tquotient, 0);
4317                 else if (HOST_BITS_PER_WIDE_INT >= size
4318                          && abs_d == HOST_WIDE_INT_1U << (size - 1))
4319                   {
4320                     /* This case is not handled correctly below.  */
4321                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4322                                                 compute_mode, 1, 1);
4323                     if (quotient == 0)
4324                       goto fail1;
4325                   }
4326                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4327                          && (rem_flag
4328                              ? smod_pow2_cheap (speed, compute_mode)
4329                              : sdiv_pow2_cheap (speed, compute_mode))
4330                          /* We assume that cheap metric is true if the
4331                             optab has an expander for this mode.  */
4332                          && ((optab_handler ((rem_flag ? smod_optab
4333                                               : sdiv_optab),
4334                                              compute_mode)
4335                               != CODE_FOR_nothing)
4336                              || (optab_handler (sdivmod_optab,
4337                                                 compute_mode)
4338                                  != CODE_FOR_nothing)))
4339                   ;
4340                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4341                   {
4342                     if (rem_flag)
4343                       {
4344                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4345                         if (remainder)
4346                           return gen_lowpart (mode, remainder);
4347                       }
4348
4349                     if (sdiv_pow2_cheap (speed, compute_mode)
4350                         && ((optab_handler (sdiv_optab, compute_mode)
4351                              != CODE_FOR_nothing)
4352                             || (optab_handler (sdivmod_optab, compute_mode)
4353                                 != CODE_FOR_nothing)))
4354                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4355                                                 compute_mode, op0,
4356                                                 gen_int_mode (abs_d,
4357                                                               compute_mode),
4358                                                 NULL_RTX, 0);
4359                     else
4360                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4361
4362                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4363                        negate the quotient.  */
4364                     if (d < 0)
4365                       {
4366                         insn = get_last_insn ();
4367                         if (insn != last
4368                             && abs_d < (HOST_WIDE_INT_1U
4369                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4370                           set_dst_reg_note (insn, REG_EQUAL,
4371                                             gen_rtx_DIV (compute_mode, op0,
4372                                                          gen_int_mode
4373                                                            (abs_d,
4374                                                             compute_mode)),
4375                                             quotient);
4376
4377                         quotient = expand_unop (compute_mode, neg_optab,
4378                                                 quotient, quotient, 0);
4379                       }
4380                   }
4381                 else if (size <= HOST_BITS_PER_WIDE_INT)
4382                   {
4383                     choose_multiplier (abs_d, size, size - 1,
4384                                        &ml, &post_shift, &lgup);
4385                     if (ml < HOST_WIDE_INT_1U << (size - 1))
4386                       {
4387                         rtx t1, t2, t3;
4388
4389                         if (post_shift >= BITS_PER_WORD
4390                             || size - 1 >= BITS_PER_WORD)
4391                           goto fail1;
4392
4393                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4394                                       + shift_cost (speed, compute_mode, size - 1)
4395                                       + add_cost (speed, compute_mode));
4396                         t1 = expmed_mult_highpart
4397                           (compute_mode, op0, gen_int_mode (ml, compute_mode),
4398                            NULL_RTX, 0, max_cost - extra_cost);
4399                         if (t1 == 0)
4400                           goto fail1;
4401                         t2 = expand_shift
4402                           (RSHIFT_EXPR, compute_mode, t1,
4403                            post_shift, NULL_RTX, 0);
4404                         t3 = expand_shift
4405                           (RSHIFT_EXPR, compute_mode, op0,
4406                            size - 1, NULL_RTX, 0);
4407                         if (d < 0)
4408                           quotient
4409                             = force_operand (gen_rtx_MINUS (compute_mode,
4410                                                             t3, t2),
4411                                              tquotient);
4412                         else
4413                           quotient
4414                             = force_operand (gen_rtx_MINUS (compute_mode,
4415                                                             t2, t3),
4416                                              tquotient);
4417                       }
4418                     else
4419                       {
4420                         rtx t1, t2, t3, t4;
4421
4422                         if (post_shift >= BITS_PER_WORD
4423                             || size - 1 >= BITS_PER_WORD)
4424                           goto fail1;
4425
4426                         ml |= HOST_WIDE_INT_M1U << (size - 1);
4427                         mlr = gen_int_mode (ml, compute_mode);
4428                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4429                                       + shift_cost (speed, compute_mode, size - 1)
4430                                       + 2 * add_cost (speed, compute_mode));
4431                         t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4432                                                    NULL_RTX, 0,
4433                                                    max_cost - extra_cost);
4434                         if (t1 == 0)
4435                           goto fail1;
4436                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4437                                                           t1, op0),
4438                                             NULL_RTX);
4439                         t3 = expand_shift
4440                           (RSHIFT_EXPR, compute_mode, t2,
4441                            post_shift, NULL_RTX, 0);
4442                         t4 = expand_shift
4443                           (RSHIFT_EXPR, compute_mode, op0,
4444                            size - 1, NULL_RTX, 0);
4445                         if (d < 0)
4446                           quotient
4447                             = force_operand (gen_rtx_MINUS (compute_mode,
4448                                                             t4, t3),
4449                                              tquotient);
4450                         else
4451                           quotient
4452                             = force_operand (gen_rtx_MINUS (compute_mode,
4453                                                             t3, t4),
4454                                              tquotient);
4455                       }
4456                   }
4457                 else            /* Too wide mode to use tricky code */
4458                   break;
4459
4460                 insn = get_last_insn ();
4461                 if (insn != last)
4462                   set_dst_reg_note (insn, REG_EQUAL,
4463                                     gen_rtx_DIV (compute_mode, op0, op1),
4464                                     quotient);
4465               }
4466             break;
4467           }
4468       fail1:
4469         delete_insns_since (last);
4470         break;
4471
4472       case FLOOR_DIV_EXPR:
4473       case FLOOR_MOD_EXPR:
4474       /* We will come here only for signed operations.  */
4475         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4476           {
4477             unsigned HOST_WIDE_INT mh, ml;
4478             int pre_shift, lgup, post_shift;
4479             HOST_WIDE_INT d = INTVAL (op1);
4480
4481             if (d > 0)
4482               {
4483                 /* We could just as easily deal with negative constants here,
4484                    but it does not seem worth the trouble for GCC 2.6.  */
4485                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4486                   {
4487                     pre_shift = floor_log2 (d);
4488                     if (rem_flag)
4489                       {
4490                         unsigned HOST_WIDE_INT mask
4491                           = (HOST_WIDE_INT_1U << pre_shift) - 1;
4492                         remainder = expand_binop
4493                           (compute_mode, and_optab, op0,
4494                            gen_int_mode (mask, compute_mode),
4495                            remainder, 0, OPTAB_LIB_WIDEN);
4496                         if (remainder)
4497                           return gen_lowpart (mode, remainder);
4498                       }
4499                     quotient = expand_shift
4500                       (RSHIFT_EXPR, compute_mode, op0,
4501                        pre_shift, tquotient, 0);
4502                   }
4503                 else
4504                   {
4505                     rtx t1, t2, t3, t4;
4506
4507                     mh = choose_multiplier (d, size, size - 1,
4508                                             &ml, &post_shift, &lgup);
4509                     gcc_assert (!mh);
4510
4511                     if (post_shift < BITS_PER_WORD
4512                         && size - 1 < BITS_PER_WORD)
4513                       {
4514                         t1 = expand_shift
4515                           (RSHIFT_EXPR, compute_mode, op0,
4516                            size - 1, NULL_RTX, 0);
4517                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4518                                            NULL_RTX, 0, OPTAB_WIDEN);
4519                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4520                                       + shift_cost (speed, compute_mode, size - 1)
4521                                       + 2 * add_cost (speed, compute_mode));
4522                         t3 = expmed_mult_highpart
4523                           (compute_mode, t2, gen_int_mode (ml, compute_mode),
4524                            NULL_RTX, 1, max_cost - extra_cost);
4525                         if (t3 != 0)
4526                           {
4527                             t4 = expand_shift
4528                               (RSHIFT_EXPR, compute_mode, t3,
4529                                post_shift, NULL_RTX, 1);
4530                             quotient = expand_binop (compute_mode, xor_optab,
4531                                                      t4, t1, tquotient, 0,
4532                                                      OPTAB_WIDEN);
4533                           }
4534                       }
4535                   }
4536               }
4537             else
4538               {
4539                 rtx nsign, t1, t2, t3, t4;
4540                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4541                                                   op0, constm1_rtx), NULL_RTX);
4542                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4543                                    0, OPTAB_WIDEN);
4544                 nsign = expand_shift
4545                   (RSHIFT_EXPR, compute_mode, t2,
4546                    size - 1, NULL_RTX, 0);
4547                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4548                                     NULL_RTX);
4549                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4550                                     NULL_RTX, 0);
4551                 if (t4)
4552                   {
4553                     rtx t5;
4554                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4555                                       NULL_RTX, 0);
4556                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4557                                                             t4, t5),
4558                                               tquotient);
4559                   }
4560               }
4561           }
4562
4563         if (quotient != 0)
4564           break;
4565         delete_insns_since (last);
4566
4567         /* Try using an instruction that produces both the quotient and
4568            remainder, using truncation.  We can easily compensate the quotient
4569            or remainder to get floor rounding, once we have the remainder.
4570            Notice that we compute also the final remainder value here,
4571            and return the result right away.  */
4572         if (target == 0 || GET_MODE (target) != compute_mode)
4573           target = gen_reg_rtx (compute_mode);
4574
4575         if (rem_flag)
4576           {
4577             remainder
4578               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4579             quotient = gen_reg_rtx (compute_mode);
4580           }
4581         else
4582           {
4583             quotient
4584               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4585             remainder = gen_reg_rtx (compute_mode);
4586           }
4587
4588         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4589                                  quotient, remainder, 0))
4590           {
4591             /* This could be computed with a branch-less sequence.
4592                Save that for later.  */
4593             rtx tem;
4594             rtx_code_label *label = gen_label_rtx ();
4595             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4596             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4597                                 NULL_RTX, 0, OPTAB_WIDEN);
4598             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4599             expand_dec (quotient, const1_rtx);
4600             expand_inc (remainder, op1);
4601             emit_label (label);
4602             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4603           }
4604
4605         /* No luck with division elimination or divmod.  Have to do it
4606            by conditionally adjusting op0 *and* the result.  */
4607         {
4608           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4609           rtx adjusted_op0;
4610           rtx tem;
4611
4612           quotient = gen_reg_rtx (compute_mode);
4613           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4614           label1 = gen_label_rtx ();
4615           label2 = gen_label_rtx ();
4616           label3 = gen_label_rtx ();
4617           label4 = gen_label_rtx ();
4618           label5 = gen_label_rtx ();
4619           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4620           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4621           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4622                               quotient, 0, OPTAB_LIB_WIDEN);
4623           if (tem != quotient)
4624             emit_move_insn (quotient, tem);
4625           emit_jump_insn (targetm.gen_jump (label5));
4626           emit_barrier ();
4627           emit_label (label1);
4628           expand_inc (adjusted_op0, const1_rtx);
4629           emit_jump_insn (targetm.gen_jump (label4));
4630           emit_barrier ();
4631           emit_label (label2);
4632           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4633           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4634                               quotient, 0, OPTAB_LIB_WIDEN);
4635           if (tem != quotient)
4636             emit_move_insn (quotient, tem);
4637           emit_jump_insn (targetm.gen_jump (label5));
4638           emit_barrier ();
4639           emit_label (label3);
4640           expand_dec (adjusted_op0, const1_rtx);
4641           emit_label (label4);
4642           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4643                               quotient, 0, OPTAB_LIB_WIDEN);
4644           if (tem != quotient)
4645             emit_move_insn (quotient, tem);
4646           expand_dec (quotient, const1_rtx);
4647           emit_label (label5);
4648         }
4649         break;
4650
4651       case CEIL_DIV_EXPR:
4652       case CEIL_MOD_EXPR:
4653         if (unsignedp)
4654           {
4655             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4656               {
4657                 rtx t1, t2, t3;
4658                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4659                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4660                                    floor_log2 (d), tquotient, 1);
4661                 t2 = expand_binop (compute_mode, and_optab, op0,
4662                                    gen_int_mode (d - 1, compute_mode),
4663                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4664                 t3 = gen_reg_rtx (compute_mode);
4665                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4666                                       compute_mode, 1, 1);
4667                 if (t3 == 0)
4668                   {
4669                     rtx_code_label *lab;
4670                     lab = gen_label_rtx ();
4671                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4672                     expand_inc (t1, const1_rtx);
4673                     emit_label (lab);
4674                     quotient = t1;
4675                   }
4676                 else
4677                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4678                                                           t1, t3),
4679                                             tquotient);
4680                 break;
4681               }
4682
4683             /* Try using an instruction that produces both the quotient and
4684                remainder, using truncation.  We can easily compensate the
4685                quotient or remainder to get ceiling rounding, once we have the
4686                remainder.  Notice that we compute also the final remainder
4687                value here, and return the result right away.  */
4688             if (target == 0 || GET_MODE (target) != compute_mode)
4689               target = gen_reg_rtx (compute_mode);
4690
4691             if (rem_flag)
4692               {
4693                 remainder = (REG_P (target)
4694                              ? target : gen_reg_rtx (compute_mode));
4695                 quotient = gen_reg_rtx (compute_mode);
4696               }
4697             else
4698               {
4699                 quotient = (REG_P (target)
4700                             ? target : gen_reg_rtx (compute_mode));
4701                 remainder = gen_reg_rtx (compute_mode);
4702               }
4703
4704             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4705                                      remainder, 1))
4706               {
4707                 /* This could be computed with a branch-less sequence.
4708                    Save that for later.  */
4709                 rtx_code_label *label = gen_label_rtx ();
4710                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4711                                  compute_mode, label);
4712                 expand_inc (quotient, const1_rtx);
4713                 expand_dec (remainder, op1);
4714                 emit_label (label);
4715                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4716               }
4717
4718             /* No luck with division elimination or divmod.  Have to do it
4719                by conditionally adjusting op0 *and* the result.  */
4720             {
4721               rtx_code_label *label1, *label2;
4722               rtx adjusted_op0, tem;
4723
4724               quotient = gen_reg_rtx (compute_mode);
4725               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4726               label1 = gen_label_rtx ();
4727               label2 = gen_label_rtx ();
4728               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4729                                compute_mode, label1);
4730               emit_move_insn  (quotient, const0_rtx);
4731               emit_jump_insn (targetm.gen_jump (label2));
4732               emit_barrier ();
4733               emit_label (label1);
4734               expand_dec (adjusted_op0, const1_rtx);
4735               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4736                                   quotient, 1, OPTAB_LIB_WIDEN);
4737               if (tem != quotient)
4738                 emit_move_insn (quotient, tem);
4739               expand_inc (quotient, const1_rtx);
4740               emit_label (label2);
4741             }
4742           }
4743         else /* signed */
4744           {
4745             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4746                 && INTVAL (op1) >= 0)
4747               {
4748                 /* This is extremely similar to the code for the unsigned case
4749                    above.  For 2.7 we should merge these variants, but for
4750                    2.6.1 I don't want to touch the code for unsigned since that
4751                    get used in C.  The signed case will only be used by other
4752                    languages (Ada).  */
4753
4754                 rtx t1, t2, t3;
4755                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4756                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4757                                    floor_log2 (d), tquotient, 0);
4758                 t2 = expand_binop (compute_mode, and_optab, op0,
4759                                    gen_int_mode (d - 1, compute_mode),
4760                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4761                 t3 = gen_reg_rtx (compute_mode);
4762                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4763                                       compute_mode, 1, 1);
4764                 if (t3 == 0)
4765                   {
4766                     rtx_code_label *lab;
4767                     lab = gen_label_rtx ();
4768                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4769                     expand_inc (t1, const1_rtx);
4770                     emit_label (lab);
4771                     quotient = t1;
4772                   }
4773                 else
4774                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4775                                                           t1, t3),
4776                                             tquotient);
4777                 break;
4778               }
4779
4780             /* Try using an instruction that produces both the quotient and
4781                remainder, using truncation.  We can easily compensate the
4782                quotient or remainder to get ceiling rounding, once we have the
4783                remainder.  Notice that we compute also the final remainder
4784                value here, and return the result right away.  */
4785             if (target == 0 || GET_MODE (target) != compute_mode)
4786               target = gen_reg_rtx (compute_mode);
4787             if (rem_flag)
4788               {
4789                 remainder= (REG_P (target)
4790                             ? target : gen_reg_rtx (compute_mode));
4791                 quotient = gen_reg_rtx (compute_mode);
4792               }
4793             else
4794               {
4795                 quotient = (REG_P (target)
4796                             ? target : gen_reg_rtx (compute_mode));
4797                 remainder = gen_reg_rtx (compute_mode);
4798               }
4799
4800             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4801                                      remainder, 0))
4802               {
4803                 /* This could be computed with a branch-less sequence.
4804                    Save that for later.  */
4805                 rtx tem;
4806                 rtx_code_label *label = gen_label_rtx ();
4807                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4808                                  compute_mode, label);
4809                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4810                                     NULL_RTX, 0, OPTAB_WIDEN);
4811                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4812                 expand_inc (quotient, const1_rtx);
4813                 expand_dec (remainder, op1);
4814                 emit_label (label);
4815                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4816               }
4817
4818             /* No luck with division elimination or divmod.  Have to do it
4819                by conditionally adjusting op0 *and* the result.  */
4820             {
4821               rtx_code_label *label1, *label2, *label3, *label4, *label5;
4822               rtx adjusted_op0;
4823               rtx tem;
4824
4825               quotient = gen_reg_rtx (compute_mode);
4826               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4827               label1 = gen_label_rtx ();
4828               label2 = gen_label_rtx ();
4829               label3 = gen_label_rtx ();
4830               label4 = gen_label_rtx ();
4831               label5 = gen_label_rtx ();
4832               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4833               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4834                                compute_mode, label1);
4835               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4836                                   quotient, 0, OPTAB_LIB_WIDEN);
4837               if (tem != quotient)
4838                 emit_move_insn (quotient, tem);
4839               emit_jump_insn (targetm.gen_jump (label5));
4840               emit_barrier ();
4841               emit_label (label1);
4842               expand_dec (adjusted_op0, const1_rtx);
4843               emit_jump_insn (targetm.gen_jump (label4));
4844               emit_barrier ();
4845               emit_label (label2);
4846               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4847                                compute_mode, label3);
4848               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4849                                   quotient, 0, OPTAB_LIB_WIDEN);
4850               if (tem != quotient)
4851                 emit_move_insn (quotient, tem);
4852               emit_jump_insn (targetm.gen_jump (label5));
4853               emit_barrier ();
4854               emit_label (label3);
4855               expand_inc (adjusted_op0, const1_rtx);
4856               emit_label (label4);
4857               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4858                                   quotient, 0, OPTAB_LIB_WIDEN);
4859               if (tem != quotient)
4860                 emit_move_insn (quotient, tem);
4861               expand_inc (quotient, const1_rtx);
4862               emit_label (label5);
4863             }
4864           }
4865         break;
4866
4867       case EXACT_DIV_EXPR:
4868         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4869           {
4870             HOST_WIDE_INT d = INTVAL (op1);
4871             unsigned HOST_WIDE_INT ml;
4872             int pre_shift;
4873             rtx t1;
4874
4875             pre_shift = floor_log2 (d & -d);
4876             ml = invert_mod2n (d >> pre_shift, size);
4877             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4878                                pre_shift, NULL_RTX, unsignedp);
4879             quotient = expand_mult (compute_mode, t1,
4880                                     gen_int_mode (ml, compute_mode),
4881                                     NULL_RTX, 1);
4882
4883             insn = get_last_insn ();
4884             set_dst_reg_note (insn, REG_EQUAL,
4885                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4886                                               compute_mode, op0, op1),
4887                               quotient);
4888           }
4889         break;
4890
4891       case ROUND_DIV_EXPR:
4892       case ROUND_MOD_EXPR:
4893         if (unsignedp)
4894           {
4895             rtx tem;
4896             rtx_code_label *label;
4897             label = gen_label_rtx ();
4898             quotient = gen_reg_rtx (compute_mode);
4899             remainder = gen_reg_rtx (compute_mode);
4900             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4901               {
4902                 rtx tem;
4903                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4904                                          quotient, 1, OPTAB_LIB_WIDEN);
4905                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4906                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4907                                           remainder, 1, OPTAB_LIB_WIDEN);
4908               }
4909             tem = plus_constant (compute_mode, op1, -1);
4910             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4911             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4912             expand_inc (quotient, const1_rtx);
4913             expand_dec (remainder, op1);
4914             emit_label (label);
4915           }
4916         else
4917           {
4918             rtx abs_rem, abs_op1, tem, mask;
4919             rtx_code_label *label;
4920             label = gen_label_rtx ();
4921             quotient = gen_reg_rtx (compute_mode);
4922             remainder = gen_reg_rtx (compute_mode);
4923             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4924               {
4925                 rtx tem;
4926                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4927                                          quotient, 0, OPTAB_LIB_WIDEN);
4928                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4929                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4930                                           remainder, 0, OPTAB_LIB_WIDEN);
4931               }
4932             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4933             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4934             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4935                                 1, NULL_RTX, 1);
4936             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4937             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4938                                 NULL_RTX, 0, OPTAB_WIDEN);
4939             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4940                                  size - 1, NULL_RTX, 0);
4941             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4942                                 NULL_RTX, 0, OPTAB_WIDEN);
4943             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4944                                 NULL_RTX, 0, OPTAB_WIDEN);
4945             expand_inc (quotient, tem);
4946             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4947                                 NULL_RTX, 0, OPTAB_WIDEN);
4948             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4949                                 NULL_RTX, 0, OPTAB_WIDEN);
4950             expand_dec (remainder, tem);
4951             emit_label (label);
4952           }
4953         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4954
4955       default:
4956         gcc_unreachable ();
4957       }
4958
4959   if (quotient == 0)
4960     {
4961       if (target && GET_MODE (target) != compute_mode)
4962         target = 0;
4963
4964       if (rem_flag)
4965         {
4966           /* Try to produce the remainder without producing the quotient.
4967              If we seem to have a divmod pattern that does not require widening,
4968              don't try widening here.  We should really have a WIDEN argument
4969              to expand_twoval_binop, since what we'd really like to do here is
4970              1) try a mod insn in compute_mode
4971              2) try a divmod insn in compute_mode
4972              3) try a div insn in compute_mode and multiply-subtract to get
4973                 remainder
4974              4) try the same things with widening allowed.  */
4975           remainder
4976             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4977                                  op0, op1, target,
4978                                  unsignedp,
4979                                  ((optab_handler (optab2, compute_mode)
4980                                    != CODE_FOR_nothing)
4981                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4982           if (remainder == 0)
4983             {
4984               /* No luck there.  Can we do remainder and divide at once
4985                  without a library call?  */
4986               remainder = gen_reg_rtx (compute_mode);
4987               if (! expand_twoval_binop ((unsignedp
4988                                           ? udivmod_optab
4989                                           : sdivmod_optab),
4990                                          op0, op1,
4991                                          NULL_RTX, remainder, unsignedp))
4992                 remainder = 0;
4993             }
4994
4995           if (remainder)
4996             return gen_lowpart (mode, remainder);
4997         }
4998
4999       /* Produce the quotient.  Try a quotient insn, but not a library call.
5000          If we have a divmod in this mode, use it in preference to widening
5001          the div (for this test we assume it will not fail). Note that optab2
5002          is set to the one of the two optabs that the call below will use.  */
5003       quotient
5004         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5005                              op0, op1, rem_flag ? NULL_RTX : target,
5006                              unsignedp,
5007                              ((optab_handler (optab2, compute_mode)
5008                                != CODE_FOR_nothing)
5009                               ? OPTAB_DIRECT : OPTAB_WIDEN));
5010
5011       if (quotient == 0)
5012         {
5013           /* No luck there.  Try a quotient-and-remainder insn,
5014              keeping the quotient alone.  */
5015           quotient = gen_reg_rtx (compute_mode);
5016           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5017                                      op0, op1,
5018                                      quotient, NULL_RTX, unsignedp))
5019             {
5020               quotient = 0;
5021               if (! rem_flag)
5022                 /* Still no luck.  If we are not computing the remainder,
5023                    use a library call for the quotient.  */
5024                 quotient = sign_expand_binop (compute_mode,
5025                                               udiv_optab, sdiv_optab,
5026                                               op0, op1, target,
5027                                               unsignedp, OPTAB_LIB_WIDEN);
5028             }
5029         }
5030     }
5031
5032   if (rem_flag)
5033     {
5034       if (target && GET_MODE (target) != compute_mode)
5035         target = 0;
5036
5037       if (quotient == 0)
5038         {
5039           /* No divide instruction either.  Use library for remainder.  */
5040           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5041                                          op0, op1, target,
5042                                          unsignedp, OPTAB_LIB_WIDEN);
5043           /* No remainder function.  Try a quotient-and-remainder
5044              function, keeping the remainder.  */
5045           if (!remainder)
5046             {
5047               remainder = gen_reg_rtx (compute_mode);
5048               if (!expand_twoval_binop_libfunc
5049                   (unsignedp ? udivmod_optab : sdivmod_optab,
5050                    op0, op1,
5051                    NULL_RTX, remainder,
5052                    unsignedp ? UMOD : MOD))
5053                 remainder = NULL_RTX;
5054             }
5055         }
5056       else
5057         {
5058           /* We divided.  Now finish doing X - Y * (X / Y).  */
5059           remainder = expand_mult (compute_mode, quotient, op1,
5060                                    NULL_RTX, unsignedp);
5061           remainder = expand_binop (compute_mode, sub_optab, op0,
5062                                     remainder, target, unsignedp,
5063                                     OPTAB_LIB_WIDEN);
5064         }
5065     }
5066
5067   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5068 }
5069 \f
5070 /* Return a tree node with data type TYPE, describing the value of X.
5071    Usually this is an VAR_DECL, if there is no obvious better choice.
5072    X may be an expression, however we only support those expressions
5073    generated by loop.c.  */
5074
5075 tree
5076 make_tree (tree type, rtx x)
5077 {
5078   tree t;
5079
5080   switch (GET_CODE (x))
5081     {
5082     case CONST_INT:
5083     case CONST_WIDE_INT:
5084       t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
5085       return t;
5086
5087     case CONST_DOUBLE:
5088       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5089       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5090         t = wide_int_to_tree (type,
5091                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5092                                                     HOST_BITS_PER_WIDE_INT * 2));
5093       else
5094         t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5095
5096       return t;
5097
5098     case CONST_VECTOR:
5099       {
5100         int units = CONST_VECTOR_NUNITS (x);
5101         tree itype = TREE_TYPE (type);
5102         tree *elts;
5103         int i;
5104
5105         /* Build a tree with vector elements.  */
5106         elts = XALLOCAVEC (tree, units);
5107         for (i = units - 1; i >= 0; --i)
5108           {
5109             rtx elt = CONST_VECTOR_ELT (x, i);
5110             elts[i] = make_tree (itype, elt);
5111           }
5112
5113         return build_vector (type, elts);
5114       }
5115
5116     case PLUS:
5117       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5118                           make_tree (type, XEXP (x, 1)));
5119
5120     case MINUS:
5121       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5122                           make_tree (type, XEXP (x, 1)));
5123
5124     case NEG:
5125       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5126
5127     case MULT:
5128       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5129                           make_tree (type, XEXP (x, 1)));
5130
5131     case ASHIFT:
5132       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5133                           make_tree (type, XEXP (x, 1)));
5134
5135     case LSHIFTRT:
5136       t = unsigned_type_for (type);
5137       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5138                                          make_tree (t, XEXP (x, 0)),
5139                                          make_tree (type, XEXP (x, 1))));
5140
5141     case ASHIFTRT:
5142       t = signed_type_for (type);
5143       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5144                                          make_tree (t, XEXP (x, 0)),
5145                                          make_tree (type, XEXP (x, 1))));
5146
5147     case DIV:
5148       if (TREE_CODE (type) != REAL_TYPE)
5149         t = signed_type_for (type);
5150       else
5151         t = type;
5152
5153       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5154                                          make_tree (t, XEXP (x, 0)),
5155                                          make_tree (t, XEXP (x, 1))));
5156     case UDIV:
5157       t = unsigned_type_for (type);
5158       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5159                                          make_tree (t, XEXP (x, 0)),
5160                                          make_tree (t, XEXP (x, 1))));
5161
5162     case SIGN_EXTEND:
5163     case ZERO_EXTEND:
5164       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5165                                           GET_CODE (x) == ZERO_EXTEND);
5166       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5167
5168     case CONST:
5169       return make_tree (type, XEXP (x, 0));
5170
5171     case SYMBOL_REF:
5172       t = SYMBOL_REF_DECL (x);
5173       if (t)
5174         return fold_convert (type, build_fold_addr_expr (t));
5175       /* else fall through.  */
5176
5177     default:
5178       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5179
5180       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5181          address mode to pointer mode.  */
5182       if (POINTER_TYPE_P (type))
5183         x = convert_memory_address_addr_space
5184               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5185
5186       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5187          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5188       t->decl_with_rtl.rtl = x;
5189
5190       return t;
5191     }
5192 }
5193 \f
5194 /* Emit RTL computing the bitwise AND of OP0 and OP1 in mode MODE.
5195
5196    A nonzero TARGET receives the result and is returned.  If TARGET is 0,
5197    the pseudo-register or constant holding the result is returned.  */
5198
5199 rtx
5200 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5201 {
5202   rtx val = NULL_RTX;
5203   /* When both operands have VOIDmode, try simplify_binary_operation first.  */
5204   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5205     val = simplify_binary_operation (AND, mode, op0, op1);
5206   if (!val)
5207     val = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5208
5209   if (!target)
5210     return val;
5211   if (val != target)
5212     emit_move_insn (target, val);
5213   return target;
5214 }
5215
5216 /* Helper function for emit_store_flag.

        Try to expand the cstore pattern ICODE for the comparison CODE of X
        and Y.  X and Y are first passed through prepare_operand as operands
        2 and 3 of ICODE, using MODE, COMPARE_MODE and UNSIGNEDP.  The raw
        result is produced in the mode given by targetm.cstore_mode (ICODE),
        then normalized according to NORMALIZEP (0 leaves it untouched) and
        delivered in TARGET_MODE; if TARGET_MODE is VOIDmode the cstore
        result mode is used.  A new pseudo is allocated when TARGET is null.

        Return the rtx holding the result, or NULL_RTX, after deleting any
        insns emitted here, if the operands cannot be prepared or the
        pattern cannot be expanded.  */
5217 rtx
5218 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5219              machine_mode mode, machine_mode compare_mode,
5220              int unsignedp, rtx x, rtx y, int normalizep,
5221              machine_mode target_mode)
5222 {
5223   struct expand_operand ops[4];
5224   rtx op0, comparison, subtarget;
5225   rtx_insn *last;
5226   machine_mode result_mode = targetm.cstore_mode (icode);
5227
       /* Remember the current end of the insn stream so that a failed
          attempt can be discarded with delete_insns_since.  */
5228   last = get_last_insn ();
5229   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5230   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5231   if (!x || !y)
5232     {
5233       delete_insns_since (last);
5234       return NULL_RTX;
5235     }
5236
       /* Default the result mode and allocate a destination if the caller
          supplied none.  */
5237   if (target_mode == VOIDmode)
5238     target_mode = result_mode;
5239   if (!target)
5240     target = gen_reg_rtx (target_mode);
5241
5242   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5243
       /* When optimizing, pass no suggested output instead of TARGET.  */
5244   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5245   create_fixed_operand (&ops[1], comparison);
5246   create_fixed_operand (&ops[2], x);
5247   create_fixed_operand (&ops[3], y);
5248   if (!maybe_expand_insn (icode, 4, ops))
5249     {
5250       delete_insns_since (last);
5251       return NULL_RTX;
5252     }
5253   subtarget = ops[0].value;
5254
5255   /* If we are converting to a wider mode, first convert to
5256      TARGET_MODE, then normalize.  This produces better combining
5257      opportunities on machines that have a SIGN_EXTRACT when we are
5258      testing a single bit.  This mostly benefits the 68k.
5259
5260      If STORE_FLAG_VALUE does not have the sign bit set when
5261      interpreted in MODE, we can do this conversion as unsigned, which
5262      is usually more efficient.  */
5263   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5264     {
5265       convert_move (target, subtarget,
5266                     val_signbit_known_clear_p (result_mode,
5267                                                STORE_FLAG_VALUE));
5268       op0 = target;
5269       result_mode = target_mode;
5270     }
5271   else
5272     op0 = subtarget;
5273
5274   /* If we want to keep subexpressions around, don't reuse our last
5275      target.  */
5276   if (optimize)
5277     subtarget = 0;
5278
5279   /* Now normalize to the proper value in MODE.  Sometimes we don't
5280      have to do anything.  */
5281   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5282     ;
5283   /* STORE_FLAG_VALUE might be the most negative number, so write
5284      the comparison this way to avoid a compiler-time warning.  */
5285   else if (- normalizep == STORE_FLAG_VALUE)
5286     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5287
5288   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5289      it hard to use a value of just the sign bit due to ANSI integer
5290      constant typing rules.  */
5291   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5292     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5293                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5294                         normalizep == 1);
5295   else
5296     {
5297       gcc_assert (STORE_FLAG_VALUE & 1);
5298
5299       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5300       if (normalizep == -1)
5301         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5302     }
5303
5304   /* If we were converting to a smaller mode, do the conversion now.  */
5305   if (target_mode != result_mode)
5306     {
5307       convert_move (target, op0, 0);
5308       return target;
5309     }
5310   else
5311     return op0;
5312 }
5313
5314
5315 /* A subroutine of emit_store_flag only including "tricks" that do not
5316    need a recursive call.  These are kept separate to avoid infinite
5317    loops.  (The double-word case below does call emit_store_flag, but
        only on word_mode operands.)

        TARGET, CODE, OP0, OP1, MODE, UNSIGNEDP and NORMALIZEP are as for
        emit_store_flag, which passes them through unchanged.  TARGET_MODE
        is the mode wanted for the result; emit_store_flag passes VOIDmode
        when it has no TARGET.

        Return the rtx holding the result, or 0 if none of the strategies
        tried here succeeded.  */
5318
5319 static rtx
5320 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5321                    machine_mode mode, int unsignedp, int normalizep,
5322                    machine_mode target_mode)
5323 {
5324   rtx subtarget;
5325   enum insn_code icode;
5326   machine_mode compare_mode;
5327   enum mode_class mclass;
5328   enum rtx_code scode;
5329
5330   if (unsignedp)
5331     code = unsigned_condition (code);
       /* SCODE is the swapped form of CODE, used when retrying a
          floating-point cstore with the operands exchanged.
          NOTE(review): it is computed before the operand swap below and is
          not updated by it -- confirm that is intended.  */
5332   scode = swap_condition (code);
5333
5334   /* If one operand is constant, make it the second one.  Only do this
5335      if the other operand is not constant as well.  */
5336
5337   if (swap_commutative_operands_p (op0, op1))
5338     {
5339       std::swap (op0, op1);
5340       code = swap_condition (code);
5341     }
5342
5343   if (mode == VOIDmode)
5344     mode = GET_MODE (op0);
5345
5346   /* For some comparisons with 1 and -1, we can convert this to
5347      comparisons with zero.  This will often produce more opportunities for
5348      store-flag insns.  */
5349
5350   switch (code)
5351     {
5352     case LT:
5353       if (op1 == const1_rtx)
5354         op1 = const0_rtx, code = LE;
5355       break;
5356     case LE:
5357       if (op1 == constm1_rtx)
5358         op1 = const0_rtx, code = LT;
5359       break;
5360     case GE:
5361       if (op1 == const1_rtx)
5362         op1 = const0_rtx, code = GT;
5363       break;
5364     case GT:
5365       if (op1 == constm1_rtx)
5366         op1 = const0_rtx, code = GE;
5367       break;
5368     case GEU:
5369       if (op1 == const1_rtx)
5370         op1 = const0_rtx, code = NE;
5371       break;
5372     case LTU:
5373       if (op1 == const1_rtx)
5374         op1 = const0_rtx, code = EQ;
5375       break;
5376     default:
5377       break;
5378     }
5379
5380   /* If we are comparing a double-word integer with zero or -1, we can
5381      convert the comparison into one involving a single word.  */
5382   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5383       && GET_MODE_CLASS (mode) == MODE_INT
5384       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5385     {
5386       rtx tem;
5387       if ((code == EQ || code == NE)
5388           && (op1 == const0_rtx || op1 == constm1_rtx))
5389         {
5390           rtx op00, op01;
5391
5392           /* Do a logical OR or AND of the two words and compare the
5393              result.  */
5394           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5395           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5396           tem = expand_binop (word_mode,
5397                               op1 == const0_rtx ? ior_optab : and_optab,
5398                               op00, op01, NULL_RTX, unsignedp,
5399                               OPTAB_DIRECT);
5400
5401           if (tem != 0)
5402             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5403                                    unsignedp, normalizep);
5404         }
5405       else if ((code == LT || code == GE) && op1 == const0_rtx)
5406         {
5407           rtx op0h;
5408
5409           /* If testing the sign bit, can just test on high word.  */
5410           op0h = simplify_gen_subreg (word_mode, op0, mode,
5411                                       subreg_highpart_offset (word_mode,
5412                                                               mode));
5413           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5414                                  unsignedp, normalizep);
5415         }
5416       else
5417         tem = NULL_RTX;
5418
5419       if (tem)
5420         {
               /* The word-sized result may still need converting to
                  TARGET_MODE; extend it as signed only when the "true"
                  value is known to have its sign bit set in word_mode.  */
5421           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5422             return tem;
5423           if (!target)
5424             target = gen_reg_rtx (target_mode);
5425
5426           convert_move (target, tem,
5427                         !val_signbit_known_set_p (word_mode,
5428                                                   (normalizep ? normalizep
5429                                                    : STORE_FLAG_VALUE)));
5430           return target;
5431         }
5432     }
5433
5434   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5435      complement of A (for GE) and shifting the sign bit to the low bit.  */
5436   if (op1 == const0_rtx && (code == LT || code == GE)
5437       && GET_MODE_CLASS (mode) == MODE_INT
5438       && (normalizep || STORE_FLAG_VALUE == 1
5439           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5440     {
5441       subtarget = target;
5442
5443       if (!target)
5444         target_mode = mode;
5445
5446       /* If the result is to be wider than OP0, it is best to convert it
5447          first.  If it is to be narrower, it is *incorrect* to convert it
5448          first.  */
5449       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5450         {
5451           op0 = convert_modes (target_mode, mode, op0, 0);
5452           mode = target_mode;
5453         }
5454
5455       if (target_mode != mode)
5456         subtarget = 0;
5457
5458       if (code == GE)
5459         op0 = expand_unop (mode, one_cmpl_optab, op0,
5460                            ((STORE_FLAG_VALUE == 1 || normalizep)
5461                             ? 0 : subtarget), 0);
5462
5463       if (STORE_FLAG_VALUE == 1 || normalizep)
5464         /* If we are supposed to produce a 0/1 value, we want to do
5465            a logical shift from the sign bit to the low-order bit; for
5466            a -1/0 value, we do an arithmetic shift.  */
5467         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5468                             GET_MODE_BITSIZE (mode) - 1,
5469                             subtarget, normalizep != -1);
5470
5471       if (mode != target_mode)
5472         op0 = convert_modes (target_mode, mode, op0, 0);
5473
5474       return op0;
5475     }
5476
       /* Otherwise look for a cstore pattern, starting with MODE and moving
          to wider modes; CCmode is used for the lookup when MODE is in
          class MODE_CC.  Only the first mode that has a pattern is tried.  */
5477   mclass = GET_MODE_CLASS (mode);
5478   for (compare_mode = mode; compare_mode != VOIDmode;
5479        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5480     {
5481      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5482      icode = optab_handler (cstore_optab, optab_mode);
5483      if (icode != CODE_FOR_nothing)
5484         {
5485           do_pending_stack_adjust ();
5486           rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5487                                  unsignedp, op0, op1, normalizep, target_mode);
5488           if (tem)
5489             return tem;
5490
               /* For floating-point modes, retry with the operands swapped.  */
5491           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5492             {
5493               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5494                                  unsignedp, op1, op0, normalizep, target_mode);
5495               if (tem)
5496                 return tem;
5497             }
5498           break;
5499         }
5500     }
5501
5502   return 0;
5503 }
5504
5505 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5506    and storing in TARGET.  Normally return TARGET.
5507    Return 0 if that cannot be done.
5508
5509    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5510    it is VOIDmode, they cannot both be CONST_INT.
5511
5512    UNSIGNEDP is for the case where we have to widen the operands
5513    to perform the operation.  It says to use zero-extension.
5514
5515    NORMALIZEP is 1 if we should convert the result to be either zero
5516    or one.  NORMALIZEP is -1 if we should convert the result to be
5517    either zero or -1.  If NORMALIZEP is zero, the result will be left
5518    "raw" out of the scc insn.

        TARGET may be 0; the result is then returned in an rtx chosen here.
        NULL_RTX is returned without emitting anything when OP0 and OP1 are
        both constants.  */
5519
5520 rtx
5521 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5522                  machine_mode mode, int unsignedp, int normalizep)
5523 {
5524   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5525   enum rtx_code rcode;
5526   rtx subtarget;
5527   rtx tem, trueval;
5528   rtx_insn *last;
5529
5530   /* If we compare constants, we shouldn't use a store-flag operation,
5531      but a constant load.  We can get there via the vanilla route that
5532      usually generates a compare-branch sequence, but will in this case
5533      fold the comparison to a constant, and thus elide the branch.  */
5534   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5535     return NULL_RTX;
5536
       /* First try the strategies that need no recursion.  */
5537   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5538                            target_mode);
5539   if (tem)
5540     return tem;
5541
5542   /* If we reached here, we can't do this with a scc insn, however there
5543      are some comparisons that can be done in other ways.  Don't do any
5544      of these cases if branches are very cheap.  */
5545   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5546     return 0;
5547
5548   /* See what we need to return.  We can only return a 1, -1, or the
5549      sign bit.  */
5550
5551   if (normalizep == 0)
5552     {
5553       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5554         normalizep = STORE_FLAG_VALUE;
5555
5556       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5557         ;
5558       else
5559         return 0;
5560     }
5561
       /* Everything emitted after this point is discarded with
          delete_insns_since if an attempt below fails.  */
5562   last = get_last_insn ();
5563
5564   /* If optimizing, use different pseudo registers for each insn, instead
5565      of reusing the same pseudo.  This leads to better CSE, but slows
5566      down the compiler, since there are more pseudos.  */
5567   subtarget = (!optimize
5568                && (target_mode == mode)) ? target : NULL_RTX;
5569   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5570
5571   /* For floating-point comparisons, try the reverse comparison or try
5572      changing the "orderedness" of the comparison.  */
5573   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5574     {
5575       enum rtx_code first_code;
5576       bool and_them;
5577
5578       rcode = reverse_condition_maybe_unordered (code);
5579       if (can_compare_p (rcode, mode, ccp_store_flag)
5580           && (code == ORDERED || code == UNORDERED
5581               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5582               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5583         {
               /* WANT_ADD is set when the raw and requested "true" values
                  are 1 and -1 in either order; the reversed result is then
                  fixed up by adding NORMALIZEP instead of XORing TRUEVAL.  */
5584           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5585                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5586
5587           /* For the reverse comparison, use either an addition or a XOR.  */
5588           if (want_add
5589               && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5590                            optimize_insn_for_speed_p ()) == 0)
5591             {
5592               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5593                                        STORE_FLAG_VALUE, target_mode);
5594               if (tem)
5595                 return expand_binop (target_mode, add_optab, tem,
5596                                      gen_int_mode (normalizep, target_mode),
5597                                      target, 0, OPTAB_WIDEN);
5598             }
5599           else if (!want_add
5600                    && rtx_cost (trueval, mode, XOR, 1,
5601                                 optimize_insn_for_speed_p ()) == 0)
5602             {
5603               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5604                                        normalizep, target_mode);
5605               if (tem)
5606                 return expand_binop (target_mode, xor_optab, tem, trueval,
5607                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5608             }
5609         }
5610
5611       delete_insns_since (last);
5612
5613       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5614       if (code == ORDERED || code == UNORDERED)
5615         return 0;
5616
5617       and_them = split_comparison (code, mode, &first_code, &code);
5618
5619       /* If there are no NaNs, the first comparison should always fall through.
5620          Effectively change the comparison to the other one.  */
5621       if (!HONOR_NANS (mode))
5622         {
5623           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5624           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5625                                     target_mode);
5626         }
5627
5628       if (!HAVE_conditional_move)
5629         return 0;
5630
5631       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5632          conditional move.  */
5633       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5634                                normalizep, target_mode);
5635       if (tem == 0)
5636         return 0;
5637
5638       if (and_them)
5639         tem = emit_conditional_move (target, code, op0, op1, mode,
5640                                      tem, const0_rtx, GET_MODE (tem), 0);
5641       else
5642         tem = emit_conditional_move (target, code, op0, op1, mode,
5643                                      trueval, tem, GET_MODE (tem), 0);
5644
5645       if (tem == 0)
5646         delete_insns_since (last);
5647       return tem;
5648     }
5649
5650   /* The remaining tricks only apply to integer comparisons.  */
5651
5652   if (GET_MODE_CLASS (mode) != MODE_INT)
5653     return 0;
5654
5655   /* If this is an equality comparison of integers, we can try to exclusive-or
5656      (or subtract) the two operands and use a recursive call to try the
5657      comparison with zero.  Don't do any of these cases if branches are
5658      very cheap.  */
5659
5660   if ((code == EQ || code == NE) && op1 != const0_rtx)
5661     {
5662       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5663                           OPTAB_WIDEN);
5664
5665       if (tem == 0)
5666         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5667                             OPTAB_WIDEN);
5668       if (tem != 0)
5669         tem = emit_store_flag (target, code, tem, const0_rtx,
5670                                mode, unsignedp, normalizep);
5671       if (tem != 0)
5672         return tem;
5673
5674       delete_insns_since (last);
5675     }
5676
5677   /* For integer comparisons, try the reverse comparison.  However, for
5678      small X and if we'd have anyway to extend, implementing "X != 0"
5679      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5680   rcode = reverse_condition (code);
5681   if (can_compare_p (rcode, mode, ccp_store_flag)
5682       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5683             && code == NE
5684             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5685             && op1 == const0_rtx))
5686     {
5687       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5688                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5689
5690       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5691       if (want_add
5692           && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5693                        optimize_insn_for_speed_p ()) == 0)
5694         {
5695           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5696                                    STORE_FLAG_VALUE, target_mode);
5697           if (tem != 0)
5698             tem = expand_binop (target_mode, add_optab, tem,
5699                                 gen_int_mode (normalizep, target_mode),
5700                                 target, 0, OPTAB_WIDEN);
5701         }
5702       else if (!want_add
5703                && rtx_cost (trueval, mode, XOR, 1,
5704                             optimize_insn_for_speed_p ()) == 0)
5705         {
5706           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5707                                    normalizep, target_mode);
5708           if (tem != 0)
5709             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5710                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5711         }
5712
5713       if (tem != 0)
5714         return tem;
5715       delete_insns_since (last);
5716     }
5717
5718   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5719      the constant zero.  Reject all other comparisons at this point.  Only
5720      do LE and GT if branches are expensive since they are expensive on
5721      2-operand machines.  */
5722
5723   if (op1 != const0_rtx
5724       || (code != EQ && code != NE
5725           && (BRANCH_COST (optimize_insn_for_speed_p (),
5726                            false) <= 1 || (code != LE && code != GT))))
5727     return 0;
5728
5729   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5730      do the necessary operation below.  */
5731
5732   tem = 0;
5733
5734   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5735      the sign bit set.  */
5736
5737   if (code == LE)
5738     {
5739       /* This is destructive, so SUBTARGET can't be OP0.  */
5740       if (rtx_equal_p (subtarget, op0))
5741         subtarget = 0;
5742
5743       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5744                           OPTAB_WIDEN);
5745       if (tem)
5746         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5747                             OPTAB_WIDEN);
5748     }
5749
5750   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5751      number of bits in the mode of OP0, minus one.  The shift is arithmetic;
          A > 0 iff that result has the sign bit set.  */
5752
5753   if (code == GT)
5754     {
5755       if (rtx_equal_p (subtarget, op0))
5756         subtarget = 0;
5757
5758       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5759                           GET_MODE_BITSIZE (mode) - 1,
5760                           subtarget, 0);
5761       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5762                           OPTAB_WIDEN);
5763     }
5764
5765   if (code == EQ || code == NE)
5766     {
5767       /* For EQ or NE, one way to do the comparison is to apply an operation
5768          that converts the operand into a positive number if it is nonzero
5769          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5770          for NE we negate.  This puts the result in the sign bit.  Then we
5771          normalize with a shift, if needed.
5772
5773          Two operations that can do the above actions are ABS and FFS, so try
5774          them.  If that doesn't work, and MODE is smaller than a full word,
5775          we can use zero-extension to the wider mode (an unsigned conversion)
5776          as the operation.  */
5777
5778       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5779          that is compensated by the subsequent overflow when subtracting
5780          one / negating.  */
5781
5782       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5783         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5784       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5785         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5786       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5787         {
5788           tem = convert_modes (word_mode, mode, op0, 1);
5789           mode = word_mode;
5790         }
5791
5792       if (tem != 0)
5793         {
5794           if (code == EQ)
5795             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5796                                 0, OPTAB_WIDEN);
5797           else
5798             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5799         }
5800
5801       /* If we couldn't do it that way, for NE we can "or" the two's complement
5802          of the value with itself.  For EQ, we take the one's complement of
5803          that "or", which is an extra insn, so we only handle EQ if branches
5804          are expensive.  */
5805
5806       if (tem == 0
5807           && (code == NE
5808               || BRANCH_COST (optimize_insn_for_speed_p (),
5809                               false) > 1))
5810         {
5811           if (rtx_equal_p (subtarget, op0))
5812             subtarget = 0;
5813
               /* NOTE(review): the expand_unop result is passed straight to
                  expand_binop without a null check -- confirm it cannot be
                  0 here.  */
5814           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5815           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5816                               OPTAB_WIDEN);
5817
5818           if (tem && code == EQ)
5819             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5820         }
5821     }
5822
       /* The answer is now in the sign bit of TEM; shift it down to the low
          bit, logically for a 0/1 result and arithmetically for 0/-1.  */
5823   if (tem && normalizep)
5824     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5825                         GET_MODE_BITSIZE (mode) - 1,
5826                         subtarget, normalizep == 1);
5827
       /* On success, deliver the value in TARGET if one was given, converting
          modes when they differ; on failure, discard what was emitted.  */
5828   if (tem)
5829     {
5830       if (!target)
5831         ;
5832       else if (GET_MODE (tem) != target_mode)
5833         {
5834           convert_move (target, tem, 0);
5835           tem = target;
5836         }
5837       else if (!subtarget)
5838         {
5839           emit_move_insn (target, tem);
5840           tem = target;
5841         }
5842     }
5843   else
5844     delete_insns_since (last);
5845
5846   return tem;
5847 }
5848
5849 /* Like emit_store_flag, but always succeeds.

        If emit_store_flag cannot do the job, fall back to a sequence that
        sets the result, compares OP0 with OP1 and conditionally jumps over
        a second set.  A word_mode pseudo is allocated when TARGET is 0.
        In the fallback the "true" value is NORMALIZEP if that is nonzero
        and 1 otherwise (not STORE_FLAG_VALUE); the "false" value is 0.
        The rtx holding the result is returned.  */
5850
5851 rtx
5852 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5853                        machine_mode mode, int unsignedp, int normalizep)
5854 {
5855   rtx tem;
5856   rtx_code_label *label;
5857   rtx trueval, falseval;
5858
5859   /* First see if emit_store_flag can do the job.  */
5860   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5861   if (tem != 0)
5862     return tem;
5863
5864   if (!target)
5865     target = gen_reg_rtx (word_mode);
5866
5867   /* If this failed, we have to do this with set/compare/jump/set code.
5868      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5869   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5870   if (code == NE
5871       && GET_MODE_CLASS (mode) == MODE_INT
5872       && REG_P (target)
5873       && op0 == target
5874       && op1 == const0_rtx)
5875     {
           /* TARGET already holds 0 in the false case, so jump over the
              store of TRUEVAL when TARGET compares equal to zero.  */
5876       label = gen_label_rtx ();
5877       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5878                                NULL_RTX, NULL, label, -1);
5879       emit_move_insn (target, trueval);
5880       emit_label (label);
5881       return target;
5882     }
5883
       /* TARGET is written before the comparison below is emitted, so use a
          fresh pseudo if it is not a register or is mentioned in either
          operand.  */
5884   if (!REG_P (target)
5885       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5886     target = gen_reg_rtx (GET_MODE (target));
5887
5888   /* Jump in the right direction if the target cannot implement CODE
5889      but can jump on its reverse condition.  */
5890   falseval = const0_rtx;
5891   if (! can_compare_p (code, mode, ccp_jump)
5892       && (! FLOAT_MODE_P (mode)
5893           || code == ORDERED || code == UNORDERED
5894           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5895           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5896     {
5897       enum rtx_code rcode;
5898       if (FLOAT_MODE_P (mode))
5899         rcode = reverse_condition_maybe_unordered (code);
5900       else
5901         rcode = reverse_condition (code);
5902
5903       /* Canonicalize to UNORDERED for the libcall.  */
5904       if (can_compare_p (rcode, mode, ccp_jump)
5905           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5906         {
               /* Test the reversed condition instead and exchange the two
                  values to be stored.  */
5907           falseval = trueval;
5908           trueval = const0_rtx;
5909           code = rcode;
5910         }
5911     }
5912
       /* Store TRUEVAL, jump to LABEL if the comparison holds, otherwise
          fall through and overwrite TARGET with FALSEVAL.  */
5913   emit_move_insn (target, trueval);
5914   label = gen_label_rtx ();
5915   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
5916                            label, -1);
5917
5918   emit_move_insn (target, falseval);
5919   emit_label (label);
5920
5921   return target;
5922 }
5923 \f
5924 /* Emit a comparison of ARG1 with ARG2 in mode MODE and a jump to LABEL
5925    taken when ARG1 OP ARG2 holds.  All the work, including any multi-word
5926    comparison, is left to do_compare_rtx_and_jump.  */
5927
5928 static void
5929 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5930                  rtx_code_label *label)
5931 {
5932   const int uns_p = (op == GEU || op == GTU || op == LEU || op == LTU);
5933   do_compare_rtx_and_jump (arg1, arg2, op, uns_p, mode, NULL_RTX, NULL,
5934                            label, -1);
5935 }