gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2016 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "backend.h"
  26 #include "target.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "predict.h"
  30 #include "tm_p.h"
  31 #include "expmed.h"
  32 #include "optabs.h"
  33 #include "emit-rtl.h"
  34 #include "diagnostic-core.h"
  35 #include "fold-const.h"
  36 #include "stor-layout.h"
  37 #include "dojump.h"
  38 #include "explow.h"
  39 #include "expr.h"
  40 #include "langhooks.h"
  41
  42 struct target_expmed default_target_expmed;
  43 #if SWITCHABLE_TARGET
  44 struct target_expmed *this_target_expmed = &default_target_expmed;
  45 #endif
  46
  47 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  48                                    unsigned HOST_WIDE_INT,
  49                                    unsigned HOST_WIDE_INT,
  50                                    unsigned HOST_WIDE_INT,
  51                                    rtx, bool);
  52 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
  53                                      unsigned HOST_WIDE_INT,
  54                                      rtx, bool);
  55 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  56                                    unsigned HOST_WIDE_INT,
  57                                    unsigned HOST_WIDE_INT,
  58                                    unsigned HOST_WIDE_INT,
  59                                    rtx, bool);
  60 static rtx extract_fixed_bit_field (machine_mode, rtx,
  61                                     unsigned HOST_WIDE_INT,
  62                                     unsigned HOST_WIDE_INT, rtx, int, bool);
  63 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
  64                                       unsigned HOST_WIDE_INT,
  65                                       unsigned HOST_WIDE_INT, rtx, int, bool);
  66 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
  67 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  68                                     unsigned HOST_WIDE_INT, int, bool);
  69 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
  70 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  71 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  72
  73 /* Return a constant integer mask value of mode MODE with BITSIZE ones
  74    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
  75    The mask is truncated if necessary to the width of mode MODE.  The
  76    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
  77
  78 static inline rtx
  79 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
  80 {
  81   return immed_wide_int_const
  82     (wi::shifted_mask (bitpos, bitsize, complement,
  83                        GET_MODE_PRECISION (mode)), mode);
  84 }
  85
  86 /* Test whether a value is zero of a power of two.  */
  87 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  88   (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
  89
  90 struct init_expmed_rtl
  91 {
  92   rtx reg;
  93   rtx plus;
  94   rtx neg;
  95   rtx mult;
  96   rtx sdiv;
  97   rtx udiv;
  98   rtx sdiv_32;
  99   rtx smod_32;
 100   rtx wide_mult;
 101   rtx wide_lshr;
 102   rtx wide_trunc;
 103   rtx shift;
 104   rtx shift_mult;
 105   rtx shift_add;
 106   rtx shift_sub0;
 107   rtx shift_sub1;
 108   rtx zext;
 109   rtx trunc;
 110
 111   rtx pow2[MAX_BITS_PER_WORD];
 112   rtx cint[MAX_BITS_PER_WORD];
 113 };
 114
 115 static void
 116 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
 117                       machine_mode from_mode, bool speed)
 118 {
 119   int to_size, from_size;
 120   rtx which;
 121
 122   to_size = GET_MODE_PRECISION (to_mode);
 123   from_size = GET_MODE_PRECISION (from_mode);
 124
 125   /* Most partial integers have a precision less than the "full"
 126      integer it requires for storage.  In case one doesn't, for
 127      comparison purposes here, reduce the bit size by one in that
 128      case.  */
 129   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
 130       && exact_log2 (to_size) != -1)
 131     to_size --;
 132   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
 133       && exact_log2 (from_size) != -1)
 134     from_size --;
 135
 136   /* Assume cost of zero-extend and sign-extend is the same.  */
 137   which = (to_size < from_size ? all->trunc : all->zext);
 138
 139   PUT_MODE (all->reg, from_mode);
 140   set_convert_cost (to_mode, from_mode, speed,
 141                     set_src_cost (which, to_mode, speed));
 142 }
 143
 144 static void
 145 init_expmed_one_mode (struct init_expmed_rtl *all,
 146                       machine_mode mode, int speed)
 147 {
 148   int m, n, mode_bitsize;
 149   machine_mode mode_from;
 150
 151   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 152
 153   PUT_MODE (all->reg, mode);
 154   PUT_MODE (all->plus, mode);
 155   PUT_MODE (all->neg, mode);
 156   PUT_MODE (all->mult, mode);
 157   PUT_MODE (all->sdiv, mode);
 158   PUT_MODE (all->udiv, mode);
 159   PUT_MODE (all->sdiv_32, mode);
 160   PUT_MODE (all->smod_32, mode);
 161   PUT_MODE (all->wide_trunc, mode);
 162   PUT_MODE (all->shift, mode);
 163   PUT_MODE (all->shift_mult, mode);
 164   PUT_MODE (all->shift_add, mode);
 165   PUT_MODE (all->shift_sub0, mode);
 166   PUT_MODE (all->shift_sub1, mode);
 167   PUT_MODE (all->zext, mode);
 168   PUT_MODE (all->trunc, mode);
 169
 170   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
 171   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
 172   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
 173   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
 174   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
 175
 176   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
 177                                      <= 2 * add_cost (speed, mode)));
 178   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
 179                                      <= 4 * add_cost (speed, mode)));
 180
 181   set_shift_cost (speed, mode, 0, 0);
 182   {
 183     int cost = add_cost (speed, mode);
 184     set_shiftadd_cost (speed, mode, 0, cost);
 185     set_shiftsub0_cost (speed, mode, 0, cost);
 186     set_shiftsub1_cost (speed, mode, 0, cost);
 187   }
 188
 189   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 190   for (m = 1; m < n; m++)
 191     {
 192       XEXP (all->shift, 1) = all->cint[m];
 193       XEXP (all->shift_mult, 1) = all->pow2[m];
 194
 195       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
 196       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
 197                                                        speed));
 198       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
 199                                                         speed));
 200       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
 201                                                         speed));
 202     }
 203
 204   if (SCALAR_INT_MODE_P (mode))
 205     {
 206       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 207            mode_from = (machine_mode)(mode_from + 1))
 208         init_expmed_one_conv (all, mode, mode_from, speed);
 209     }
 210   if (GET_MODE_CLASS (mode) == MODE_INT)
 211     {
 212       machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
 213       if (wider_mode != VOIDmode)
 214         {
 215           PUT_MODE (all->zext, wider_mode);
 216           PUT_MODE (all->wide_mult, wider_mode);
 217           PUT_MODE (all->wide_lshr, wider_mode);
 218           XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
 219
 220           set_mul_widen_cost (speed, wider_mode,
 221                               set_src_cost (all->wide_mult, wider_mode, speed));
 222           set_mul_highpart_cost (speed, mode,
 223                                  set_src_cost (all->wide_trunc, mode, speed));
 224         }
 225     }
 226 }
 227
 228 void
 229 init_expmed (void)
 230 {
 231   struct init_expmed_rtl all;
 232   machine_mode mode = QImode;
 233   int m, speed;
 234
 235   memset (&all, 0, sizeof all);
 236   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 237     {
 238       all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 239       all.cint[m] = GEN_INT (m);
 240     }
 241
 242   /* Avoid using hard regs in ways which may be unsupported.  */
 243   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
 244   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
 245   all.neg = gen_rtx_NEG (mode, all.reg);
 246   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
 247   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
 248   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
 249   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
 250   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
 251   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
 252   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
 253   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
 254   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
 255   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
 256   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
 257   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
 258   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
 259   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
 260   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
 261
 262   for (speed = 0; speed < 2; speed++)
 263     {
 264       crtl->maybe_hot_insn_p = speed;
 265       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
 266
 267       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 268            mode = (machine_mode)(mode + 1))
 269         init_expmed_one_mode (&all, mode, speed);
 270
 271       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 272         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 273              mode = (machine_mode)(mode + 1))
 274           init_expmed_one_mode (&all, mode, speed);
 275
 276       if (MIN_MODE_VECTOR_INT != VOIDmode)
 277         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 278              mode = (machine_mode)(mode + 1))
 279           init_expmed_one_mode (&all, mode, speed);
 280     }
 281
 282   if (alg_hash_used_p ())
 283     {
 284       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 285       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 286     }
 287   else
 288     set_alg_hash_used_p (true);
 289   default_rtl_profile ();
 290
 291   ggc_free (all.trunc);
 292   ggc_free (all.shift_sub1);
 293   ggc_free (all.shift_sub0);
 294   ggc_free (all.shift_add);
 295   ggc_free (all.shift_mult);
 296   ggc_free (all.shift);
 297   ggc_free (all.wide_trunc);
 298   ggc_free (all.wide_lshr);
 299   ggc_free (all.wide_mult);
 300   ggc_free (all.zext);
 301   ggc_free (all.smod_32);
 302   ggc_free (all.sdiv_32);
 303   ggc_free (all.udiv);
 304   ggc_free (all.sdiv);
 305   ggc_free (all.mult);
 306   ggc_free (all.neg);
 307   ggc_free (all.plus);
 308   ggc_free (all.reg);
 309 }
 310
 311 /* Return an rtx representing minus the value of X.
 312    MODE is the intended mode of the result,
 313    useful if X is a CONST_INT.  */
 314
 315 rtx
 316 negate_rtx (machine_mode mode, rtx x)
 317 {
 318   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 319
 320   if (result == 0)
 321     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 322
 323   return result;
 324 }
 325
 326 /* Whether reverse storage order is supported on the target.  */
 327 static int reverse_storage_order_supported = -1;
 328
 329 /* Check whether reverse storage order is supported on the target.  */
 330
 331 static void
 332 check_reverse_storage_order_support (void)
 333 {
 334   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 335     {
 336       reverse_storage_order_supported = 0;
 337       sorry ("reverse scalar storage order");
 338     }
 339   else
 340     reverse_storage_order_supported = 1;
 341 }
 342
 343 /* Whether reverse FP storage order is supported on the target.  */
 344 static int reverse_float_storage_order_supported = -1;
 345
 346 /* Check whether reverse FP storage order is supported on the target.  */
 347
 348 static void
 349 check_reverse_float_storage_order_support (void)
 350 {
 351   if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 352     {
 353       reverse_float_storage_order_supported = 0;
 354       sorry ("reverse floating-point scalar storage order");
 355     }
 356   else
 357     reverse_float_storage_order_supported = 1;
 358 }
 359
 360 /* Return an rtx representing value of X with reverse storage order.
 361    MODE is the intended mode of the result,
 362    useful if X is a CONST_INT.  */
 363
 364 rtx
 365 flip_storage_order (enum machine_mode mode, rtx x)
 366 {
 367   enum machine_mode int_mode;
 368   rtx result;
 369
 370   if (mode == QImode)
 371     return x;
 372
 373   if (COMPLEX_MODE_P (mode))
 374     {
 375       rtx real = read_complex_part (x, false);
 376       rtx imag = read_complex_part (x, true);
 377
 378       real = flip_storage_order (GET_MODE_INNER (mode), real);
 379       imag = flip_storage_order (GET_MODE_INNER (mode), imag);
 380
 381       return gen_rtx_CONCAT (mode, real, imag);
 382     }
 383
 384   if (__builtin_expect (reverse_storage_order_supported < 0, 0))
 385     check_reverse_storage_order_support ();
 386
 387   if (SCALAR_INT_MODE_P (mode))
 388     int_mode = mode;
 389   else
 390     {
 391       if (FLOAT_MODE_P (mode)
 392           && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
 393         check_reverse_float_storage_order_support ();
 394
 395       int_mode = mode_for_size (GET_MODE_PRECISION (mode), MODE_INT, 0);
 396       if (int_mode == BLKmode)
 397         {
 398           sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
 399           return x;
 400         }
 401       x = gen_lowpart (int_mode, x);
 402     }
 403
 404   result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
 405   if (result == 0)
 406     result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
 407
 408   if (int_mode != mode)
 409     result = gen_lowpart (mode, result);
 410
 411   return result;
 412 }
 413
 414 /* Adjust bitfield memory MEM so that it points to the first unit of mode
 415    MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
 416    If MODE is BLKmode, return a reference to every byte in the bitfield.
 417    Set *NEW_BITNUM to the bit position of the field within the new memory.  */
 418
 419 static rtx
 420 narrow_bit_field_mem (rtx mem, machine_mode mode,
 421                       unsigned HOST_WIDE_INT bitsize,
 422                       unsigned HOST_WIDE_INT bitnum,
 423                       unsigned HOST_WIDE_INT *new_bitnum)
 424 {
 425   if (mode == BLKmode)
 426     {
 427       *new_bitnum = bitnum % BITS_PER_UNIT;
 428       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 429       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 430                             / BITS_PER_UNIT);
 431       return adjust_bitfield_address_size (mem, mode, offset, size);
 432     }
 433   else
 434     {
 435       unsigned int unit = GET_MODE_BITSIZE (mode);
 436       *new_bitnum = bitnum % unit;
 437       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 438       return adjust_bitfield_address (mem, mode, offset);
 439     }
 440 }
 441
 442 /* The caller wants to perform insertion or extraction PATTERN on a
 443    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 444    BITREGION_START and BITREGION_END are as for store_bit_field
 445    and FIELDMODE is the natural mode of the field.
 446
 447    Search for a mode that is compatible with the memory access
 448    restrictions and (where applicable) with a register insertion or
 449    extraction.  Return the new memory on success, storing the adjusted
 450    bit position in *NEW_BITNUM.  Return null otherwise.  */
 451
 452 static rtx
 453 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 454                               rtx op0, HOST_WIDE_INT bitsize,
 455                               HOST_WIDE_INT bitnum,
 456                               unsigned HOST_WIDE_INT bitregion_start,
 457                               unsigned HOST_WIDE_INT bitregion_end,
 458                               machine_mode fieldmode,
 459                               unsigned HOST_WIDE_INT *new_bitnum)
 460 {
 461   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 462                                 bitregion_end, MEM_ALIGN (op0),
 463                                 MEM_VOLATILE_P (op0));
 464   machine_mode best_mode;
 465   if (iter.next_mode (&best_mode))
 466     {
 467       /* We can use a memory in BEST_MODE.  See whether this is true for
 468          any wider modes.  All other things being equal, we prefer to
 469          use the widest mode possible because it tends to expose more
 470          CSE opportunities.  */
 471       if (!iter.prefer_smaller_modes ())
 472         {
 473           /* Limit the search to the mode required by the corresponding
 474              register insertion or extraction instruction, if any.  */
 475           machine_mode limit_mode = word_mode;
 476           extraction_insn insn;
 477           if (get_best_reg_extraction_insn (&insn, pattern,
 478                                             GET_MODE_BITSIZE (best_mode),
 479                                             fieldmode))
 480             limit_mode = insn.field_mode;
 481
 482           machine_mode wider_mode;
 483           while (iter.next_mode (&wider_mode)
 484                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 485             best_mode = wider_mode;
 486         }
 487       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 488                                    new_bitnum);
 489     }
 490   return NULL_RTX;
 491 }
 492
 493 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 494    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 495    offset is then BITNUM / BITS_PER_UNIT.  */
 496
 497 static bool
 498 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
 499                      unsigned HOST_WIDE_INT bitsize,
 500                      machine_mode struct_mode)
 501 {
 502   if (BYTES_BIG_ENDIAN)
 503     return (bitnum % BITS_PER_UNIT == 0
 504             && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
 505                 || (bitnum + bitsize) % BITS_PER_WORD == 0));
 506   else
 507     return bitnum % BITS_PER_WORD == 0;
 508 }
 509
 510 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 511    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 512    Return false if the access would touch memory outside the range
 513    BITREGION_START to BITREGION_END for conformance to the C++ memory
 514    model.  */
 515
 516 static bool
 517 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 518                             unsigned HOST_WIDE_INT bitnum,
 519                             machine_mode fieldmode,
 520                             unsigned HOST_WIDE_INT bitregion_start,
 521                             unsigned HOST_WIDE_INT bitregion_end)
 522 {
 523   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 524
 525   /* -fstrict-volatile-bitfields must be enabled and we must have a
 526      volatile MEM.  */
 527   if (!MEM_P (op0)
 528       || !MEM_VOLATILE_P (op0)
 529       || flag_strict_volatile_bitfields <= 0)
 530     return false;
 531
 532   /* Non-integral modes likely only happen with packed structures.
 533      Punt.  */
 534   if (!SCALAR_INT_MODE_P (fieldmode))
 535     return false;
 536
 537   /* The bit size must not be larger than the field mode, and
 538      the field mode must not be larger than a word.  */
 539   if (bitsize > modesize || modesize > BITS_PER_WORD)
 540     return false;
 541
 542   /* Check for cases of unaligned fields that must be split.  */
 543   if (bitnum % modesize + bitsize > modesize)
 544     return false;
 545
 546   /* The memory must be sufficiently aligned for a MODESIZE access.
 547      This condition guarantees, that the memory access will not
 548      touch anything after the end of the structure.  */
 549   if (MEM_ALIGN (op0) < modesize)
 550     return false;
 551
 552   /* Check for cases where the C++ memory model applies.  */
 553   if (bitregion_end != 0
 554       && (bitnum - bitnum % modesize < bitregion_start
 555           || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
 556     return false;
 557
 558   return true;
 559 }
 560
 561 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
 562    bit number BITNUM can be treated as a simple value of mode MODE.  */
 563
 564 static bool
 565 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 566                        unsigned HOST_WIDE_INT bitnum, machine_mode mode)
 567 {
 568   return (MEM_P (op0)
 569           && bitnum % BITS_PER_UNIT == 0
 570           && bitsize == GET_MODE_BITSIZE (mode)
 571           && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
 572               || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
 573                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 574 }
 575 \f
 576 /* Try to use instruction INSV to store VALUE into a field of OP0.
 577    BITSIZE and BITNUM are as for store_bit_field.  */
 578
 579 static bool
 580 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 581                             unsigned HOST_WIDE_INT bitsize,
 582                             unsigned HOST_WIDE_INT bitnum,
 583                             rtx value)
 584 {
 585   struct expand_operand ops[4];
 586   rtx value1;
 587   rtx xop0 = op0;
 588   rtx_insn *last = get_last_insn ();
 589   bool copy_back = false;
 590
 591   machine_mode op_mode = insv->field_mode;
 592   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 593   if (bitsize == 0 || bitsize > unit)
 594     return false;
 595
 596   if (MEM_P (xop0))
 597     /* Get a reference to the first byte of the field.  */
 598     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 599                                  &bitnum);
 600   else
 601     {
 602       /* Convert from counting within OP0 to counting in OP_MODE.  */
 603       if (BYTES_BIG_ENDIAN)
 604         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 605
 606       /* If xop0 is a register, we need it in OP_MODE
 607          to make it acceptable to the format of insv.  */
 608       if (GET_CODE (xop0) == SUBREG)
 609         /* We can't just change the mode, because this might clobber op0,
 610            and we will need the original value of op0 if insv fails.  */
 611         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 612       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 613         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 614     }
 615
 616   /* If the destination is a paradoxical subreg such that we need a
 617      truncate to the inner mode, perform the insertion on a temporary and
 618      truncate the result to the original destination.  Note that we can't
 619      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 620      X) 0)) is (reg:N X).  */
 621   if (GET_CODE (xop0) == SUBREG
 622       && REG_P (SUBREG_REG (xop0))
 623       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 624                                          op_mode))
 625     {
 626       rtx tem = gen_reg_rtx (op_mode);
 627       emit_move_insn (tem, xop0);
 628       xop0 = tem;
 629       copy_back = true;
 630     }
 631
 632   /* There are similar overflow check at the start of store_bit_field_1,
 633      but that only check the situation where the field lies completely
 634      outside the register, while there do have situation where the field
 635      lies partialy in the register, we need to adjust bitsize for this
 636      partial overflow situation.  Without this fix, pr48335-2.c on big-endian
 637      will broken on those arch support bit insert instruction, like arm, aarch64
 638      etc.  */
 639   if (bitsize + bitnum > unit && bitnum < unit)
 640     {
 641       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
 642                "destination object, data truncated into %wu-bit",
 643                bitsize, unit - bitnum);
 644       bitsize = unit - bitnum;
 645     }
 646
 647   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 648      "backwards" from the size of the unit we are inserting into.
 649      Otherwise, we count bits from the most significant on a
 650      BYTES/BITS_BIG_ENDIAN machine.  */
 651
 652   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 653     bitnum = unit - bitsize - bitnum;
 654
 655   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 656   value1 = value;
 657   if (GET_MODE (value) != op_mode)
 658     {
 659       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 660         {
 661           rtx tmp;
 662           /* Optimization: Don't bother really extending VALUE
 663              if it has all the bits we will actually use.  However,
 664              if we must narrow it, be sure we do it correctly.  */
 665
 666           if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 667             {
 668               tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 669               if (! tmp)
 670                 tmp = simplify_gen_subreg (op_mode,
 671                                            force_reg (GET_MODE (value),
 672                                                       value1),
 673                                            GET_MODE (value), 0);
 674             }
 675           else
 676             {
 677               tmp = gen_lowpart_if_possible (op_mode, value1);
 678               if (! tmp)
 679                 tmp = gen_lowpart (op_mode, force_reg (GET_MODE (value),
 680                                                        value1));
 681             }
 682           value1 = tmp;
 683         }
 684       else if (CONST_INT_P (value))
 685         value1 = gen_int_mode (INTVAL (value), op_mode);
 686       else
 687         /* Parse phase is supposed to make VALUE's data type
 688            match that of the component reference, which is a type
 689            at least as wide as the field; so VALUE should have
 690            a mode that corresponds to that type.  */
 691         gcc_assert (CONSTANT_P (value));
 692     }
 693
 694   create_fixed_operand (&ops[0], xop0);
 695   create_integer_operand (&ops[1], bitsize);
 696   create_integer_operand (&ops[2], bitnum);
 697   create_input_operand (&ops[3], value1, op_mode);
 698   if (maybe_expand_insn (insv->icode, 4, ops))
 699     {
 700       if (copy_back)
 701         convert_move (op0, xop0, true);
 702       return true;
 703     }
 704   delete_insns_since (last);
 705   return false;
 706 }
 707
 708 /* A subroutine of store_bit_field, with the same arguments.  Return true
 709    if the operation could be implemented.
 710
 711    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 712    no other way of implementing the operation.  If FALLBACK_P is false,
 713    return false instead.  */
 714
 715 static bool
 716 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 717                    unsigned HOST_WIDE_INT bitnum,
 718                    unsigned HOST_WIDE_INT bitregion_start,
 719                    unsigned HOST_WIDE_INT bitregion_end,
 720                    machine_mode fieldmode,
 721                    rtx value, bool reverse, bool fallback_p)
 722 {
 723   rtx op0 = str_rtx;
 724   rtx orig_value;
 725
 726   while (GET_CODE (op0) == SUBREG)
 727     {
 728       /* The following line once was done only if WORDS_BIG_ENDIAN,
 729          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 730          meaningful at a much higher level; when structures are copied
 731          between memory and regs, the higher-numbered regs
 732          always get higher addresses.  */
 733       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 734       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 735       int byte_offset = 0;
 736
 737       /* Paradoxical subregs need special handling on big-endian machines.  */
 738       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 739         {
 740           int difference = inner_mode_size - outer_mode_size;
 741
 742           if (WORDS_BIG_ENDIAN)
 743             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 744           if (BYTES_BIG_ENDIAN)
 745             byte_offset += difference % UNITS_PER_WORD;
 746         }
 747       else
 748         byte_offset = SUBREG_BYTE (op0);
 749
 750       bitnum += byte_offset * BITS_PER_UNIT;
 751       op0 = SUBREG_REG (op0);
 752     }
 753
 754   /* No action is needed if the target is a register and if the field
 755      lies completely outside that register.  This can occur if the source
 756      code contains an out-of-bounds access to a small array.  */
 757   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 758     return true;
 759
 760   /* Use vec_set patterns for inserting parts of vectors whenever
 761      available.  */
 762   if (VECTOR_MODE_P (GET_MODE (op0))
 763       && !MEM_P (op0)
 764       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 765       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 766       && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
 767       && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
 768     {
 769       struct expand_operand ops[3];
 770       machine_mode outermode = GET_MODE (op0);
 771       machine_mode innermode = GET_MODE_INNER (outermode);
 772       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 773       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 774
 775       create_fixed_operand (&ops[0], op0);
 776       create_input_operand (&ops[1], value, innermode);
 777       create_integer_operand (&ops[2], pos);
 778       if (maybe_expand_insn (icode, 3, ops))
 779         return true;
 780     }
 781
 782   /* If the target is a register, overwriting the entire object, or storing
 783      a full-word or multi-word field can be done with just a SUBREG.  */
 784   if (!MEM_P (op0)
 785       && bitsize == GET_MODE_BITSIZE (fieldmode)
 786       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 787           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 788     {
 789       /* Use the subreg machinery either to narrow OP0 to the required
 790          words or to cope with mode punning between equal-sized modes.
 791          In the latter case, use subreg on the rhs side, not lhs.  */
 792       rtx sub;
 793
 794       if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
 795         {
 796           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 797           if (sub)
 798             {
 799               if (reverse)
 800                 sub = flip_storage_order (GET_MODE (op0), sub);
 801               emit_move_insn (op0, sub);
 802               return true;
 803             }
 804         }
 805       else
 806         {
 807           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 808                                      bitnum / BITS_PER_UNIT);
 809           if (sub)
 810             {
 811               if (reverse)
 812                 value = flip_storage_order (fieldmode, value);
 813               emit_move_insn (sub, value);
 814               return true;
 815             }
 816         }
 817     }
 818
 819   /* If the target is memory, storing any naturally aligned field can be
 820      done with a simple store.  For targets that support fast unaligned
 821      memory, any naturally sized, unit aligned field can be done directly.  */
 822   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
 823     {
 824       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 825       if (reverse)
 826         value = flip_storage_order (fieldmode, value);
 827       emit_move_insn (op0, value);
 828       return true;
 829     }
 830
 831   /* Make sure we are playing with integral modes.  Pun with subregs
 832      if we aren't.  This must come after the entire register case above,
 833      since that case is valid for any mode.  The following cases are only
 834      valid for integral modes.  */
 835   {
 836     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 837     if (imode != GET_MODE (op0))
 838       {
 839         if (MEM_P (op0))
 840           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
 841         else
 842           {
 843             gcc_assert (imode != BLKmode);
 844             op0 = gen_lowpart (imode, op0);
 845           }
 846       }
 847   }
 848
 849   /* Storing an lsb-aligned field in a register
 850      can be done with a movstrict instruction.  */
 851
 852   if (!MEM_P (op0)
 853       && !reverse
 854       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 855       && bitsize == GET_MODE_BITSIZE (fieldmode)
 856       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 857     {
 858       struct expand_operand ops[2];
 859       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 860       rtx arg0 = op0;
 861       unsigned HOST_WIDE_INT subreg_off;
 862
 863       if (GET_CODE (arg0) == SUBREG)
 864         {
 865           /* Else we've got some float mode source being extracted into
 866              a different float mode destination -- this combination of
 867              subregs results in Severe Tire Damage.  */
 868           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 869                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 870                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 871           arg0 = SUBREG_REG (arg0);
 872         }
 873
 874       subreg_off = bitnum / BITS_PER_UNIT;
 875       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 876         {
 877           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 878
 879           create_fixed_operand (&ops[0], arg0);
 880           /* Shrink the source operand to FIELDMODE.  */
 881           create_convert_operand_to (&ops[1], value, fieldmode, false);
 882           if (maybe_expand_insn (icode, 2, ops))
 883             return true;
 884         }
 885     }
 886
 887   /* Handle fields bigger than a word.  */
 888
 889   if (bitsize > BITS_PER_WORD)
 890     {
 891       /* Here we transfer the words of the field
 892          in the order least significant first.
 893          This is because the most significant word is the one which may
 894          be less than full.
 895          However, only do that if the value is not BLKmode.  */
 896
 897       const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 898       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 899       unsigned int i;
 900       rtx_insn *last;
 901
 902       /* This is the mode we must force value to, so that there will be enough
 903          subwords to extract.  Note that fieldmode will often (always?) be
 904          VOIDmode, because that is what store_field uses to indicate that this
 905          is a bit field, but passing VOIDmode to operand_subword_force
 906          is not allowed.  */
 907       fieldmode = GET_MODE (value);
 908       if (fieldmode == VOIDmode)
 909         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 910
 911       last = get_last_insn ();
 912       for (i = 0; i < nwords; i++)
 913         {
 914           /* If I is 0, use the low-order word in both field and target;
 915              if I is 1, use the next to lowest word; and so on.  */
 916           unsigned int wordnum = (backwards
 917                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 918                                   - i - 1
 919                                   : i);
 920           unsigned int bit_offset = (backwards ^ reverse
 921                                      ? MAX ((int) bitsize - ((int) i + 1)
 922                                             * BITS_PER_WORD,
 923                                             0)
 924                                      : (int) i * BITS_PER_WORD);
 925           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 926           unsigned HOST_WIDE_INT new_bitsize =
 927             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 928
 929           /* If the remaining chunk doesn't have full wordsize we have
 930              to make sure that for big-endian machines the higher order
 931              bits are used.  */
 932           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 933             value_word = simplify_expand_binop (word_mode, lshr_optab,
 934                                                 value_word,
 935                                                 GEN_INT (BITS_PER_WORD
 936                                                          - new_bitsize),
 937                                                 NULL_RTX, true,
 938                                                 OPTAB_LIB_WIDEN);
 939
 940           if (!store_bit_field_1 (op0, new_bitsize,
 941                                   bitnum + bit_offset,
 942                                   bitregion_start, bitregion_end,
 943                                   word_mode,
 944                                   value_word, reverse, fallback_p))
 945             {
 946               delete_insns_since (last);
 947               return false;
 948             }
 949         }
 950       return true;
 951     }
 952
 953   /* If VALUE has a floating-point or complex mode, access it as an
 954      integer of the corresponding size.  This can occur on a machine
 955      with 64 bit registers that uses SFmode for float.  It can also
 956      occur for unaligned float or complex fields.  */
 957   orig_value = value;
 958   if (GET_MODE (value) != VOIDmode
 959       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 960       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 961     {
 962       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 963       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 964     }
 965
 966   /* If OP0 is a multi-word register, narrow it to the affected word.
 967      If the region spans two words, defer to store_split_bit_field.  */
 968   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 969     {
 970       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
 971                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 972       gcc_assert (op0);
 973       bitnum %= BITS_PER_WORD;
 974       if (bitnum + bitsize > BITS_PER_WORD)
 975         {
 976           if (!fallback_p)
 977             return false;
 978
 979           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 980                                  bitregion_end, value, reverse);
 981           return true;
 982         }
 983     }
 984
 985   /* From here on we can assume that the field to be stored in fits
 986      within a word.  If the destination is a register, it too fits
 987      in a word.  */
 988
 989   extraction_insn insv;
 990   if (!MEM_P (op0)
 991       && !reverse
 992       && get_best_reg_extraction_insn (&insv, EP_insv,
 993                                        GET_MODE_BITSIZE (GET_MODE (op0)),
 994                                        fieldmode)
 995       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 996     return true;
 997
 998   /* If OP0 is a memory, try copying it to a register and seeing if a
 999      cheap register alternative is available.  */
1000   if (MEM_P (op0) && !reverse)
1001     {
1002       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1003                                         fieldmode)
1004           && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1005         return true;
1006
1007       rtx_insn *last = get_last_insn ();
1008
1009       /* Try loading part of OP0 into a register, inserting the bitfield
1010          into that, and then copying the result back to OP0.  */
1011       unsigned HOST_WIDE_INT bitpos;
1012       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1013                                                bitregion_start, bitregion_end,
1014                                                fieldmode, &bitpos);
1015       if (xop0)
1016         {
1017           rtx tempreg = copy_to_reg (xop0);
1018           if (store_bit_field_1 (tempreg, bitsize, bitpos,
1019                                  bitregion_start, bitregion_end,
1020                                  fieldmode, orig_value, reverse, false))
1021             {
1022               emit_move_insn (xop0, tempreg);
1023               return true;
1024             }
1025           delete_insns_since (last);
1026         }
1027     }
1028
1029   if (!fallback_p)
1030     return false;
1031
1032   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
1033                          bitregion_end, value, reverse);
1034   return true;
1035 }
1036
1037 /* Generate code to store value from rtx VALUE
1038    into a bit-field within structure STR_RTX
1039    containing BITSIZE bits starting at bit BITNUM.
1040
1041    BITREGION_START is bitpos of the first bitfield in this region.
1042    BITREGION_END is the bitpos of the ending bitfield in this region.
1043    These two fields are 0, if the C++ memory model does not apply,
1044    or we are not interested in keeping track of bitfield regions.
1045
1046    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1047
1048    If REVERSE is true, the store is to be done in reverse order.  */
1049
1050 void
1051 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1052                  unsigned HOST_WIDE_INT bitnum,
1053                  unsigned HOST_WIDE_INT bitregion_start,
1054                  unsigned HOST_WIDE_INT bitregion_end,
1055                  machine_mode fieldmode,
1056                  rtx value, bool reverse)
1057 {
1058   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1059   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
1060                                   bitregion_start, bitregion_end))
1061     {
1062       /* Storing of a full word can be done with a simple store.
1063          We know here that the field can be accessed with one single
1064          instruction.  For targets that support unaligned memory,
1065          an unaligned access may be necessary.  */
1066       if (bitsize == GET_MODE_BITSIZE (fieldmode))
1067         {
1068           str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
1069                                              bitnum / BITS_PER_UNIT);
1070           if (reverse)
1071             value = flip_storage_order (fieldmode, value);
1072           gcc_assert (bitnum % BITS_PER_UNIT == 0);
1073           emit_move_insn (str_rtx, value);
1074         }
1075       else
1076         {
1077           rtx temp;
1078
1079           str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
1080                                           &bitnum);
1081           gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
1082           temp = copy_to_reg (str_rtx);
1083           if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1084                                   fieldmode, value, reverse, true))
1085             gcc_unreachable ();
1086
1087           emit_move_insn (str_rtx, temp);
1088         }
1089
1090       return;
1091     }
1092
1093   /* Under the C++0x memory model, we must not touch bits outside the
1094      bit region.  Adjust the address to start at the beginning of the
1095      bit region.  */
1096   if (MEM_P (str_rtx) && bitregion_start > 0)
1097     {
1098       machine_mode bestmode;
1099       HOST_WIDE_INT offset, size;
1100
1101       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1102
1103       offset = bitregion_start / BITS_PER_UNIT;
1104       bitnum -= bitregion_start;
1105       size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1106       bitregion_end -= bitregion_start;
1107       bitregion_start = 0;
1108       bestmode = get_best_mode (bitsize, bitnum,
1109                                 bitregion_start, bitregion_end,
1110                                 MEM_ALIGN (str_rtx), VOIDmode,
1111                                 MEM_VOLATILE_P (str_rtx));
1112       str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1113     }
1114
1115   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1116                           bitregion_start, bitregion_end,
1117                           fieldmode, value, reverse, true))
1118     gcc_unreachable ();
1119 }
1120 \f
1121 /* Use shifts and boolean operations to store VALUE into a bit field of
1122    width BITSIZE in OP0, starting at bit BITNUM.
1123
1124    If REVERSE is true, the store is to be done in reverse order.  */
1125
1126 static void
1127 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1128                        unsigned HOST_WIDE_INT bitnum,
1129                        unsigned HOST_WIDE_INT bitregion_start,
1130                        unsigned HOST_WIDE_INT bitregion_end,
1131                        rtx value, bool reverse)
1132 {
1133   /* There is a case not handled here:
1134      a structure with a known alignment of just a halfword
1135      and a field split across two aligned halfwords within the structure.
1136      Or likewise a structure with a known alignment of just a byte
1137      and a field split across two bytes.
1138      Such cases are not supposed to be able to occur.  */
1139
1140   if (MEM_P (op0))
1141     {
1142       machine_mode mode = GET_MODE (op0);
1143       if (GET_MODE_BITSIZE (mode) == 0
1144           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1145         mode = word_mode;
1146       mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1147                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1148
1149       if (mode == VOIDmode)
1150         {
1151           /* The only way this should occur is if the field spans word
1152              boundaries.  */
1153           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1154                                  bitregion_end, value, reverse);
1155           return;
1156         }
1157
1158       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1159     }
1160
1161   store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
1162 }
1163
1164 /* Helper function for store_fixed_bit_field, stores
1165    the bit field always using the MODE of OP0.  */
1166
1167 static void
1168 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1169                          unsigned HOST_WIDE_INT bitnum,
1170                          rtx value, bool reverse)
1171 {
1172   machine_mode mode;
1173   rtx temp;
1174   int all_zero = 0;
1175   int all_one = 0;
1176
1177   mode = GET_MODE (op0);
1178   gcc_assert (SCALAR_INT_MODE_P (mode));
1179
1180   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1181      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1182
1183   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1184     /* BITNUM is the distance between our msb
1185        and that of the containing datum.
1186        Convert it to the distance from the lsb.  */
1187     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1188
1189   /* Now BITNUM is always the distance between our lsb
1190      and that of OP0.  */
1191
1192   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1193      we must first convert its mode to MODE.  */
1194
1195   if (CONST_INT_P (value))
1196     {
1197       unsigned HOST_WIDE_INT v = UINTVAL (value);
1198
1199       if (bitsize < HOST_BITS_PER_WIDE_INT)
1200         v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;
1201
1202       if (v == 0)
1203         all_zero = 1;
1204       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1205                 && v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
1206                || (bitsize == HOST_BITS_PER_WIDE_INT
1207                    && v == (unsigned HOST_WIDE_INT) -1))
1208         all_one = 1;
1209
1210       value = lshift_value (mode, v, bitnum);
1211     }
1212   else
1213     {
1214       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1215                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1216
1217       if (GET_MODE (value) != mode)
1218         value = convert_to_mode (mode, value, 1);
1219
1220       if (must_and)
1221         value = expand_binop (mode, and_optab, value,
1222                               mask_rtx (mode, 0, bitsize, 0),
1223                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1224       if (bitnum > 0)
1225         value = expand_shift (LSHIFT_EXPR, mode, value,
1226                               bitnum, NULL_RTX, 1);
1227     }
1228
1229   if (reverse)
1230     value = flip_storage_order (mode, value);
1231
1232   /* Now clear the chosen bits in OP0,
1233      except that if VALUE is -1 we need not bother.  */
1234   /* We keep the intermediates in registers to allow CSE to combine
1235      consecutive bitfield assignments.  */
1236
1237   temp = force_reg (mode, op0);
1238
1239   if (! all_one)
1240     {
1241       rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1242       if (reverse)
1243         mask = flip_storage_order (mode, mask);
1244       temp = expand_binop (mode, and_optab, temp, mask,
1245                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1246       temp = force_reg (mode, temp);
1247     }
1248
1249   /* Now logical-or VALUE into OP0, unless it is zero.  */
1250
1251   if (! all_zero)
1252     {
1253       temp = expand_binop (mode, ior_optab, temp, value,
1254                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1255       temp = force_reg (mode, temp);
1256     }
1257
1258   if (op0 != temp)
1259     {
1260       op0 = copy_rtx (op0);
1261       emit_move_insn (op0, temp);
1262     }
1263 }
1264 \f
1265 /* Store a bit field that is split across multiple accessible memory objects.
1266
1267    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1268    BITSIZE is the field width; BITPOS the position of its first bit
1269    (within the word).
1270    VALUE is the value to store.
1271
1272    If REVERSE is true, the store is to be done in reverse order.
1273
1274    This does not yet handle fields wider than BITS_PER_WORD.  */
1275
1276 static void
1277 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1278                        unsigned HOST_WIDE_INT bitpos,
1279                        unsigned HOST_WIDE_INT bitregion_start,
1280                        unsigned HOST_WIDE_INT bitregion_end,
1281                        rtx value, bool reverse)
1282 {
1283   unsigned int unit, total_bits, bitsdone = 0;
1284
1285   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1286      much at a time.  */
1287   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1288     unit = BITS_PER_WORD;
1289   else
1290     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1291
1292   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1293      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1294      again, and we will mutually recurse forever.  */
1295   if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1296     unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1297
1298   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1299      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1300      that VALUE might be a floating-point constant.  */
1301   if (CONSTANT_P (value) && !CONST_INT_P (value))
1302     {
1303       rtx word = gen_lowpart_common (word_mode, value);
1304
1305       if (word && (value != word))
1306         value = word;
1307       else
1308         value = gen_lowpart_common (word_mode,
1309                                     force_reg (GET_MODE (value) != VOIDmode
1310                                                ? GET_MODE (value)
1311                                                : word_mode, value));
1312     }
1313
1314   total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1315
1316   while (bitsdone < bitsize)
1317     {
1318       unsigned HOST_WIDE_INT thissize;
1319       unsigned HOST_WIDE_INT thispos;
1320       unsigned HOST_WIDE_INT offset;
1321       rtx part, word;
1322
1323       offset = (bitpos + bitsdone) / unit;
1324       thispos = (bitpos + bitsdone) % unit;
1325
1326       /* When region of bytes we can touch is restricted, decrease
1327          UNIT close to the end of the region as needed.  If op0 is a REG
1328          or SUBREG of REG, don't do this, as there can't be data races
1329          on a register and we can expand shorter code in some cases.  */
1330       if (bitregion_end
1331           && unit > BITS_PER_UNIT
1332           && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1333           && !REG_P (op0)
1334           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1335         {
1336           unit = unit / 2;
1337           continue;
1338         }
1339
1340       /* THISSIZE must not overrun a word boundary.  Otherwise,
1341          store_fixed_bit_field will call us again, and we will mutually
1342          recurse forever.  */
1343       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1344       thissize = MIN (thissize, unit - thispos);
1345
1346       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1347         {
1348           /* Fetch successively less significant portions.  */
1349           if (CONST_INT_P (value))
1350             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1351                              >> (bitsize - bitsdone - thissize))
1352                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1353           /* Likewise, but the source is little-endian.  */
1354           else if (reverse)
1355             part = extract_fixed_bit_field (word_mode, value, thissize,
1356                                             bitsize - bitsdone - thissize,
1357                                             NULL_RTX, 1, false);
1358           else
1359             {
1360               int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1361               /* The args are chosen so that the last part includes the
1362                  lsb.  Give extract_bit_field the value it needs (with
1363                  endianness compensation) to fetch the piece we want.  */
1364               part = extract_fixed_bit_field (word_mode, value, thissize,
1365                                               total_bits - bitsize + bitsdone,
1366                                               NULL_RTX, 1, false);
1367             }
1368         }
1369       else
1370         {
1371           /* Fetch successively more significant portions.  */
1372           if (CONST_INT_P (value))
1373             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1374                              >> bitsdone)
1375                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1376           /* Likewise, but the source is big-endian.  */
1377           else if (reverse)
1378             part = extract_fixed_bit_field (word_mode, value, thissize,
1379                                             total_bits - bitsdone - thissize,
1380                                             NULL_RTX, 1, false);
1381           else
1382             part = extract_fixed_bit_field (word_mode, value, thissize,
1383                                             bitsdone, NULL_RTX, 1, false);
1384         }
1385
1386       /* If OP0 is a register, then handle OFFSET here.
1387
1388          When handling multiword bitfields, extract_bit_field may pass
1389          down a word_mode SUBREG of a larger REG for a bitfield that actually
1390          crosses a word boundary.  Thus, for a SUBREG, we must find
1391          the current word starting from the base register.  */
1392       if (GET_CODE (op0) == SUBREG)
1393         {
1394           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1395                             + (offset * unit / BITS_PER_WORD);
1396           machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1397           if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1398             word = word_offset ? const0_rtx : op0;
1399           else
1400             word = operand_subword_force (SUBREG_REG (op0), word_offset,
1401                                           GET_MODE (SUBREG_REG (op0)));
1402           offset &= BITS_PER_WORD / unit - 1;
1403         }
1404       else if (REG_P (op0))
1405         {
1406           machine_mode op0_mode = GET_MODE (op0);
1407           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1408             word = offset ? const0_rtx : op0;
1409           else
1410             word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1411                                           GET_MODE (op0));
1412           offset &= BITS_PER_WORD / unit - 1;
1413         }
1414       else
1415         word = op0;
1416
1417       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1418          it is just an out-of-bounds access.  Ignore it.  */
1419       if (word != const0_rtx)
1420         store_fixed_bit_field (word, thissize, offset * unit + thispos,
1421                                bitregion_start, bitregion_end, part,
1422                                reverse);
1423       bitsdone += thissize;
1424     }
1425 }
1426 \f
1427 /* A subroutine of extract_bit_field_1 that converts return value X
1428    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1429    to extract_bit_field.  */
1430
1431 static rtx
1432 convert_extracted_bit_field (rtx x, machine_mode mode,
1433                              machine_mode tmode, bool unsignedp)
1434 {
1435   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1436     return x;
1437
1438   /* If the x mode is not a scalar integral, first convert to the
1439      integer mode of that size and then access it as a floating-point
1440      value via a SUBREG.  */
1441   if (!SCALAR_INT_MODE_P (tmode))
1442     {
1443       machine_mode smode;
1444
1445       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1446       x = convert_to_mode (smode, x, unsignedp);
1447       x = force_reg (smode, x);
1448       return gen_lowpart (tmode, x);
1449     }
1450
1451   return convert_to_mode (tmode, x, unsignedp);
1452 }
1453
1454 /* Try to use an ext(z)v pattern to extract a field from OP0.
1455    Return the extracted value on success, otherwise return null.
1456    EXT_MODE is the mode of the extraction and the other arguments
1457    are as for extract_bit_field.  */
1458
1459 static rtx
1460 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1461                               unsigned HOST_WIDE_INT bitsize,
1462                               unsigned HOST_WIDE_INT bitnum,
1463                               int unsignedp, rtx target,
1464                               machine_mode mode, machine_mode tmode)
1465 {
1466   struct expand_operand ops[4];
1467   rtx spec_target = target;
1468   rtx spec_target_subreg = 0;
1469   machine_mode ext_mode = extv->field_mode;
1470   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1471
1472   if (bitsize == 0 || unit < bitsize)
1473     return NULL_RTX;
1474
1475   if (MEM_P (op0))
1476     /* Get a reference to the first byte of the field.  */
1477     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1478                                 &bitnum);
1479   else
1480     {
1481       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1482       if (BYTES_BIG_ENDIAN)
1483         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1484
1485       /* If op0 is a register, we need it in EXT_MODE to make it
1486          acceptable to the format of ext(z)v.  */
1487       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1488         return NULL_RTX;
1489       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1490         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1491     }
1492
1493   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1494      "backwards" from the size of the unit we are extracting from.
1495      Otherwise, we count bits from the most significant on a
1496      BYTES/BITS_BIG_ENDIAN machine.  */
1497
1498   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1499     bitnum = unit - bitsize - bitnum;
1500
1501   if (target == 0)
1502     target = spec_target = gen_reg_rtx (tmode);
1503
1504   if (GET_MODE (target) != ext_mode)
1505     {
1506       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1507          between the mode of the extraction (word_mode) and the target
1508          mode.  Instead, create a temporary and use convert_move to set
1509          the target.  */
1510       if (REG_P (target)
1511           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1512         {
1513           target = gen_lowpart (ext_mode, target);
1514           if (GET_MODE_PRECISION (ext_mode)
1515               > GET_MODE_PRECISION (GET_MODE (spec_target)))
1516             spec_target_subreg = target;
1517         }
1518       else
1519         target = gen_reg_rtx (ext_mode);
1520     }
1521
1522   create_output_operand (&ops[0], target, ext_mode);
1523   create_fixed_operand (&ops[1], op0);
1524   create_integer_operand (&ops[2], bitsize);
1525   create_integer_operand (&ops[3], bitnum);
1526   if (maybe_expand_insn (extv->icode, 4, ops))
1527     {
1528       target = ops[0].value;
1529       if (target == spec_target)
1530         return target;
1531       if (target == spec_target_subreg)
1532         return spec_target;
1533       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1534     }
1535   return NULL_RTX;
1536 }
1537
1538 /* A subroutine of extract_bit_field, with the same arguments.
1539    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1540    if we can find no other means of implementing the operation.
1541    if FALLBACK_P is false, return NULL instead.  */
1542
1543 static rtx
1544 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1545                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1546                      machine_mode mode, machine_mode tmode,
1547                      bool reverse, bool fallback_p)
1548 {
1549   rtx op0 = str_rtx;
1550   machine_mode int_mode;
1551   machine_mode mode1;
1552
1553   if (tmode == VOIDmode)
1554     tmode = mode;
1555
1556   while (GET_CODE (op0) == SUBREG)
1557     {
1558       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1559       op0 = SUBREG_REG (op0);
1560     }
1561
1562   /* If we have an out-of-bounds access to a register, just return an
1563      uninitialized register of the required mode.  This can occur if the
1564      source code contains an out-of-bounds access to a small array.  */
1565   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1566     return gen_reg_rtx (tmode);
1567
1568   if (REG_P (op0)
1569       && mode == GET_MODE (op0)
1570       && bitnum == 0
1571       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1572     {
1573       if (reverse)
1574         op0 = flip_storage_order (mode, op0);
1575       /* We're trying to extract a full register from itself.  */
1576       return op0;
1577     }
1578
1579   /* See if we can get a better vector mode before extracting.  */
1580   if (VECTOR_MODE_P (GET_MODE (op0))
1581       && !MEM_P (op0)
1582       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1583     {
1584       machine_mode new_mode;
1585
1586       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1587         new_mode = MIN_MODE_VECTOR_FLOAT;
1588       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1589         new_mode = MIN_MODE_VECTOR_FRACT;
1590       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1591         new_mode = MIN_MODE_VECTOR_UFRACT;
1592       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1593         new_mode = MIN_MODE_VECTOR_ACCUM;
1594       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1595         new_mode = MIN_MODE_VECTOR_UACCUM;
1596       else
1597         new_mode = MIN_MODE_VECTOR_INT;
1598
1599       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1600         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1601             && targetm.vector_mode_supported_p (new_mode))
1602           break;
1603       if (new_mode != VOIDmode)
1604         op0 = gen_lowpart (new_mode, op0);
1605     }
1606
1607   /* Use vec_extract patterns for extracting parts of vectors whenever
1608      available.  */
1609   if (VECTOR_MODE_P (GET_MODE (op0))
1610       && !MEM_P (op0)
1611       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1612       && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1613           == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
1614     {
1615       struct expand_operand ops[3];
1616       machine_mode outermode = GET_MODE (op0);
1617       machine_mode innermode = GET_MODE_INNER (outermode);
1618       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1619       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1620
1621       create_output_operand (&ops[0], target, innermode);
1622       create_input_operand (&ops[1], op0, outermode);
1623       create_integer_operand (&ops[2], pos);
1624       if (maybe_expand_insn (icode, 3, ops))
1625         {
1626           target = ops[0].value;
1627           if (GET_MODE (target) != mode)
1628             return gen_lowpart (tmode, target);
1629           return target;
1630         }
1631     }
1632
1633   /* Make sure we are playing with integral modes.  Pun with subregs
1634      if we aren't.  */
1635   {
1636     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1637     if (imode != GET_MODE (op0))
1638       {
1639         if (MEM_P (op0))
1640           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1641         else if (imode != BLKmode)
1642           {
1643             op0 = gen_lowpart (imode, op0);
1644
1645             /* If we got a SUBREG, force it into a register since we
1646                aren't going to be able to do another SUBREG on it.  */
1647             if (GET_CODE (op0) == SUBREG)
1648               op0 = force_reg (imode, op0);
1649           }
1650         else if (REG_P (op0))
1651           {
1652             rtx reg, subreg;
1653             imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1654                                             MODE_INT);
1655             reg = gen_reg_rtx (imode);
1656             subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1657             emit_move_insn (subreg, op0);
1658             op0 = reg;
1659             bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1660           }
1661         else
1662           {
1663             HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1664             rtx mem = assign_stack_temp (GET_MODE (op0), size);
1665             emit_move_insn (mem, op0);
1666             op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1667           }
1668       }
1669   }
1670
1671   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1672      If that's wrong, the solution is to test for it and set TARGET to 0
1673      if needed.  */
1674
1675   /* Get the mode of the field to use for atomic access or subreg
1676      conversion.  */
1677   mode1 = mode;
1678   if (SCALAR_INT_MODE_P (tmode))
1679     {
1680       machine_mode try_mode = mode_for_size (bitsize,
1681                                                   GET_MODE_CLASS (tmode), 0);
1682       if (try_mode != BLKmode)
1683         mode1 = try_mode;
1684     }
1685   gcc_assert (mode1 != BLKmode);
1686
1687   /* Extraction of a full MODE1 value can be done with a subreg as long
1688      as the least significant bit of the value is the least significant
1689      bit of either OP0 or a word of OP0.  */
1690   if (!MEM_P (op0)
1691       && !reverse
1692       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1693       && bitsize == GET_MODE_BITSIZE (mode1)
1694       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1695     {
1696       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1697                                      bitnum / BITS_PER_UNIT);
1698       if (sub)
1699         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1700     }
1701
1702   /* Extraction of a full MODE1 value can be done with a load as long as
1703      the field is on a byte boundary and is sufficiently aligned.  */
1704   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1705     {
1706       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1707       if (reverse)
1708         op0 = flip_storage_order (mode1, op0);
1709       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1710     }
1711
1712   /* Handle fields bigger than a word.  */
1713
1714   if (bitsize > BITS_PER_WORD)
1715     {
1716       /* Here we transfer the words of the field
1717          in the order least significant first.
1718          This is because the most significant word is the one which may
1719          be less than full.  */
1720
1721       const bool backwards = WORDS_BIG_ENDIAN;
1722       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1723       unsigned int i;
1724       rtx_insn *last;
1725
1726       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1727         target = gen_reg_rtx (mode);
1728
1729       /* In case we're about to clobber a base register or something
1730          (see gcc.c-torture/execute/20040625-1.c).   */
1731       if (reg_mentioned_p (target, str_rtx))
1732         target = gen_reg_rtx (mode);
1733
1734       /* Indicate for flow that the entire target reg is being set.  */
1735       emit_clobber (target);
1736
1737       last = get_last_insn ();
1738       for (i = 0; i < nwords; i++)
1739         {
1740           /* If I is 0, use the low-order word in both field and target;
1741              if I is 1, use the next to lowest word; and so on.  */
1742           /* Word number in TARGET to use.  */
1743           unsigned int wordnum
1744             = (backwards
1745                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1746                : i);
1747           /* Offset from start of field in OP0.  */
1748           unsigned int bit_offset = (backwards ^ reverse
1749                                      ? MAX ((int) bitsize - ((int) i + 1)
1750                                             * BITS_PER_WORD,
1751                                             0)
1752                                      : (int) i * BITS_PER_WORD);
1753           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1754           rtx result_part
1755             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1756                                              bitsize - i * BITS_PER_WORD),
1757                                    bitnum + bit_offset, 1, target_part,
1758                                    mode, word_mode, reverse, fallback_p);
1759
1760           gcc_assert (target_part);
1761           if (!result_part)
1762             {
1763               delete_insns_since (last);
1764               return NULL;
1765             }
1766
1767           if (result_part != target_part)
1768             emit_move_insn (target_part, result_part);
1769         }
1770
1771       if (unsignedp)
1772         {
1773           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1774              need to be zero'd out.  */
1775           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1776             {
1777               unsigned int i, total_words;
1778
1779               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1780               for (i = nwords; i < total_words; i++)
1781                 emit_move_insn
1782                   (operand_subword (target,
1783                                     backwards ? total_words - i - 1 : i,
1784                                     1, VOIDmode),
1785                    const0_rtx);
1786             }
1787           return target;
1788         }
1789
1790       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1791       target = expand_shift (LSHIFT_EXPR, mode, target,
1792                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1793       return expand_shift (RSHIFT_EXPR, mode, target,
1794                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1795     }
1796
1797   /* If OP0 is a multi-word register, narrow it to the affected word.
1798      If the region spans two words, defer to extract_split_bit_field.  */
1799   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1800     {
1801       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1802                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1803       bitnum %= BITS_PER_WORD;
1804       if (bitnum + bitsize > BITS_PER_WORD)
1805         {
1806           if (!fallback_p)
1807             return NULL_RTX;
1808           target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1809                                             reverse);
1810           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1811         }
1812     }
1813
1814   /* From here on we know the desired field is smaller than a word.
1815      If OP0 is a register, it too fits within a word.  */
1816   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1817   extraction_insn extv;
1818   if (!MEM_P (op0)
1819       && !reverse
1820       /* ??? We could limit the structure size to the part of OP0 that
1821          contains the field, with appropriate checks for endianness
1822          and TRULY_NOOP_TRUNCATION.  */
1823       && get_best_reg_extraction_insn (&extv, pattern,
1824                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1825                                        tmode))
1826     {
1827       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1828                                                  unsignedp, target, mode,
1829                                                  tmode);
1830       if (result)
1831         return result;
1832     }
1833
1834   /* If OP0 is a memory, try copying it to a register and seeing if a
1835      cheap register alternative is available.  */
1836   if (MEM_P (op0) & !reverse)
1837     {
1838       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1839                                         tmode))
1840         {
1841           rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1842                                                      bitnum, unsignedp,
1843                                                      target, mode,
1844                                                      tmode);
1845           if (result)
1846             return result;
1847         }
1848
1849       rtx_insn *last = get_last_insn ();
1850
1851       /* Try loading part of OP0 into a register and extracting the
1852          bitfield from that.  */
1853       unsigned HOST_WIDE_INT bitpos;
1854       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1855                                                0, 0, tmode, &bitpos);
1856       if (xop0)
1857         {
1858           xop0 = copy_to_reg (xop0);
1859           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1860                                             unsignedp, target,
1861                                             mode, tmode, reverse, false);
1862           if (result)
1863             return result;
1864           delete_insns_since (last);
1865         }
1866     }
1867
1868   if (!fallback_p)
1869     return NULL;
1870
1871   /* Find a correspondingly-sized integer field, so we can apply
1872      shifts and masks to it.  */
1873   int_mode = int_mode_for_mode (tmode);
1874   if (int_mode == BLKmode)
1875     int_mode = int_mode_for_mode (mode);
1876   /* Should probably push op0 out to memory and then do a load.  */
1877   gcc_assert (int_mode != BLKmode);
1878
1879   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target,
1880                                     unsignedp, reverse);
1881
1882   /* Complex values must be reversed piecewise, so we need to undo the global
1883      reversal, convert to the complex mode and reverse again.  */
1884   if (reverse && COMPLEX_MODE_P (tmode))
1885     {
1886       target = flip_storage_order (int_mode, target);
1887       target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1888       target = flip_storage_order (tmode, target);
1889     }
1890   else
1891     target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1892
1893   return target;
1894 }
1895
1896 /* Generate code to extract a byte-field from STR_RTX
1897    containing BITSIZE bits, starting at BITNUM,
1898    and put it in TARGET if possible (if TARGET is nonzero).
1899    Regardless of TARGET, we return the rtx for where the value is placed.
1900
1901    STR_RTX is the structure containing the byte (a REG or MEM).
1902    UNSIGNEDP is nonzero if this is an unsigned bit field.
1903    MODE is the natural mode of the field value once extracted.
1904    TMODE is the mode the caller would like the value to have;
1905    but the value may be returned with type MODE instead.
1906
1907    If REVERSE is true, the extraction is to be done in reverse order.
1908
1909    If a TARGET is specified and we can store in it at no extra cost,
1910    we do so, and return TARGET.
1911    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1912    if they are equally easy.  */
1913
1914 rtx
1915 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1916                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1917                    machine_mode mode, machine_mode tmode, bool reverse)
1918 {
1919   machine_mode mode1;
1920
1921   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1922   if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1923     mode1 = GET_MODE (str_rtx);
1924   else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1925     mode1 = GET_MODE (target);
1926   else
1927     mode1 = tmode;
1928
1929   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1930     {
1931       /* Extraction of a full MODE1 value can be done with a simple load.
1932          We know here that the field can be accessed with one single
1933          instruction.  For targets that support unaligned memory,
1934          an unaligned access may be necessary.  */
1935       if (bitsize == GET_MODE_BITSIZE (mode1))
1936         {
1937           rtx result = adjust_bitfield_address (str_rtx, mode1,
1938                                                 bitnum / BITS_PER_UNIT);
1939           if (reverse)
1940             result = flip_storage_order (mode1, result);
1941           gcc_assert (bitnum % BITS_PER_UNIT == 0);
1942           return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1943         }
1944
1945       str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1946                                       &bitnum);
1947       gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1948       str_rtx = copy_to_reg (str_rtx);
1949     }
1950
1951   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1952                               target, mode, tmode, reverse, true);
1953 }
1954 \f
1955 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1956    from bit BITNUM of OP0.
1957
1958    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1959    If REVERSE is true, the extraction is to be done in reverse order.
1960
1961    If TARGET is nonzero, attempts to store the value there
1962    and return TARGET, but this is not guaranteed.
1963    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1964
1965 static rtx
1966 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1967                          unsigned HOST_WIDE_INT bitsize,
1968                          unsigned HOST_WIDE_INT bitnum, rtx target,
1969                          int unsignedp, bool reverse)
1970 {
1971   if (MEM_P (op0))
1972     {
1973       machine_mode mode
1974         = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1975                          MEM_VOLATILE_P (op0));
1976
1977       if (mode == VOIDmode)
1978         /* The only way this should occur is if the field spans word
1979            boundaries.  */
1980         return extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1981                                         reverse);
1982
1983       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1984     }
1985
1986   return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1987                                     target, unsignedp, reverse);
1988 }
1989
1990 /* Helper function for extract_fixed_bit_field, extracts
1991    the bit field always using the MODE of OP0.  */
1992
1993 static rtx
1994 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1995                            unsigned HOST_WIDE_INT bitsize,
1996                            unsigned HOST_WIDE_INT bitnum, rtx target,
1997                            int unsignedp, bool reverse)
1998 {
1999   machine_mode mode = GET_MODE (op0);
2000   gcc_assert (SCALAR_INT_MODE_P (mode));
2001
2002   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2003      for invalid input, such as extract equivalent of f5 from
2004      gcc.dg/pr48335-2.c.  */
2005
2006   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2007     /* BITNUM is the distance between our msb and that of OP0.
2008        Convert it to the distance from the lsb.  */
2009     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2010
2011   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2012      We have reduced the big-endian case to the little-endian case.  */
2013   if (reverse)
2014     op0 = flip_storage_order (mode, op0);
2015
2016   if (unsignedp)
2017     {
2018       if (bitnum)
2019         {
2020           /* If the field does not already start at the lsb,
2021              shift it so it does.  */
2022           /* Maybe propagate the target for the shift.  */
2023           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2024           if (tmode != mode)
2025             subtarget = 0;
2026           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2027         }
2028       /* Convert the value to the desired mode.  */
2029       if (mode != tmode)
2030         op0 = convert_to_mode (tmode, op0, 1);
2031
2032       /* Unless the msb of the field used to be the msb when we shifted,
2033          mask out the upper bits.  */
2034
2035       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2036         return expand_binop (GET_MODE (op0), and_optab, op0,
2037                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
2038                              target, 1, OPTAB_LIB_WIDEN);
2039       return op0;
2040     }
2041
2042   /* To extract a signed bit-field, first shift its msb to the msb of the word,
2043      then arithmetic-shift its lsb to the lsb of the word.  */
2044   op0 = force_reg (mode, op0);
2045
2046   /* Find the narrowest integer mode that contains the field.  */
2047
2048   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
2049        mode = GET_MODE_WIDER_MODE (mode))
2050     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
2051       {
2052         op0 = convert_to_mode (mode, op0, 0);
2053         break;
2054       }
2055
2056   if (mode != tmode)
2057     target = 0;
2058
2059   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2060     {
2061       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2062       /* Maybe propagate the target for the shift.  */
2063       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2064       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2065     }
2066
2067   return expand_shift (RSHIFT_EXPR, mode, op0,
2068                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2069 }
2070
2071 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2072    VALUE << BITPOS.  */
2073
2074 static rtx
2075 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2076               int bitpos)
2077 {
2078   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2079 }
2080 \f
2081 /* Extract a bit field that is split across two words
2082    and return an RTX for the result.
2083
2084    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2085    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2086    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2087
2088    If REVERSE is true, the extraction is to be done in reverse order.  */
2089
2090 static rtx
2091 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2092                          unsigned HOST_WIDE_INT bitpos, int unsignedp,
2093                          bool reverse)
2094 {
2095   unsigned int unit;
2096   unsigned int bitsdone = 0;
2097   rtx result = NULL_RTX;
2098   int first = 1;
2099
2100   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2101      much at a time.  */
2102   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2103     unit = BITS_PER_WORD;
2104   else
2105     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2106
2107   while (bitsdone < bitsize)
2108     {
2109       unsigned HOST_WIDE_INT thissize;
2110       rtx part, word;
2111       unsigned HOST_WIDE_INT thispos;
2112       unsigned HOST_WIDE_INT offset;
2113
2114       offset = (bitpos + bitsdone) / unit;
2115       thispos = (bitpos + bitsdone) % unit;
2116
2117       /* THISSIZE must not overrun a word boundary.  Otherwise,
2118          extract_fixed_bit_field will call us again, and we will mutually
2119          recurse forever.  */
2120       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2121       thissize = MIN (thissize, unit - thispos);
2122
2123       /* If OP0 is a register, then handle OFFSET here.
2124
2125          When handling multiword bitfields, extract_bit_field may pass
2126          down a word_mode SUBREG of a larger REG for a bitfield that actually
2127          crosses a word boundary.  Thus, for a SUBREG, we must find
2128          the current word starting from the base register.  */
2129       if (GET_CODE (op0) == SUBREG)
2130         {
2131           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2132           word = operand_subword_force (SUBREG_REG (op0), word_offset,
2133                                         GET_MODE (SUBREG_REG (op0)));
2134           offset = 0;
2135         }
2136       else if (REG_P (op0))
2137         {
2138           word = operand_subword_force (op0, offset, GET_MODE (op0));
2139           offset = 0;
2140         }
2141       else
2142         word = op0;
2143
2144       /* Extract the parts in bit-counting order,
2145          whose meaning is determined by BYTES_PER_UNIT.
2146          OFFSET is in UNITs, and UNIT is in bits.  */
2147       part = extract_fixed_bit_field (word_mode, word, thissize,
2148                                       offset * unit + thispos, 0, 1, reverse);
2149       bitsdone += thissize;
2150
2151       /* Shift this part into place for the result.  */
2152       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2153         {
2154           if (bitsize != bitsdone)
2155             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2156                                  bitsize - bitsdone, 0, 1);
2157         }
2158       else
2159         {
2160           if (bitsdone != thissize)
2161             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2162                                  bitsdone - thissize, 0, 1);
2163         }
2164
2165       if (first)
2166         result = part;
2167       else
2168         /* Combine the parts with bitwise or.  This works
2169            because we extracted each part as an unsigned bit field.  */
2170         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2171                                OPTAB_LIB_WIDEN);
2172
2173       first = 0;
2174     }
2175
2176   /* Unsigned bit field: we are done.  */
2177   if (unsignedp)
2178     return result;
2179   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2180   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2181                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2182   return expand_shift (RSHIFT_EXPR, word_mode, result,
2183                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2184 }
2185 \f
2186 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2187    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2188    MODE, fill the upper bits with zeros.  Fail if the layout of either
2189    mode is unknown (as for CC modes) or if the extraction would involve
2190    unprofitable mode punning.  Return the value on success, otherwise
2191    return null.
2192
2193    This is different from gen_lowpart* in these respects:
2194
2195      - the returned value must always be considered an rvalue
2196
2197      - when MODE is wider than SRC_MODE, the extraction involves
2198        a zero extension
2199
2200      - when MODE is smaller than SRC_MODE, the extraction involves
2201        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2202
2203    In other words, this routine performs a computation, whereas the
2204    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2205    operations.  */
2206
2207 rtx
2208 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2209 {
2210   machine_mode int_mode, src_int_mode;
2211
2212   if (mode == src_mode)
2213     return src;
2214
2215   if (CONSTANT_P (src))
2216     {
2217       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2218          fails, it will happily create (subreg (symbol_ref)) or similar
2219          invalid SUBREGs.  */
2220       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2221       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2222       if (ret)
2223         return ret;
2224
2225       if (GET_MODE (src) == VOIDmode
2226           || !validate_subreg (mode, src_mode, src, byte))
2227         return NULL_RTX;
2228
2229       src = force_reg (GET_MODE (src), src);
2230       return gen_rtx_SUBREG (mode, src, byte);
2231     }
2232
2233   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2234     return NULL_RTX;
2235
2236   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2237       && MODES_TIEABLE_P (mode, src_mode))
2238     {
2239       rtx x = gen_lowpart_common (mode, src);
2240       if (x)
2241         return x;
2242     }
2243
2244   src_int_mode = int_mode_for_mode (src_mode);
2245   int_mode = int_mode_for_mode (mode);
2246   if (src_int_mode == BLKmode || int_mode == BLKmode)
2247     return NULL_RTX;
2248
2249   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2250     return NULL_RTX;
2251   if (!MODES_TIEABLE_P (int_mode, mode))
2252     return NULL_RTX;
2253
2254   src = gen_lowpart (src_int_mode, src);
2255   src = convert_modes (int_mode, src_int_mode, src, true);
2256   src = gen_lowpart (mode, src);
2257   return src;
2258 }
2259 \f
2260 /* Add INC into TARGET.  */
2261
2262 void
2263 expand_inc (rtx target, rtx inc)
2264 {
2265   rtx value = expand_binop (GET_MODE (target), add_optab,
2266                             target, inc,
2267                             target, 0, OPTAB_LIB_WIDEN);
2268   if (value != target)
2269     emit_move_insn (target, value);
2270 }
2271
2272 /* Subtract DEC from TARGET.  */
2273
2274 void
2275 expand_dec (rtx target, rtx dec)
2276 {
2277   rtx value = expand_binop (GET_MODE (target), sub_optab,
2278                             target, dec,
2279                             target, 0, OPTAB_LIB_WIDEN);
2280   if (value != target)
2281     emit_move_insn (target, value);
2282 }
2283 \f
2284 /* Output a shift instruction for expression code CODE,
2285    with SHIFTED being the rtx for the value to shift,
2286    and AMOUNT the rtx for the amount to shift by.
2287    Store the result in the rtx TARGET, if that is convenient.
2288    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2289    Return the rtx for where the value is.  */
2290
2291 static rtx
2292 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2293                 rtx amount, rtx target, int unsignedp)
2294 {
2295   rtx op1, temp = 0;
2296   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2297   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2298   optab lshift_optab = ashl_optab;
2299   optab rshift_arith_optab = ashr_optab;
2300   optab rshift_uns_optab = lshr_optab;
2301   optab lrotate_optab = rotl_optab;
2302   optab rrotate_optab = rotr_optab;
2303   machine_mode op1_mode;
2304   machine_mode scalar_mode = mode;
2305   int attempt;
2306   bool speed = optimize_insn_for_speed_p ();
2307
2308   if (VECTOR_MODE_P (mode))
2309     scalar_mode = GET_MODE_INNER (mode);
2310   op1 = amount;
2311   op1_mode = GET_MODE (op1);
2312
2313   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2314      shift amount is a vector, use the vector/vector shift patterns.  */
2315   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2316     {
2317       lshift_optab = vashl_optab;
2318       rshift_arith_optab = vashr_optab;
2319       rshift_uns_optab = vlshr_optab;
2320       lrotate_optab = vrotl_optab;
2321       rrotate_optab = vrotr_optab;
2322     }
2323
2324   /* Previously detected shift-counts computed by NEGATE_EXPR
2325      and shifted in the other direction; but that does not work
2326      on all machines.  */
2327
2328   if (SHIFT_COUNT_TRUNCATED)
2329     {
2330       if (CONST_INT_P (op1)
2331           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2332               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2333         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2334                        % GET_MODE_BITSIZE (scalar_mode));
2335       else if (GET_CODE (op1) == SUBREG
2336                && subreg_lowpart_p (op1)
2337                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2338                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2339         op1 = SUBREG_REG (op1);
2340     }
2341
2342   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2343      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2344      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2345      amount instead.  */
2346   if (rotate
2347       && CONST_INT_P (op1)
2348       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2349                    GET_MODE_BITSIZE (scalar_mode) - 1))
2350     {
2351       op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2352       left = !left;
2353       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2354     }
2355
2356   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2357      Note that this is not the case for bigger values.  For instance a rotation
2358      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2359      0x04030201 (bswapsi).  */
2360   if (rotate
2361       && CONST_INT_P (op1)
2362       && INTVAL (op1) == BITS_PER_UNIT
2363       && GET_MODE_SIZE (scalar_mode) == 2
2364       && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2365     return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2366                                   unsignedp);
2367
2368   if (op1 == const0_rtx)
2369     return shifted;
2370
2371   /* Check whether its cheaper to implement a left shift by a constant
2372      bit count by a sequence of additions.  */
2373   if (code == LSHIFT_EXPR
2374       && CONST_INT_P (op1)
2375       && INTVAL (op1) > 0
2376       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2377       && INTVAL (op1) < MAX_BITS_PER_WORD
2378       && (shift_cost (speed, mode, INTVAL (op1))
2379           > INTVAL (op1) * add_cost (speed, mode))
2380       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2381     {
2382       int i;
2383       for (i = 0; i < INTVAL (op1); i++)
2384         {
2385           temp = force_reg (mode, shifted);
2386           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2387                                   unsignedp, OPTAB_LIB_WIDEN);
2388         }
2389       return shifted;
2390     }
2391
2392   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2393     {
2394       enum optab_methods methods;
2395
2396       if (attempt == 0)
2397         methods = OPTAB_DIRECT;
2398       else if (attempt == 1)
2399         methods = OPTAB_WIDEN;
2400       else
2401         methods = OPTAB_LIB_WIDEN;
2402
2403       if (rotate)
2404         {
2405           /* Widening does not work for rotation.  */
2406           if (methods == OPTAB_WIDEN)
2407             continue;
2408           else if (methods == OPTAB_LIB_WIDEN)
2409             {
2410               /* If we have been unable to open-code this by a rotation,
2411                  do it as the IOR of two shifts.  I.e., to rotate A
2412                  by N bits, compute
2413                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2414                  where C is the bitsize of A.
2415
2416                  It is theoretically possible that the target machine might
2417                  not be able to perform either shift and hence we would
2418                  be making two libcalls rather than just the one for the
2419                  shift (similarly if IOR could not be done).  We will allow
2420                  this extremely unlikely lossage to avoid complicating the
2421                  code below.  */
2422
2423               rtx subtarget = target == shifted ? 0 : target;
2424               rtx new_amount, other_amount;
2425               rtx temp1;
2426
2427               new_amount = op1;
2428               if (op1 == const0_rtx)
2429                 return shifted;
2430               else if (CONST_INT_P (op1))
2431                 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2432                                         - INTVAL (op1));
2433               else
2434                 {
2435                   other_amount
2436                     = simplify_gen_unary (NEG, GET_MODE (op1),
2437                                           op1, GET_MODE (op1));
2438                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2439                   other_amount
2440                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2441                                            gen_int_mode (mask, GET_MODE (op1)));
2442                 }
2443
2444               shifted = force_reg (mode, shifted);
2445
2446               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2447                                      mode, shifted, new_amount, 0, 1);
2448               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2449                                       mode, shifted, other_amount,
2450                                       subtarget, 1);
2451               return expand_binop (mode, ior_optab, temp, temp1, target,
2452                                    unsignedp, methods);
2453             }
2454
2455           temp = expand_binop (mode,
2456                                left ? lrotate_optab : rrotate_optab,
2457                                shifted, op1, target, unsignedp, methods);
2458         }
2459       else if (unsignedp)
2460         temp = expand_binop (mode,
2461                              left ? lshift_optab : rshift_uns_optab,
2462                              shifted, op1, target, unsignedp, methods);
2463
2464       /* Do arithmetic shifts.
2465          Also, if we are going to widen the operand, we can just as well
2466          use an arithmetic right-shift instead of a logical one.  */
2467       if (temp == 0 && ! rotate
2468           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2469         {
2470           enum optab_methods methods1 = methods;
2471
2472           /* If trying to widen a log shift to an arithmetic shift,
2473              don't accept an arithmetic shift of the same size.  */
2474           if (unsignedp)
2475             methods1 = OPTAB_MUST_WIDEN;
2476
2477           /* Arithmetic shift */
2478
2479           temp = expand_binop (mode,
2480                                left ? lshift_optab : rshift_arith_optab,
2481                                shifted, op1, target, unsignedp, methods1);
2482         }
2483
2484       /* We used to try extzv here for logical right shifts, but that was
2485          only useful for one machine, the VAX, and caused poor code
2486          generation there for lshrdi3, so the code was deleted and a
2487          define_expand for lshrsi3 was added to vax.md.  */
2488     }
2489
2490   gcc_assert (temp);
2491   return temp;
2492 }
2493
2494 /* Output a shift instruction for expression code CODE,
2495    with SHIFTED being the rtx for the value to shift,
2496    and AMOUNT the amount to shift by.
2497    Store the result in the rtx TARGET, if that is convenient.
2498    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2499    Return the rtx for where the value is.  */
2500
2501 rtx
2502 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2503               int amount, rtx target, int unsignedp)
2504 {
2505   return expand_shift_1 (code, mode,
2506                          shifted, GEN_INT (amount), target, unsignedp);
2507 }
2508
2509 /* Output a shift instruction for expression code CODE,
2510    with SHIFTED being the rtx for the value to shift,
2511    and AMOUNT the tree for the amount to shift by.
2512    Store the result in the rtx TARGET, if that is convenient.
2513    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2514    Return the rtx for where the value is.  */
2515
2516 rtx
2517 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2518                        tree amount, rtx target, int unsignedp)
2519 {
2520   return expand_shift_1 (code, mode,
2521                          shifted, expand_normal (amount), target, unsignedp);
2522 }
2523
2524 \f
2525 /* Indicates the type of fixup needed after a constant multiplication.
2526    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2527    the result should be negated, and ADD_VARIANT means that the
2528    multiplicand should be added to the result.  */
2529 enum mult_variant {basic_variant, negate_variant, add_variant};
2530
2531 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2532                         const struct mult_cost *, machine_mode mode);
2533 static bool choose_mult_variant (machine_mode, HOST_WIDE_INT,
2534                                  struct algorithm *, enum mult_variant *, int);
2535 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2536                               const struct algorithm *, enum mult_variant);
2537 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2538 static rtx extract_high_half (machine_mode, rtx);
2539 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2540 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2541                                        int, int);
2542 /* Compute and return the best algorithm for multiplying by T.
2543    The algorithm must cost less than cost_limit
2544    If retval.cost >= COST_LIMIT, no algorithm was found and all
2545    other field of the returned struct are undefined.
2546    MODE is the machine mode of the multiplication.  */
2547
2548 static void
2549 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2550             const struct mult_cost *cost_limit, machine_mode mode)
2551 {
2552   int m;
2553   struct algorithm *alg_in, *best_alg;
2554   struct mult_cost best_cost;
2555   struct mult_cost new_limit;
2556   int op_cost, op_latency;
2557   unsigned HOST_WIDE_INT orig_t = t;
2558   unsigned HOST_WIDE_INT q;
2559   int maxm, hash_index;
2560   bool cache_hit = false;
2561   enum alg_code cache_alg = alg_zero;
2562   bool speed = optimize_insn_for_speed_p ();
2563   machine_mode imode;
2564   struct alg_hash_entry *entry_ptr;
2565
2566   /* Indicate that no algorithm is yet found.  If no algorithm
2567      is found, this value will be returned and indicate failure.  */
2568   alg_out->cost.cost = cost_limit->cost + 1;
2569   alg_out->cost.latency = cost_limit->latency + 1;
2570
2571   if (cost_limit->cost < 0
2572       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2573     return;
2574
2575   /* Be prepared for vector modes.  */
2576   imode = GET_MODE_INNER (mode);
2577
2578   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2579
2580   /* Restrict the bits of "t" to the multiplication's mode.  */
2581   t &= GET_MODE_MASK (imode);
2582
2583   /* t == 1 can be done in zero cost.  */
2584   if (t == 1)
2585     {
2586       alg_out->ops = 1;
2587       alg_out->cost.cost = 0;
2588       alg_out->cost.latency = 0;
2589       alg_out->op[0] = alg_m;
2590       return;
2591     }
2592
2593   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2594      fail now.  */
2595   if (t == 0)
2596     {
2597       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2598         return;
2599       else
2600         {
2601           alg_out->ops = 1;
2602           alg_out->cost.cost = zero_cost (speed);
2603           alg_out->cost.latency = zero_cost (speed);
2604           alg_out->op[0] = alg_zero;
2605           return;
2606         }
2607     }
2608
2609   /* We'll be needing a couple extra algorithm structures now.  */
2610
2611   alg_in = XALLOCA (struct algorithm);
2612   best_alg = XALLOCA (struct algorithm);
2613   best_cost = *cost_limit;
2614
2615   /* Compute the hash index.  */
2616   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2617
2618   /* See if we already know what to do for T.  */
2619   entry_ptr = alg_hash_entry_ptr (hash_index);
2620   if (entry_ptr->t == t
2621       && entry_ptr->mode == mode
2622       && entry_ptr->mode == mode
2623       && entry_ptr->speed == speed
2624       && entry_ptr->alg != alg_unknown)
2625     {
2626       cache_alg = entry_ptr->alg;
2627
2628       if (cache_alg == alg_impossible)
2629         {
2630           /* The cache tells us that it's impossible to synthesize
2631              multiplication by T within entry_ptr->cost.  */
2632           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2633             /* COST_LIMIT is at least as restrictive as the one
2634                recorded in the hash table, in which case we have no
2635                hope of synthesizing a multiplication.  Just
2636                return.  */
2637             return;
2638
2639           /* If we get here, COST_LIMIT is less restrictive than the
2640              one recorded in the hash table, so we may be able to
2641              synthesize a multiplication.  Proceed as if we didn't
2642              have the cache entry.  */
2643         }
2644       else
2645         {
2646           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2647             /* The cached algorithm shows that this multiplication
2648                requires more cost than COST_LIMIT.  Just return.  This
2649                way, we don't clobber this cache entry with
2650                alg_impossible but retain useful information.  */
2651             return;
2652
2653           cache_hit = true;
2654
2655           switch (cache_alg)
2656             {
2657             case alg_shift:
2658               goto do_alg_shift;
2659
2660             case alg_add_t_m2:
2661             case alg_sub_t_m2:
2662               goto do_alg_addsub_t_m2;
2663
2664             case alg_add_factor:
2665             case alg_sub_factor:
2666               goto do_alg_addsub_factor;
2667
2668             case alg_add_t2_m:
2669               goto do_alg_add_t2_m;
2670
2671             case alg_sub_t2_m:
2672               goto do_alg_sub_t2_m;
2673
2674             default:
2675               gcc_unreachable ();
2676             }
2677         }
2678     }
2679
2680   /* If we have a group of zero bits at the low-order part of T, try
2681      multiplying by the remaining bits and then doing a shift.  */
2682
2683   if ((t & 1) == 0)
2684     {
2685     do_alg_shift:
2686       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2687       if (m < maxm)
2688         {
2689           q = t >> m;
2690           /* The function expand_shift will choose between a shift and
2691              a sequence of additions, so the observed cost is given as
2692              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2693           op_cost = m * add_cost (speed, mode);
2694           if (shift_cost (speed, mode, m) < op_cost)
2695             op_cost = shift_cost (speed, mode, m);
2696           new_limit.cost = best_cost.cost - op_cost;
2697           new_limit.latency = best_cost.latency - op_cost;
2698           synth_mult (alg_in, q, &new_limit, mode);
2699
2700           alg_in->cost.cost += op_cost;
2701           alg_in->cost.latency += op_cost;
2702           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2703             {
2704               best_cost = alg_in->cost;
2705               std::swap (alg_in, best_alg);
2706               best_alg->log[best_alg->ops] = m;
2707               best_alg->op[best_alg->ops] = alg_shift;
2708             }
2709
2710           /* See if treating ORIG_T as a signed number yields a better
2711              sequence.  Try this sequence only for a negative ORIG_T
2712              as it would be useless for a non-negative ORIG_T.  */
2713           if ((HOST_WIDE_INT) orig_t < 0)
2714             {
2715               /* Shift ORIG_T as follows because a right shift of a
2716                  negative-valued signed type is implementation
2717                  defined.  */
2718               q = ~(~orig_t >> m);
2719               /* The function expand_shift will choose between a shift
2720                  and a sequence of additions, so the observed cost is
2721                  given as MIN (m * add_cost(speed, mode),
2722                  shift_cost(speed, mode, m)).  */
2723               op_cost = m * add_cost (speed, mode);
2724               if (shift_cost (speed, mode, m) < op_cost)
2725                 op_cost = shift_cost (speed, mode, m);
2726               new_limit.cost = best_cost.cost - op_cost;
2727               new_limit.latency = best_cost.latency - op_cost;
2728               synth_mult (alg_in, q, &new_limit, mode);
2729
2730               alg_in->cost.cost += op_cost;
2731               alg_in->cost.latency += op_cost;
2732               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2733                 {
2734                   best_cost = alg_in->cost;
2735                   std::swap (alg_in, best_alg);
2736                   best_alg->log[best_alg->ops] = m;
2737                   best_alg->op[best_alg->ops] = alg_shift;
2738                 }
2739             }
2740         }
2741       if (cache_hit)
2742         goto done;
2743     }
2744
2745   /* If we have an odd number, add or subtract one.  */
2746   if ((t & 1) != 0)
2747     {
2748       unsigned HOST_WIDE_INT w;
2749
2750     do_alg_addsub_t_m2:
2751       for (w = 1; (w & t) != 0; w <<= 1)
2752         ;
2753       /* If T was -1, then W will be zero after the loop.  This is another
2754          case where T ends with ...111.  Handling this with (T + 1) and
2755          subtract 1 produces slightly better code and results in algorithm
2756          selection much faster than treating it like the ...0111 case
2757          below.  */
2758       if (w == 0
2759           || (w > 2
2760               /* Reject the case where t is 3.
2761                  Thus we prefer addition in that case.  */
2762               && t != 3))
2763         {
2764           /* T ends with ...111.  Multiply by (T + 1) and subtract T.  */
2765
2766           op_cost = add_cost (speed, mode);
2767           new_limit.cost = best_cost.cost - op_cost;
2768           new_limit.latency = best_cost.latency - op_cost;
2769           synth_mult (alg_in, t + 1, &new_limit, mode);
2770
2771           alg_in->cost.cost += op_cost;
2772           alg_in->cost.latency += op_cost;
2773           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2774             {
2775               best_cost = alg_in->cost;
2776               std::swap (alg_in, best_alg);
2777               best_alg->log[best_alg->ops] = 0;
2778               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2779             }
2780         }
2781       else
2782         {
2783           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add T.  */
2784
2785           op_cost = add_cost (speed, mode);
2786           new_limit.cost = best_cost.cost - op_cost;
2787           new_limit.latency = best_cost.latency - op_cost;
2788           synth_mult (alg_in, t - 1, &new_limit, mode);
2789
2790           alg_in->cost.cost += op_cost;
2791           alg_in->cost.latency += op_cost;
2792           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2793             {
2794               best_cost = alg_in->cost;
2795               std::swap (alg_in, best_alg);
2796               best_alg->log[best_alg->ops] = 0;
2797               best_alg->op[best_alg->ops] = alg_add_t_m2;
2798             }
2799         }
2800
2801       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2802          quickly with a - a * n for some appropriate constant n.  */
2803       m = exact_log2 (-orig_t + 1);
2804       if (m >= 0 && m < maxm)
2805         {
2806           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2807           /* If the target has a cheap shift-and-subtract insn use
2808              that in preference to a shift insn followed by a sub insn.
2809              Assume that the shift-and-sub is "atomic" with a latency
2810              equal to it's cost, otherwise assume that on superscalar
2811              hardware the shift may be executed concurrently with the
2812              earlier steps in the algorithm.  */
2813           if (shiftsub1_cost (speed, mode, m) <= op_cost)
2814             {
2815               op_cost = shiftsub1_cost (speed, mode, m);
2816               op_latency = op_cost;
2817             }
2818           else
2819             op_latency = add_cost (speed, mode);
2820
2821           new_limit.cost = best_cost.cost - op_cost;
2822           new_limit.latency = best_cost.latency - op_latency;
2823           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2824                       &new_limit, mode);
2825
2826           alg_in->cost.cost += op_cost;
2827           alg_in->cost.latency += op_latency;
2828           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2829             {
2830               best_cost = alg_in->cost;
2831               std::swap (alg_in, best_alg);
2832               best_alg->log[best_alg->ops] = m;
2833               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2834             }
2835         }
2836
2837       if (cache_hit)
2838         goto done;
2839     }
2840
2841   /* Look for factors of t of the form
2842      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2843      If we find such a factor, we can multiply by t using an algorithm that
2844      multiplies by q, shift the result by m and add/subtract it to itself.
2845
2846      We search for large factors first and loop down, even if large factors
2847      are less probable than small; if we find a large factor we will find a
2848      good sequence quickly, and therefore be able to prune (by decreasing
2849      COST_LIMIT) the search.  */
2850
2851  do_alg_addsub_factor:
2852   for (m = floor_log2 (t - 1); m >= 2; m--)
2853     {
2854       unsigned HOST_WIDE_INT d;
2855
2856       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2857       if (t % d == 0 && t > d && m < maxm
2858           && (!cache_hit || cache_alg == alg_add_factor))
2859         {
2860           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2861           if (shiftadd_cost (speed, mode, m) <= op_cost)
2862             op_cost = shiftadd_cost (speed, mode, m);
2863
2864           op_latency = op_cost;
2865
2866
2867           new_limit.cost = best_cost.cost - op_cost;
2868           new_limit.latency = best_cost.latency - op_latency;
2869           synth_mult (alg_in, t / d, &new_limit, mode);
2870
2871           alg_in->cost.cost += op_cost;
2872           alg_in->cost.latency += op_latency;
2873           if (alg_in->cost.latency < op_cost)
2874             alg_in->cost.latency = op_cost;
2875           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2876             {
2877               best_cost = alg_in->cost;
2878               std::swap (alg_in, best_alg);
2879               best_alg->log[best_alg->ops] = m;
2880               best_alg->op[best_alg->ops] = alg_add_factor;
2881             }
2882           /* Other factors will have been taken care of in the recursion.  */
2883           break;
2884         }
2885
2886       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2887       if (t % d == 0 && t > d && m < maxm
2888           && (!cache_hit || cache_alg == alg_sub_factor))
2889         {
2890           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2891           if (shiftsub0_cost (speed, mode, m) <= op_cost)
2892             op_cost = shiftsub0_cost (speed, mode, m);
2893
2894           op_latency = op_cost;
2895
2896           new_limit.cost = best_cost.cost - op_cost;
2897           new_limit.latency = best_cost.latency - op_latency;
2898           synth_mult (alg_in, t / d, &new_limit, mode);
2899
2900           alg_in->cost.cost += op_cost;
2901           alg_in->cost.latency += op_latency;
2902           if (alg_in->cost.latency < op_cost)
2903             alg_in->cost.latency = op_cost;
2904           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2905             {
2906               best_cost = alg_in->cost;
2907               std::swap (alg_in, best_alg);
2908               best_alg->log[best_alg->ops] = m;
2909               best_alg->op[best_alg->ops] = alg_sub_factor;
2910             }
2911           break;
2912         }
2913     }
2914   if (cache_hit)
2915     goto done;
2916
2917   /* Try shift-and-add (load effective address) instructions,
2918      i.e. do a*3, a*5, a*9.  */
2919   if ((t & 1) != 0)
2920     {
2921     do_alg_add_t2_m:
2922       q = t - 1;
2923       q = q & -q;
2924       m = exact_log2 (q);
2925       if (m >= 0 && m < maxm)
2926         {
2927           op_cost = shiftadd_cost (speed, mode, m);
2928           new_limit.cost = best_cost.cost - op_cost;
2929           new_limit.latency = best_cost.latency - op_cost;
2930           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2931
2932           alg_in->cost.cost += op_cost;
2933           alg_in->cost.latency += op_cost;
2934           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2935             {
2936               best_cost = alg_in->cost;
2937               std::swap (alg_in, best_alg);
2938               best_alg->log[best_alg->ops] = m;
2939               best_alg->op[best_alg->ops] = alg_add_t2_m;
2940             }
2941         }
2942       if (cache_hit)
2943         goto done;
2944
2945     do_alg_sub_t2_m:
2946       q = t + 1;
2947       q = q & -q;
2948       m = exact_log2 (q);
2949       if (m >= 0 && m < maxm)
2950         {
2951           op_cost = shiftsub0_cost (speed, mode, m);
2952           new_limit.cost = best_cost.cost - op_cost;
2953           new_limit.latency = best_cost.latency - op_cost;
2954           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2955
2956           alg_in->cost.cost += op_cost;
2957           alg_in->cost.latency += op_cost;
2958           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2959             {
2960               best_cost = alg_in->cost;
2961               std::swap (alg_in, best_alg);
2962               best_alg->log[best_alg->ops] = m;
2963               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2964             }
2965         }
2966       if (cache_hit)
2967         goto done;
2968     }
2969
2970  done:
2971   /* If best_cost has not decreased, we have not found any algorithm.  */
2972   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2973     {
2974       /* We failed to find an algorithm.  Record alg_impossible for
2975          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2976          we are asked to find an algorithm for T within the same or
2977          lower COST_LIMIT, we can immediately return to the
2978          caller.  */
2979       entry_ptr->t = t;
2980       entry_ptr->mode = mode;
2981       entry_ptr->speed = speed;
2982       entry_ptr->alg = alg_impossible;
2983       entry_ptr->cost = *cost_limit;
2984       return;
2985     }
2986
2987   /* Cache the result.  */
2988   if (!cache_hit)
2989     {
2990       entry_ptr->t = t;
2991       entry_ptr->mode = mode;
2992       entry_ptr->speed = speed;
2993       entry_ptr->alg = best_alg->op[best_alg->ops];
2994       entry_ptr->cost.cost = best_cost.cost;
2995       entry_ptr->cost.latency = best_cost.latency;
2996     }
2997
2998   /* If we are getting a too long sequence for `struct algorithm'
2999      to record, make this search fail.  */
3000   if (best_alg->ops == MAX_BITS_PER_WORD)
3001     return;
3002
3003   /* Copy the algorithm from temporary space to the space at alg_out.
3004      We avoid using structure assignment because the majority of
3005      best_alg is normally undefined, and this is a critical function.  */
3006   alg_out->ops = best_alg->ops + 1;
3007   alg_out->cost = best_cost;
3008   memcpy (alg_out->op, best_alg->op,
3009           alg_out->ops * sizeof *alg_out->op);
3010   memcpy (alg_out->log, best_alg->log,
3011           alg_out->ops * sizeof *alg_out->log);
3012 }
3013 \f
3014 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3015    Try three variations:
3016
3017        - a shift/add sequence based on VAL itself
3018        - a shift/add sequence based on -VAL, followed by a negation
3019        - a shift/add sequence based on VAL - 1, followed by an addition.
3020
3021    Return true if the cheapest of these cost less than MULT_COST,
3022    describing the algorithm in *ALG and final fixup in *VARIANT.  */
3023
3024 static bool
3025 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3026                      struct algorithm *alg, enum mult_variant *variant,
3027                      int mult_cost)
3028 {
3029   struct algorithm alg2;
3030   struct mult_cost limit;
3031   int op_cost;
3032   bool speed = optimize_insn_for_speed_p ();
3033
3034   /* Fail quickly for impossible bounds.  */
3035   if (mult_cost < 0)
3036     return false;
3037
3038   /* Ensure that mult_cost provides a reasonable upper bound.
3039      Any constant multiplication can be performed with less
3040      than 2 * bits additions.  */
3041   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3042   if (mult_cost > op_cost)
3043     mult_cost = op_cost;
3044
3045   *variant = basic_variant;
3046   limit.cost = mult_cost;
3047   limit.latency = mult_cost;
3048   synth_mult (alg, val, &limit, mode);
3049
3050   /* This works only if the inverted value actually fits in an
3051      `unsigned int' */
3052   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3053     {
3054       op_cost = neg_cost (speed, mode);
3055       if (MULT_COST_LESS (&alg->cost, mult_cost))
3056         {
3057           limit.cost = alg->cost.cost - op_cost;
3058           limit.latency = alg->cost.latency - op_cost;
3059         }
3060       else
3061         {
3062           limit.cost = mult_cost - op_cost;
3063           limit.latency = mult_cost - op_cost;
3064         }
3065
3066       synth_mult (&alg2, -val, &limit, mode);
3067       alg2.cost.cost += op_cost;
3068       alg2.cost.latency += op_cost;
3069       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3070         *alg = alg2, *variant = negate_variant;
3071     }
3072
3073   /* This proves very useful for division-by-constant.  */
3074   op_cost = add_cost (speed, mode);
3075   if (MULT_COST_LESS (&alg->cost, mult_cost))
3076     {
3077       limit.cost = alg->cost.cost - op_cost;
3078       limit.latency = alg->cost.latency - op_cost;
3079     }
3080   else
3081     {
3082       limit.cost = mult_cost - op_cost;
3083       limit.latency = mult_cost - op_cost;
3084     }
3085
3086   synth_mult (&alg2, val - 1, &limit, mode);
3087   alg2.cost.cost += op_cost;
3088   alg2.cost.latency += op_cost;
3089   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3090     *alg = alg2, *variant = add_variant;
3091
3092   return MULT_COST_LESS (&alg->cost, mult_cost);
3093 }
3094
3095 /* A subroutine of expand_mult, used for constant multiplications.
3096    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3097    convenient.  Use the shift/add sequence described by ALG and apply
3098    the final fixup specified by VARIANT.  */
3099
3100 static rtx
3101 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3102                    rtx target, const struct algorithm *alg,
3103                    enum mult_variant variant)
3104 {
3105   HOST_WIDE_INT val_so_far;
3106   rtx_insn *insn;
3107   rtx accum, tem;
3108   int opno;
3109   machine_mode nmode;
3110
3111   /* Avoid referencing memory over and over and invalid sharing
3112      on SUBREGs.  */
3113   op0 = force_reg (mode, op0);
3114
3115   /* ACCUM starts out either as OP0 or as a zero, depending on
3116      the first operation.  */
3117
3118   if (alg->op[0] == alg_zero)
3119     {
3120       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3121       val_so_far = 0;
3122     }
3123   else if (alg->op[0] == alg_m)
3124     {
3125       accum = copy_to_mode_reg (mode, op0);
3126       val_so_far = 1;
3127     }
3128   else
3129     gcc_unreachable ();
3130
3131   for (opno = 1; opno < alg->ops; opno++)
3132     {
3133       int log = alg->log[opno];
3134       rtx shift_subtarget = optimize ? 0 : accum;
3135       rtx add_target
3136         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3137            && !optimize)
3138           ? target : 0;
3139       rtx accum_target = optimize ? 0 : accum;
3140       rtx accum_inner;
3141
3142       switch (alg->op[opno])
3143         {
3144         case alg_shift:
3145           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3146           /* REG_EQUAL note will be attached to the following insn.  */
3147           emit_move_insn (accum, tem);
3148           val_so_far <<= log;
3149           break;
3150
3151         case alg_add_t_m2:
3152           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3153           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3154                                  add_target ? add_target : accum_target);
3155           val_so_far += (HOST_WIDE_INT) 1 << log;
3156           break;
3157
3158         case alg_sub_t_m2:
3159           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3160           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3161                                  add_target ? add_target : accum_target);
3162           val_so_far -= (HOST_WIDE_INT) 1 << log;
3163           break;
3164
3165         case alg_add_t2_m:
3166           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3167                                 log, shift_subtarget, 0);
3168           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3169                                  add_target ? add_target : accum_target);
3170           val_so_far = (val_so_far << log) + 1;
3171           break;
3172
3173         case alg_sub_t2_m:
3174           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3175                                 log, shift_subtarget, 0);
3176           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3177                                  add_target ? add_target : accum_target);
3178           val_so_far = (val_so_far << log) - 1;
3179           break;
3180
3181         case alg_add_factor:
3182           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3183           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3184                                  add_target ? add_target : accum_target);
3185           val_so_far += val_so_far << log;
3186           break;
3187
3188         case alg_sub_factor:
3189           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3190           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3191                                  (add_target
3192                                   ? add_target : (optimize ? 0 : tem)));
3193           val_so_far = (val_so_far << log) - val_so_far;
3194           break;
3195
3196         default:
3197           gcc_unreachable ();
3198         }
3199
3200       if (SCALAR_INT_MODE_P (mode))
3201         {
3202           /* Write a REG_EQUAL note on the last insn so that we can cse
3203              multiplication sequences.  Note that if ACCUM is a SUBREG,
3204              we've set the inner register and must properly indicate that.  */
3205           tem = op0, nmode = mode;
3206           accum_inner = accum;
3207           if (GET_CODE (accum) == SUBREG)
3208             {
3209               accum_inner = SUBREG_REG (accum);
3210               nmode = GET_MODE (accum_inner);
3211               tem = gen_lowpart (nmode, op0);
3212             }
3213
3214           insn = get_last_insn ();
3215           set_dst_reg_note (insn, REG_EQUAL,
3216                             gen_rtx_MULT (nmode, tem,
3217                                           gen_int_mode (val_so_far, nmode)),
3218                             accum_inner);
3219         }
3220     }
3221
3222   if (variant == negate_variant)
3223     {
3224       val_so_far = -val_so_far;
3225       accum = expand_unop (mode, neg_optab, accum, target, 0);
3226     }
3227   else if (variant == add_variant)
3228     {
3229       val_so_far = val_so_far + 1;
3230       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3231     }
3232
3233   /* Compare only the bits of val and val_so_far that are significant
3234      in the result mode, to avoid sign-/zero-extension confusion.  */
3235   nmode = GET_MODE_INNER (mode);
3236   val &= GET_MODE_MASK (nmode);
3237   val_so_far &= GET_MODE_MASK (nmode);
3238   gcc_assert (val == val_so_far);
3239
3240   return accum;
3241 }
3242
3243 /* Perform a multiplication and return an rtx for the result.
3244    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3245    TARGET is a suggestion for where to store the result (an rtx).
3246
3247    We check specially for a constant integer as OP1.
3248    If you want this check for OP0 as well, then before calling
3249    you should swap the two operands if OP0 would be constant.  */
3250
3251 rtx
3252 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3253              int unsignedp)
3254 {
3255   enum mult_variant variant;
3256   struct algorithm algorithm;
3257   rtx scalar_op1;
3258   int max_cost;
3259   bool speed = optimize_insn_for_speed_p ();
3260   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3261
3262   if (CONSTANT_P (op0))
3263     std::swap (op0, op1);
3264
3265   /* For vectors, there are several simplifications that can be made if
3266      all elements of the vector constant are identical.  */
3267   scalar_op1 = unwrap_const_vec_duplicate (op1);
3268
3269   if (INTEGRAL_MODE_P (mode))
3270     {
3271       rtx fake_reg;
3272       HOST_WIDE_INT coeff;
3273       bool is_neg;
3274       int mode_bitsize;
3275
3276       if (op1 == CONST0_RTX (mode))
3277         return op1;
3278       if (op1 == CONST1_RTX (mode))
3279         return op0;
3280       if (op1 == CONSTM1_RTX (mode))
3281         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3282                             op0, target, 0);
3283
3284       if (do_trapv)
3285         goto skip_synth;
3286
3287       /* If mode is integer vector mode, check if the backend supports
3288          vector lshift (by scalar or vector) at all.  If not, we can't use
3289          synthetized multiply.  */
3290       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3291           && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3292           && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3293         goto skip_synth;
3294
3295       /* These are the operations that are potentially turned into
3296          a sequence of shifts and additions.  */
3297       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3298
3299       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3300          less than or equal in size to `unsigned int' this doesn't matter.
3301          If the mode is larger than `unsigned int', then synth_mult works
3302          only if the constant value exactly fits in an `unsigned int' without
3303          any truncation.  This means that multiplying by negative values does
3304          not work; results are off by 2^32 on a 32 bit machine.  */
3305       if (CONST_INT_P (scalar_op1))
3306         {
3307           coeff = INTVAL (scalar_op1);
3308           is_neg = coeff < 0;
3309         }
3310 #if TARGET_SUPPORTS_WIDE_INT
3311       else if (CONST_WIDE_INT_P (scalar_op1))
3312 #else
3313       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3314 #endif
3315         {
3316           int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3317           /* Perfect power of 2 (other than 1, which is handled above).  */
3318           if (shift > 0)
3319             return expand_shift (LSHIFT_EXPR, mode, op0,
3320                                  shift, target, unsignedp);
3321           else
3322             goto skip_synth;
3323         }
3324       else
3325         goto skip_synth;
3326
3327       /* We used to test optimize here, on the grounds that it's better to
3328          produce a smaller program when -O is not used.  But this causes
3329          such a terrible slowdown sometimes that it seems better to always
3330          use synth_mult.  */
3331
3332       /* Special case powers of two.  */
3333       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3334           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3335         return expand_shift (LSHIFT_EXPR, mode, op0,
3336                              floor_log2 (coeff), target, unsignedp);
3337
3338       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3339
3340       /* Attempt to handle multiplication of DImode values by negative
3341          coefficients, by performing the multiplication by a positive
3342          multiplier and then inverting the result.  */
3343       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3344         {
3345           /* Its safe to use -coeff even for INT_MIN, as the
3346              result is interpreted as an unsigned coefficient.
3347              Exclude cost of op0 from max_cost to match the cost
3348              calculation of the synth_mult.  */
3349           coeff = -(unsigned HOST_WIDE_INT) coeff;
3350           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3351                                     mode, speed)
3352                       - neg_cost (speed, mode));
3353           if (max_cost <= 0)
3354             goto skip_synth;
3355
3356           /* Special case powers of two.  */
3357           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3358             {
3359               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3360                                        floor_log2 (coeff), target, unsignedp);
3361               return expand_unop (mode, neg_optab, temp, target, 0);
3362             }
3363
3364           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3365                                    max_cost))
3366             {
3367               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3368                                             &algorithm, variant);
3369               return expand_unop (mode, neg_optab, temp, target, 0);
3370             }
3371           goto skip_synth;
3372         }
3373
3374       /* Exclude cost of op0 from max_cost to match the cost
3375          calculation of the synth_mult.  */
3376       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3377       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3378         return expand_mult_const (mode, op0, coeff, target,
3379                                   &algorithm, variant);
3380     }
3381  skip_synth:
3382
3383   /* Expand x*2.0 as x+x.  */
3384   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3385       && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3386     {
3387       op0 = force_reg (GET_MODE (op0), op0);
3388       return expand_binop (mode, add_optab, op0, op0,
3389                            target, unsignedp, OPTAB_LIB_WIDEN);
3390     }
3391
3392   /* This used to use umul_optab if unsigned, but for non-widening multiply
3393      there is no difference between signed and unsigned.  */
3394   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3395                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3396   gcc_assert (op0);
3397   return op0;
3398 }
3399
3400 /* Return a cost estimate for multiplying a register by the given
3401    COEFFicient in the given MODE and SPEED.  */
3402
3403 int
3404 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3405 {
3406   int max_cost;
3407   struct algorithm algorithm;
3408   enum mult_variant variant;
3409
3410   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3411   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3412                            mode, speed);
3413   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3414     return algorithm.cost.cost;
3415   else
3416     return max_cost;
3417 }
3418
3419 /* Perform a widening multiplication and return an rtx for the result.
3420    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3421    TARGET is a suggestion for where to store the result (an rtx).
3422    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3423    or smul_widen_optab.
3424
3425    We check specially for a constant integer as OP1, comparing the
3426    cost of a widening multiply against the cost of a sequence of shifts
3427    and adds.  */
3428
3429 rtx
3430 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3431                       int unsignedp, optab this_optab)
3432 {
3433   bool speed = optimize_insn_for_speed_p ();
3434   rtx cop1;
3435
3436   if (CONST_INT_P (op1)
3437       && GET_MODE (op0) != VOIDmode
3438       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3439                                 this_optab == umul_widen_optab))
3440       && CONST_INT_P (cop1)
3441       && (INTVAL (cop1) >= 0
3442           || HWI_COMPUTABLE_MODE_P (mode)))
3443     {
3444       HOST_WIDE_INT coeff = INTVAL (cop1);
3445       int max_cost;
3446       enum mult_variant variant;
3447       struct algorithm algorithm;
3448
3449       if (coeff == 0)
3450         return CONST0_RTX (mode);
3451
3452       /* Special case powers of two.  */
3453       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3454         {
3455           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3456           return expand_shift (LSHIFT_EXPR, mode, op0,
3457                                floor_log2 (coeff), target, unsignedp);
3458         }
3459
3460       /* Exclude cost of op0 from max_cost to match the cost
3461          calculation of the synth_mult.  */
3462       max_cost = mul_widen_cost (speed, mode);
3463       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3464                                max_cost))
3465         {
3466           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3467           return expand_mult_const (mode, op0, coeff, target,
3468                                     &algorithm, variant);
3469         }
3470     }
3471   return expand_binop (mode, this_optab, op0, op1, target,
3472                        unsignedp, OPTAB_LIB_WIDEN);
3473 }
3474 \f
3475 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3476    replace division by D, and put the least significant N bits of the result
3477    in *MULTIPLIER_PTR and return the most significant bit.
3478
3479    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3480    needed precision is in PRECISION (should be <= N).
3481
3482    PRECISION should be as small as possible so this function can choose
3483    multiplier more freely.
3484
3485    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3486    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3487
3488    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3489    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3490
3491 unsigned HOST_WIDE_INT
3492 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3493                    unsigned HOST_WIDE_INT *multiplier_ptr,
3494                    int *post_shift_ptr, int *lgup_ptr)
3495 {
3496   int lgup, post_shift;
3497   int pow, pow2;
3498
3499   /* lgup = ceil(log2(divisor)); */
3500   lgup = ceil_log2 (d);
3501
3502   gcc_assert (lgup <= n);
3503
3504   pow = n + lgup;
3505   pow2 = n + lgup - precision;
3506
3507   /* mlow = 2^(N + lgup)/d */
3508   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3509   wide_int mlow = wi::udiv_trunc (val, d);
3510
3511   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3512   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3513   wide_int mhigh = wi::udiv_trunc (val, d);
3514
3515   /* If precision == N, then mlow, mhigh exceed 2^N
3516      (but they do not exceed 2^(N+1)).  */
3517
3518   /* Reduce to lowest terms.  */
3519   for (post_shift = lgup; post_shift > 0; post_shift--)
3520     {
3521       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3522                                                        HOST_BITS_PER_WIDE_INT);
3523       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3524                                                        HOST_BITS_PER_WIDE_INT);
3525       if (ml_lo >= mh_lo)
3526         break;
3527
3528       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3529       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3530     }
3531
3532   *post_shift_ptr = post_shift;
3533   *lgup_ptr = lgup;
3534   if (n < HOST_BITS_PER_WIDE_INT)
3535     {
3536       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3537       *multiplier_ptr = mhigh.to_uhwi () & mask;
3538       return mhigh.to_uhwi () >= mask;
3539     }
3540   else
3541     {
3542       *multiplier_ptr = mhigh.to_uhwi ();
3543       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3544     }
3545 }
3546
3547 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3548    congruent to 1 (mod 2**N).  */
3549
3550 static unsigned HOST_WIDE_INT
3551 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3552 {
3553   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3554
3555   /* The algorithm notes that the choice y = x satisfies
3556      x*y == 1 mod 2^3, since x is assumed odd.
3557      Each iteration doubles the number of bits of significance in y.  */
3558
3559   unsigned HOST_WIDE_INT mask;
3560   unsigned HOST_WIDE_INT y = x;
3561   int nbit = 3;
3562
3563   mask = (n == HOST_BITS_PER_WIDE_INT
3564           ? ~(unsigned HOST_WIDE_INT) 0
3565           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3566
3567   while (nbit < n)
3568     {
3569       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3570       nbit *= 2;
3571     }
3572   return y;
3573 }
3574
3575 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3576    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3577    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3578    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3579    become signed.
3580
3581    The result is put in TARGET if that is convenient.
3582
3583    MODE is the mode of operation.  */
3584
3585 rtx
3586 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3587                              rtx op1, rtx target, int unsignedp)
3588 {
3589   rtx tem;
3590   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3591
3592   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3593                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3594   tem = expand_and (mode, tem, op1, NULL_RTX);
3595   adj_operand
3596     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3597                      adj_operand);
3598
3599   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3600                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3601   tem = expand_and (mode, tem, op0, NULL_RTX);
3602   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3603                           target);
3604
3605   return target;
3606 }
3607
3608 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3609
3610 static rtx
3611 extract_high_half (machine_mode mode, rtx op)
3612 {
3613   machine_mode wider_mode;
3614
3615   if (mode == word_mode)
3616     return gen_highpart (mode, op);
3617
3618   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3619
3620   wider_mode = GET_MODE_WIDER_MODE (mode);
3621   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3622                      GET_MODE_BITSIZE (mode), 0, 1);
3623   return convert_modes (mode, wider_mode, op, 0);
3624 }
3625
3626 /* Like expmed_mult_highpart, but only consider using a multiplication
3627    optab.  OP1 is an rtx for the constant operand.  */
3628
3629 static rtx
3630 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3631                             rtx target, int unsignedp, int max_cost)
3632 {
3633   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3634   machine_mode wider_mode;
3635   optab moptab;
3636   rtx tem;
3637   int size;
3638   bool speed = optimize_insn_for_speed_p ();
3639
3640   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3641
3642   wider_mode = GET_MODE_WIDER_MODE (mode);
3643   size = GET_MODE_BITSIZE (mode);
3644
3645   /* Firstly, try using a multiplication insn that only generates the needed
3646      high part of the product, and in the sign flavor of unsignedp.  */
3647   if (mul_highpart_cost (speed, mode) < max_cost)
3648     {
3649       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3650       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3651                           unsignedp, OPTAB_DIRECT);
3652       if (tem)
3653         return tem;
3654     }
3655
3656   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3657      Need to adjust the result after the multiplication.  */
3658   if (size - 1 < BITS_PER_WORD
3659       && (mul_highpart_cost (speed, mode)
3660           + 2 * shift_cost (speed, mode, size-1)
3661           + 4 * add_cost (speed, mode) < max_cost))
3662     {
3663       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3664       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3665                           unsignedp, OPTAB_DIRECT);
3666       if (tem)
3667         /* We used the wrong signedness.  Adjust the result.  */
3668         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3669                                             tem, unsignedp);
3670     }
3671
3672   /* Try widening multiplication.  */
3673   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3674   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3675       && mul_widen_cost (speed, wider_mode) < max_cost)
3676     {
3677       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3678                           unsignedp, OPTAB_WIDEN);
3679       if (tem)
3680         return extract_high_half (mode, tem);
3681     }
3682
3683   /* Try widening the mode and perform a non-widening multiplication.  */
3684   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3685       && size - 1 < BITS_PER_WORD
3686       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3687           < max_cost))
3688     {
3689       rtx_insn *insns;
3690       rtx wop0, wop1;
3691
3692       /* We need to widen the operands, for example to ensure the
3693          constant multiplier is correctly sign or zero extended.
3694          Use a sequence to clean-up any instructions emitted by
3695          the conversions if things don't work out.  */
3696       start_sequence ();
3697       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3698       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3699       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3700                           unsignedp, OPTAB_WIDEN);
3701       insns = get_insns ();
3702       end_sequence ();
3703
3704       if (tem)
3705         {
3706           emit_insn (insns);
3707           return extract_high_half (mode, tem);
3708         }
3709     }
3710
3711   /* Try widening multiplication of opposite signedness, and adjust.  */
3712   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3713   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3714       && size - 1 < BITS_PER_WORD
3715       && (mul_widen_cost (speed, wider_mode)
3716           + 2 * shift_cost (speed, mode, size-1)
3717           + 4 * add_cost (speed, mode) < max_cost))
3718     {
3719       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3720                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3721       if (tem != 0)
3722         {
3723           tem = extract_high_half (mode, tem);
3724           /* We used the wrong signedness.  Adjust the result.  */
3725           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3726                                               target, unsignedp);
3727         }
3728     }
3729
3730   return 0;
3731 }
3732
3733 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3734    putting the high half of the result in TARGET if that is convenient,
3735    and return where the result is.  If the operation can not be performed,
3736    0 is returned.
3737
3738    MODE is the mode of operation and result.
3739
3740    UNSIGNEDP nonzero means unsigned multiply.
3741
3742    MAX_COST is the total allowed cost for the expanded RTL.  */
3743
3744 static rtx
3745 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3746                       rtx target, int unsignedp, int max_cost)
3747 {
3748   machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3749   unsigned HOST_WIDE_INT cnst1;
3750   int extra_cost;
3751   bool sign_adjust = false;
3752   enum mult_variant variant;
3753   struct algorithm alg;
3754   rtx tem;
3755   bool speed = optimize_insn_for_speed_p ();
3756
3757   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3758   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3759   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3760
3761   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3762
3763   /* We can't optimize modes wider than BITS_PER_WORD.
3764      ??? We might be able to perform double-word arithmetic if
3765      mode == word_mode, however all the cost calculations in
3766      synth_mult etc. assume single-word operations.  */
3767   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3768     return expmed_mult_highpart_optab (mode, op0, op1, target,
3769                                        unsignedp, max_cost);
3770
3771   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3772
3773   /* Check whether we try to multiply by a negative constant.  */
3774   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3775     {
3776       sign_adjust = true;
3777       extra_cost += add_cost (speed, mode);
3778     }
3779
3780   /* See whether shift/add multiplication is cheap enough.  */
3781   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3782                            max_cost - extra_cost))
3783     {
3784       /* See whether the specialized multiplication optabs are
3785          cheaper than the shift/add version.  */
3786       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3787                                         alg.cost.cost + extra_cost);
3788       if (tem)
3789         return tem;
3790
3791       tem = convert_to_mode (wider_mode, op0, unsignedp);
3792       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3793       tem = extract_high_half (mode, tem);
3794
3795       /* Adjust result for signedness.  */
3796       if (sign_adjust)
3797         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3798
3799       return tem;
3800     }
3801   return expmed_mult_highpart_optab (mode, op0, op1, target,
3802                                      unsignedp, max_cost);
3803 }
3804
3805
3806 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3807
3808 static rtx
3809 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3810 {
3811   rtx result, temp, shift;
3812   rtx_code_label *label;
3813   int logd;
3814   int prec = GET_MODE_PRECISION (mode);
3815
3816   logd = floor_log2 (d);
3817   result = gen_reg_rtx (mode);
3818
3819   /* Avoid conditional branches when they're expensive.  */
3820   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3821       && optimize_insn_for_speed_p ())
3822     {
3823       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3824                                       mode, 0, -1);
3825       if (signmask)
3826         {
3827           HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3828           signmask = force_reg (mode, signmask);
3829           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3830
3831           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3832              which instruction sequence to use.  If logical right shifts
3833              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3834              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3835
3836           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3837           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3838               || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3839                   > COSTS_N_INSNS (2)))
3840             {
3841               temp = expand_binop (mode, xor_optab, op0, signmask,
3842                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3843               temp = expand_binop (mode, sub_optab, temp, signmask,
3844                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3845               temp = expand_binop (mode, and_optab, temp,
3846                                    gen_int_mode (masklow, mode),
3847                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3848               temp = expand_binop (mode, xor_optab, temp, signmask,
3849                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3850               temp = expand_binop (mode, sub_optab, temp, signmask,
3851                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3852             }
3853           else
3854             {
3855               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3856                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3857               signmask = force_reg (mode, signmask);
3858
3859               temp = expand_binop (mode, add_optab, op0, signmask,
3860                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3861               temp = expand_binop (mode, and_optab, temp,
3862                                    gen_int_mode (masklow, mode),
3863                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3864               temp = expand_binop (mode, sub_optab, temp, signmask,
3865                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3866             }
3867           return temp;
3868         }
3869     }
3870
3871   /* Mask contains the mode's signbit and the significant bits of the
3872      modulus.  By including the signbit in the operation, many targets
3873      can avoid an explicit compare operation in the following comparison
3874      against zero.  */
3875   wide_int mask = wi::mask (logd, false, prec);
3876   mask = wi::set_bit (mask, prec - 1);
3877
3878   temp = expand_binop (mode, and_optab, op0,
3879                        immed_wide_int_const (mask, mode),
3880                        result, 1, OPTAB_LIB_WIDEN);
3881   if (temp != result)
3882     emit_move_insn (result, temp);
3883
3884   label = gen_label_rtx ();
3885   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3886
3887   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3888                        0, OPTAB_LIB_WIDEN);
3889
3890   mask = wi::mask (logd, true, prec);
3891   temp = expand_binop (mode, ior_optab, temp,
3892                        immed_wide_int_const (mask, mode),
3893                        result, 1, OPTAB_LIB_WIDEN);
3894   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3895                        0, OPTAB_LIB_WIDEN);
3896   if (temp != result)
3897     emit_move_insn (result, temp);
3898   emit_label (label);
3899   return result;
3900 }
3901
3902 /* Expand signed division of OP0 by a power of two D in mode MODE.
3903    This routine is only called for positive values of D.  */
3904
3905 static rtx
3906 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3907 {
3908   rtx temp;
3909   rtx_code_label *label;
3910   int logd;
3911
3912   logd = floor_log2 (d);
3913
3914   if (d == 2
3915       && BRANCH_COST (optimize_insn_for_speed_p (),
3916                       false) >= 1)
3917     {
3918       temp = gen_reg_rtx (mode);
3919       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3920       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3921                            0, OPTAB_LIB_WIDEN);
3922       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3923     }
3924
3925   if (HAVE_conditional_move
3926       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3927     {
3928       rtx temp2;
3929
3930       start_sequence ();
3931       temp2 = copy_to_mode_reg (mode, op0);
3932       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3933                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3934       temp = force_reg (mode, temp);
3935
3936       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3937       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3938                                      mode, temp, temp2, mode, 0);
3939       if (temp2)
3940         {
3941           rtx_insn *seq = get_insns ();
3942           end_sequence ();
3943           emit_insn (seq);
3944           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3945         }
3946       end_sequence ();
3947     }
3948
3949   if (BRANCH_COST (optimize_insn_for_speed_p (),
3950                    false) >= 2)
3951     {
3952       int ushift = GET_MODE_BITSIZE (mode) - logd;
3953
3954       temp = gen_reg_rtx (mode);
3955       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3956       if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3957           || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3958              > COSTS_N_INSNS (1))
3959         temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3960                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3961       else
3962         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3963                              ushift, NULL_RTX, 1);
3964       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3965                            0, OPTAB_LIB_WIDEN);
3966       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3967     }
3968
3969   label = gen_label_rtx ();
3970   temp = copy_to_mode_reg (mode, op0);
3971   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3972   expand_inc (temp, gen_int_mode (d - 1, mode));
3973   emit_label (label);
3974   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3975 }
3976 \f
3977 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3978    if that is convenient, and returning where the result is.
3979    You may request either the quotient or the remainder as the result;
3980    specify REM_FLAG nonzero to get the remainder.
3981
3982    CODE is the expression code for which kind of division this is;
3983    it controls how rounding is done.  MODE is the machine mode to use.
3984    UNSIGNEDP nonzero means do unsigned division.  */
3985
3986 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3987    and then correct it by or'ing in missing high bits
3988    if result of ANDI is nonzero.
3989    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3990    This could optimize to a bfexts instruction.
3991    But C doesn't use these operations, so their optimizations are
3992    left for later.  */
3993 /* ??? For modulo, we don't actually need the highpart of the first product,
3994    the low part will do nicely.  And for small divisors, the second multiply
3995    can also be a low-part only multiply or even be completely left out.
3996    E.g. to calculate the remainder of a division by 3 with a 32 bit
3997    multiply, multiply with 0x55555556 and extract the upper two bits;
3998    the result is exact for inputs up to 0x1fffffff.
3999    The input range can be reduced by using cross-sum rules.
4000    For odd divisors >= 3, the following table gives right shift counts
4001    so that if a number is shifted by an integer multiple of the given
4002    amount, the remainder stays the same:
4003    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4004    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4005    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4006    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4007    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4008
4009    Cross-sum rules for even numbers can be derived by leaving as many bits
4010    to the right alone as the divisor has zeros to the right.
4011    E.g. if x is an unsigned 32 bit number:
4012    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4013    */
4014
4015 rtx
4016 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4017                rtx op0, rtx op1, rtx target, int unsignedp)
4018 {
4019   machine_mode compute_mode;
4020   rtx tquotient;
4021   rtx quotient = 0, remainder = 0;
4022   rtx_insn *last;
4023   int size;
4024   rtx_insn *insn;
4025   optab optab1, optab2;
4026   int op1_is_constant, op1_is_pow2 = 0;
4027   int max_cost, extra_cost;
4028   static HOST_WIDE_INT last_div_const = 0;
4029   bool speed = optimize_insn_for_speed_p ();
4030
4031   op1_is_constant = CONST_INT_P (op1);
4032   if (op1_is_constant)
4033     {
4034       unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
4035       if (unsignedp)
4036         ext_op1 &= GET_MODE_MASK (mode);
4037       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
4038                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
4039     }
4040
4041   /*
4042      This is the structure of expand_divmod:
4043
4044      First comes code to fix up the operands so we can perform the operations
4045      correctly and efficiently.
4046
4047      Second comes a switch statement with code specific for each rounding mode.
4048      For some special operands this code emits all RTL for the desired
4049      operation, for other cases, it generates only a quotient and stores it in
4050      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4051      to indicate that it has not done anything.
4052
4053      Last comes code that finishes the operation.  If QUOTIENT is set and
4054      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4055      QUOTIENT is not set, it is computed using trunc rounding.
4056
4057      We try to generate special code for division and remainder when OP1 is a
4058      constant.  If |OP1| = 2**n we can use shifts and some other fast
4059      operations.  For other values of OP1, we compute a carefully selected
4060      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4061      by m.
4062
4063      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4064      half of the product.  Different strategies for generating the product are
4065      implemented in expmed_mult_highpart.
4066
4067      If what we actually want is the remainder, we generate that by another
4068      by-constant multiplication and a subtraction.  */
4069
4070   /* We shouldn't be called with OP1 == const1_rtx, but some of the
4071      code below will malfunction if we are, so check here and handle
4072      the special case if so.  */
4073   if (op1 == const1_rtx)
4074     return rem_flag ? const0_rtx : op0;
4075
4076     /* When dividing by -1, we could get an overflow.
4077      negv_optab can handle overflows.  */
4078   if (! unsignedp && op1 == constm1_rtx)
4079     {
4080       if (rem_flag)
4081         return const0_rtx;
4082       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4083                           ? negv_optab : neg_optab, op0, target, 0);
4084     }
4085
4086   if (target
4087       /* Don't use the function value register as a target
4088          since we have to read it as well as write it,
4089          and function-inlining gets confused by this.  */
4090       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4091           /* Don't clobber an operand while doing a multi-step calculation.  */
4092           || ((rem_flag || op1_is_constant)
4093               && (reg_mentioned_p (target, op0)
4094                   || (MEM_P (op0) && MEM_P (target))))
4095           || reg_mentioned_p (target, op1)
4096           || (MEM_P (op1) && MEM_P (target))))
4097     target = 0;
4098
4099   /* Get the mode in which to perform this computation.  Normally it will
4100      be MODE, but sometimes we can't do the desired operation in MODE.
4101      If so, pick a wider mode in which we can do the operation.  Convert
4102      to that mode at the start to avoid repeated conversions.
4103
4104      First see what operations we need.  These depend on the expression
4105      we are evaluating.  (We assume that divxx3 insns exist under the
4106      same conditions that modxx3 insns and that these insns don't normally
4107      fail.  If these assumptions are not correct, we may generate less
4108      efficient code in some cases.)
4109
4110      Then see if we find a mode in which we can open-code that operation
4111      (either a division, modulus, or shift).  Finally, check for the smallest
4112      mode for which we can do the operation with a library call.  */
4113
4114   /* We might want to refine this now that we have division-by-constant
4115      optimization.  Since expmed_mult_highpart tries so many variants, it is
4116      not straightforward to generalize this.  Maybe we should make an array
4117      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4118
4119   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
4120             ? (unsignedp ? lshr_optab : ashr_optab)
4121             : (unsignedp ? udiv_optab : sdiv_optab));
4122   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
4123             ? optab1
4124             : (unsignedp ? udivmod_optab : sdivmod_optab));
4125
4126   for (compute_mode = mode; compute_mode != VOIDmode;
4127        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4128     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4129         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4130       break;
4131
4132   if (compute_mode == VOIDmode)
4133     for (compute_mode = mode; compute_mode != VOIDmode;
4134          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4135       if (optab_libfunc (optab1, compute_mode)
4136           || optab_libfunc (optab2, compute_mode))
4137         break;
4138
4139   /* If we still couldn't find a mode, use MODE, but expand_binop will
4140      probably die.  */
4141   if (compute_mode == VOIDmode)
4142     compute_mode = mode;
4143
4144   if (target && GET_MODE (target) == compute_mode)
4145     tquotient = target;
4146   else
4147     tquotient = gen_reg_rtx (compute_mode);
4148
4149   size = GET_MODE_BITSIZE (compute_mode);
4150 #if 0
4151   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4152      (mode), and thereby get better code when OP1 is a constant.  Do that
4153      later.  It will require going over all usages of SIZE below.  */
4154   size = GET_MODE_BITSIZE (mode);
4155 #endif
4156
4157   /* Only deduct something for a REM if the last divide done was
4158      for a different constant.   Then set the constant of the last
4159      divide.  */
4160   max_cost = (unsignedp
4161               ? udiv_cost (speed, compute_mode)
4162               : sdiv_cost (speed, compute_mode));
4163   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4164                      && INTVAL (op1) == last_div_const))
4165     max_cost -= (mul_cost (speed, compute_mode)
4166                  + add_cost (speed, compute_mode));
4167
4168   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4169
4170   /* Now convert to the best mode to use.  */
4171   if (compute_mode != mode)
4172     {
4173       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4174       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4175
4176       /* convert_modes may have placed op1 into a register, so we
4177          must recompute the following.  */
4178       op1_is_constant = CONST_INT_P (op1);
4179       op1_is_pow2 = (op1_is_constant
4180                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4181                           || (! unsignedp
4182                               && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4183     }
4184
4185   /* If one of the operands is a volatile MEM, copy it into a register.  */
4186
4187   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4188     op0 = force_reg (compute_mode, op0);
4189   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4190     op1 = force_reg (compute_mode, op1);
4191
4192   /* If we need the remainder or if OP1 is constant, we need to
4193      put OP0 in a register in case it has any queued subexpressions.  */
4194   if (rem_flag || op1_is_constant)
4195     op0 = force_reg (compute_mode, op0);
4196
4197   last = get_last_insn ();
4198
4199   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4200   if (unsignedp)
4201     {
4202       if (code == FLOOR_DIV_EXPR)
4203         code = TRUNC_DIV_EXPR;
4204       if (code == FLOOR_MOD_EXPR)
4205         code = TRUNC_MOD_EXPR;
4206       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4207         code = TRUNC_DIV_EXPR;
4208     }
4209
4210   if (op1 != const0_rtx)
4211     switch (code)
4212       {
4213       case TRUNC_MOD_EXPR:
4214       case TRUNC_DIV_EXPR:
4215         if (op1_is_constant)
4216           {
4217             if (unsignedp)
4218               {
4219                 unsigned HOST_WIDE_INT mh, ml;
4220                 int pre_shift, post_shift;
4221                 int dummy;
4222                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4223                                             & GET_MODE_MASK (compute_mode));
4224
4225                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4226                   {
4227                     pre_shift = floor_log2 (d);
4228                     if (rem_flag)
4229                       {
4230                         unsigned HOST_WIDE_INT mask
4231                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4232                         remainder
4233                           = expand_binop (compute_mode, and_optab, op0,
4234                                           gen_int_mode (mask, compute_mode),
4235                                           remainder, 1,
4236                                           OPTAB_LIB_WIDEN);
4237                         if (remainder)
4238                           return gen_lowpart (mode, remainder);
4239                       }
4240                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4241                                              pre_shift, tquotient, 1);
4242                   }
4243                 else if (size <= HOST_BITS_PER_WIDE_INT)
4244                   {
4245                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4246                       {
4247                         /* Most significant bit of divisor is set; emit an scc
4248                            insn.  */
4249                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4250                                                           compute_mode, 1, 1);
4251                       }
4252                     else
4253                       {
4254                         /* Find a suitable multiplier and right shift count
4255                            instead of multiplying with D.  */
4256
4257                         mh = choose_multiplier (d, size, size,
4258                                                 &ml, &post_shift, &dummy);
4259
4260                         /* If the suggested multiplier is more than SIZE bits,
4261                            we can do better for even divisors, using an
4262                            initial right shift.  */
4263                         if (mh != 0 && (d & 1) == 0)
4264                           {
4265                             pre_shift = floor_log2 (d & -d);
4266                             mh = choose_multiplier (d >> pre_shift, size,
4267                                                     size - pre_shift,
4268                                                     &ml, &post_shift, &dummy);
4269                             gcc_assert (!mh);
4270                           }
4271                         else
4272                           pre_shift = 0;
4273
4274                         if (mh != 0)
4275                           {
4276                             rtx t1, t2, t3, t4;
4277
4278                             if (post_shift - 1 >= BITS_PER_WORD)
4279                               goto fail1;
4280
4281                             extra_cost
4282                               = (shift_cost (speed, compute_mode, post_shift - 1)
4283                                  + shift_cost (speed, compute_mode, 1)
4284                                  + 2 * add_cost (speed, compute_mode));
4285                             t1 = expmed_mult_highpart
4286                               (compute_mode, op0,
4287                                gen_int_mode (ml, compute_mode),
4288                                NULL_RTX, 1, max_cost - extra_cost);
4289                             if (t1 == 0)
4290                               goto fail1;
4291                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4292                                                                op0, t1),
4293                                                 NULL_RTX);
4294                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4295                                                t2, 1, NULL_RTX, 1);
4296                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4297                                                               t1, t3),
4298                                                 NULL_RTX);
4299                             quotient = expand_shift
4300                               (RSHIFT_EXPR, compute_mode, t4,
4301                                post_shift - 1, tquotient, 1);
4302                           }
4303                         else
4304                           {
4305                             rtx t1, t2;
4306
4307                             if (pre_shift >= BITS_PER_WORD
4308                                 || post_shift >= BITS_PER_WORD)
4309                               goto fail1;
4310
4311                             t1 = expand_shift
4312                               (RSHIFT_EXPR, compute_mode, op0,
4313                                pre_shift, NULL_RTX, 1);
4314                             extra_cost
4315                               = (shift_cost (speed, compute_mode, pre_shift)
4316                                  + shift_cost (speed, compute_mode, post_shift));
4317                             t2 = expmed_mult_highpart
4318                               (compute_mode, t1,
4319                                gen_int_mode (ml, compute_mode),
4320                                NULL_RTX, 1, max_cost - extra_cost);
4321                             if (t2 == 0)
4322                               goto fail1;
4323                             quotient = expand_shift
4324                               (RSHIFT_EXPR, compute_mode, t2,
4325                                post_shift, tquotient, 1);
4326                           }
4327                       }
4328                   }
4329                 else            /* Too wide mode to use tricky code */
4330                   break;
4331
4332                 insn = get_last_insn ();
4333                 if (insn != last)
4334                   set_dst_reg_note (insn, REG_EQUAL,
4335                                     gen_rtx_UDIV (compute_mode, op0, op1),
4336                                     quotient);
4337               }
4338             else                /* TRUNC_DIV, signed */
4339               {
4340                 unsigned HOST_WIDE_INT ml;
4341                 int lgup, post_shift;
4342                 rtx mlr;
4343                 HOST_WIDE_INT d = INTVAL (op1);
4344                 unsigned HOST_WIDE_INT abs_d;
4345
4346                 /* Since d might be INT_MIN, we have to cast to
4347                    unsigned HOST_WIDE_INT before negating to avoid
4348                    undefined signed overflow.  */
4349                 abs_d = (d >= 0
4350                          ? (unsigned HOST_WIDE_INT) d
4351                          : - (unsigned HOST_WIDE_INT) d);
4352
4353                 /* n rem d = n rem -d */
4354                 if (rem_flag && d < 0)
4355                   {
4356                     d = abs_d;
4357                     op1 = gen_int_mode (abs_d, compute_mode);
4358                   }
4359
4360                 if (d == 1)
4361                   quotient = op0;
4362                 else if (d == -1)
4363                   quotient = expand_unop (compute_mode, neg_optab, op0,
4364                                           tquotient, 0);
4365                 else if (HOST_BITS_PER_WIDE_INT >= size
4366                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4367                   {
4368                     /* This case is not handled correctly below.  */
4369                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4370                                                 compute_mode, 1, 1);
4371                     if (quotient == 0)
4372                       goto fail1;
4373                   }
4374                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4375                          && (rem_flag
4376                              ? smod_pow2_cheap (speed, compute_mode)
4377                              : sdiv_pow2_cheap (speed, compute_mode))
4378                          /* We assume that cheap metric is true if the
4379                             optab has an expander for this mode.  */
4380                          && ((optab_handler ((rem_flag ? smod_optab
4381                                               : sdiv_optab),
4382                                              compute_mode)
4383                               != CODE_FOR_nothing)
4384                              || (optab_handler (sdivmod_optab,
4385                                                 compute_mode)
4386                                  != CODE_FOR_nothing)))
4387                   ;
4388                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4389                   {
4390                     if (rem_flag)
4391                       {
4392                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4393                         if (remainder)
4394                           return gen_lowpart (mode, remainder);
4395                       }
4396
4397                     if (sdiv_pow2_cheap (speed, compute_mode)
4398                         && ((optab_handler (sdiv_optab, compute_mode)
4399                              != CODE_FOR_nothing)
4400                             || (optab_handler (sdivmod_optab, compute_mode)
4401                                 != CODE_FOR_nothing)))
4402                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4403                                                 compute_mode, op0,
4404                                                 gen_int_mode (abs_d,
4405                                                               compute_mode),
4406                                                 NULL_RTX, 0);
4407                     else
4408                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4409
4410                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4411                        negate the quotient.  */
4412                     if (d < 0)
4413                       {
4414                         insn = get_last_insn ();
4415                         if (insn != last
4416                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4417                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4418                           set_dst_reg_note (insn, REG_EQUAL,
4419                                             gen_rtx_DIV (compute_mode, op0,
4420                                                          gen_int_mode
4421                                                            (abs_d,
4422                                                             compute_mode)),
4423                                             quotient);
4424
4425                         quotient = expand_unop (compute_mode, neg_optab,
4426                                                 quotient, quotient, 0);
4427                       }
4428                   }
4429                 else if (size <= HOST_BITS_PER_WIDE_INT)
4430                   {
4431                     choose_multiplier (abs_d, size, size - 1,
4432                                        &ml, &post_shift, &lgup);
4433                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4434                       {
4435                         rtx t1, t2, t3;
4436
4437                         if (post_shift >= BITS_PER_WORD
4438                             || size - 1 >= BITS_PER_WORD)
4439                           goto fail1;
4440
4441                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4442                                       + shift_cost (speed, compute_mode, size - 1)
4443                                       + add_cost (speed, compute_mode));
4444                         t1 = expmed_mult_highpart
4445                           (compute_mode, op0, gen_int_mode (ml, compute_mode),
4446                            NULL_RTX, 0, max_cost - extra_cost);
4447                         if (t1 == 0)
4448                           goto fail1;
4449                         t2 = expand_shift
4450                           (RSHIFT_EXPR, compute_mode, t1,
4451                            post_shift, NULL_RTX, 0);
4452                         t3 = expand_shift
4453                           (RSHIFT_EXPR, compute_mode, op0,
4454                            size - 1, NULL_RTX, 0);
4455                         if (d < 0)
4456                           quotient
4457                             = force_operand (gen_rtx_MINUS (compute_mode,
4458                                                             t3, t2),
4459                                              tquotient);
4460                         else
4461                           quotient
4462                             = force_operand (gen_rtx_MINUS (compute_mode,
4463                                                             t2, t3),
4464                                              tquotient);
4465                       }
4466                     else
4467                       {
4468                         rtx t1, t2, t3, t4;
4469
4470                         if (post_shift >= BITS_PER_WORD
4471                             || size - 1 >= BITS_PER_WORD)
4472                           goto fail1;
4473
4474                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4475                         mlr = gen_int_mode (ml, compute_mode);
4476                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4477                                       + shift_cost (speed, compute_mode, size - 1)
4478                                       + 2 * add_cost (speed, compute_mode));
4479                         t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4480                                                    NULL_RTX, 0,
4481                                                    max_cost - extra_cost);
4482                         if (t1 == 0)
4483                           goto fail1;
4484                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4485                                                           t1, op0),
4486                                             NULL_RTX);
4487                         t3 = expand_shift
4488                           (RSHIFT_EXPR, compute_mode, t2,
4489                            post_shift, NULL_RTX, 0);
4490                         t4 = expand_shift
4491                           (RSHIFT_EXPR, compute_mode, op0,
4492                            size - 1, NULL_RTX, 0);
4493                         if (d < 0)
4494                           quotient
4495                             = force_operand (gen_rtx_MINUS (compute_mode,
4496                                                             t4, t3),
4497                                              tquotient);
4498                         else
4499                           quotient
4500                             = force_operand (gen_rtx_MINUS (compute_mode,
4501                                                             t3, t4),
4502                                              tquotient);
4503                       }
4504                   }
4505                 else            /* Too wide mode to use tricky code */
4506                   break;
4507
4508                 insn = get_last_insn ();
4509                 if (insn != last)
4510                   set_dst_reg_note (insn, REG_EQUAL,
4511                                     gen_rtx_DIV (compute_mode, op0, op1),
4512                                     quotient);
4513               }
4514             break;
4515           }
4516       fail1:
4517         delete_insns_since (last);
4518         break;
4519
4520       case FLOOR_DIV_EXPR:
4521       case FLOOR_MOD_EXPR:
4522       /* We will come here only for signed operations.  */
4523         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4524           {
4525             unsigned HOST_WIDE_INT mh, ml;
4526             int pre_shift, lgup, post_shift;
4527             HOST_WIDE_INT d = INTVAL (op1);
4528
4529             if (d > 0)
4530               {
4531                 /* We could just as easily deal with negative constants here,
4532                    but it does not seem worth the trouble for GCC 2.6.  */
4533                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4534                   {
4535                     pre_shift = floor_log2 (d);
4536                     if (rem_flag)
4537                       {
4538                         unsigned HOST_WIDE_INT mask
4539                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4540                         remainder = expand_binop
4541                           (compute_mode, and_optab, op0,
4542                            gen_int_mode (mask, compute_mode),
4543                            remainder, 0, OPTAB_LIB_WIDEN);
4544                         if (remainder)
4545                           return gen_lowpart (mode, remainder);
4546                       }
4547                     quotient = expand_shift
4548                       (RSHIFT_EXPR, compute_mode, op0,
4549                        pre_shift, tquotient, 0);
4550                   }
4551                 else
4552                   {
4553                     rtx t1, t2, t3, t4;
4554
4555                     mh = choose_multiplier (d, size, size - 1,
4556                                             &ml, &post_shift, &lgup);
4557                     gcc_assert (!mh);
4558
4559                     if (post_shift < BITS_PER_WORD
4560                         && size - 1 < BITS_PER_WORD)
4561                       {
4562                         t1 = expand_shift
4563                           (RSHIFT_EXPR, compute_mode, op0,
4564                            size - 1, NULL_RTX, 0);
4565                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4566                                            NULL_RTX, 0, OPTAB_WIDEN);
4567                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4568                                       + shift_cost (speed, compute_mode, size - 1)
4569                                       + 2 * add_cost (speed, compute_mode));
4570                         t3 = expmed_mult_highpart
4571                           (compute_mode, t2, gen_int_mode (ml, compute_mode),
4572                            NULL_RTX, 1, max_cost - extra_cost);
4573                         if (t3 != 0)
4574                           {
4575                             t4 = expand_shift
4576                               (RSHIFT_EXPR, compute_mode, t3,
4577                                post_shift, NULL_RTX, 1);
4578                             quotient = expand_binop (compute_mode, xor_optab,
4579                                                      t4, t1, tquotient, 0,
4580                                                      OPTAB_WIDEN);
4581                           }
4582                       }
4583                   }
4584               }
4585             else
4586               {
4587                 rtx nsign, t1, t2, t3, t4;
4588                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4589                                                   op0, constm1_rtx), NULL_RTX);
4590                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4591                                    0, OPTAB_WIDEN);
4592                 nsign = expand_shift
4593                   (RSHIFT_EXPR, compute_mode, t2,
4594                    size - 1, NULL_RTX, 0);
4595                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4596                                     NULL_RTX);
4597                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4598                                     NULL_RTX, 0);
4599                 if (t4)
4600                   {
4601                     rtx t5;
4602                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4603                                       NULL_RTX, 0);
4604                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4605                                                             t4, t5),
4606                                               tquotient);
4607                   }
4608               }
4609           }
4610
4611         if (quotient != 0)
4612           break;
4613         delete_insns_since (last);
4614
4615         /* Try using an instruction that produces both the quotient and
4616            remainder, using truncation.  We can easily compensate the quotient
4617            or remainder to get floor rounding, once we have the remainder.
4618            Notice that we compute also the final remainder value here,
4619            and return the result right away.  */
4620         if (target == 0 || GET_MODE (target) != compute_mode)
4621           target = gen_reg_rtx (compute_mode);
4622
4623         if (rem_flag)
4624           {
4625             remainder
4626               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4627             quotient = gen_reg_rtx (compute_mode);
4628           }
4629         else
4630           {
4631             quotient
4632               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4633             remainder = gen_reg_rtx (compute_mode);
4634           }
4635
4636         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4637                                  quotient, remainder, 0))
4638           {
4639             /* This could be computed with a branch-less sequence.
4640                Save that for later.  */
4641             rtx tem;
4642             rtx_code_label *label = gen_label_rtx ();
4643             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4644             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4645                                 NULL_RTX, 0, OPTAB_WIDEN);
4646             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4647             expand_dec (quotient, const1_rtx);
4648             expand_inc (remainder, op1);
4649             emit_label (label);
4650             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4651           }
4652
4653         /* No luck with division elimination or divmod.  Have to do it
4654            by conditionally adjusting op0 *and* the result.  */
4655         {
4656           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4657           rtx adjusted_op0;
4658           rtx tem;
4659
4660           quotient = gen_reg_rtx (compute_mode);
4661           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4662           label1 = gen_label_rtx ();
4663           label2 = gen_label_rtx ();
4664           label3 = gen_label_rtx ();
4665           label4 = gen_label_rtx ();
4666           label5 = gen_label_rtx ();
4667           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4668           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4669           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4670                               quotient, 0, OPTAB_LIB_WIDEN);
4671           if (tem != quotient)
4672             emit_move_insn (quotient, tem);
4673           emit_jump_insn (targetm.gen_jump (label5));
4674           emit_barrier ();
4675           emit_label (label1);
4676           expand_inc (adjusted_op0, const1_rtx);
4677           emit_jump_insn (targetm.gen_jump (label4));
4678           emit_barrier ();
4679           emit_label (label2);
4680           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4681           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4682                               quotient, 0, OPTAB_LIB_WIDEN);
4683           if (tem != quotient)
4684             emit_move_insn (quotient, tem);
4685           emit_jump_insn (targetm.gen_jump (label5));
4686           emit_barrier ();
4687           emit_label (label3);
4688           expand_dec (adjusted_op0, const1_rtx);
4689           emit_label (label4);
4690           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4691                               quotient, 0, OPTAB_LIB_WIDEN);
4692           if (tem != quotient)
4693             emit_move_insn (quotient, tem);
4694           expand_dec (quotient, const1_rtx);
4695           emit_label (label5);
4696         }
4697         break;
4698
4699       case CEIL_DIV_EXPR:
4700       case CEIL_MOD_EXPR:
4701         if (unsignedp)
4702           {
4703             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4704               {
4705                 rtx t1, t2, t3;
4706                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4707                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4708                                    floor_log2 (d), tquotient, 1);
4709                 t2 = expand_binop (compute_mode, and_optab, op0,
4710                                    gen_int_mode (d - 1, compute_mode),
4711                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4712                 t3 = gen_reg_rtx (compute_mode);
4713                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4714                                       compute_mode, 1, 1);
4715                 if (t3 == 0)
4716                   {
4717                     rtx_code_label *lab;
4718                     lab = gen_label_rtx ();
4719                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4720                     expand_inc (t1, const1_rtx);
4721                     emit_label (lab);
4722                     quotient = t1;
4723                   }
4724                 else
4725                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4726                                                           t1, t3),
4727                                             tquotient);
4728                 break;
4729               }
4730
4731             /* Try using an instruction that produces both the quotient and
4732                remainder, using truncation.  We can easily compensate the
4733                quotient or remainder to get ceiling rounding, once we have the
4734                remainder.  Notice that we compute also the final remainder
4735                value here, and return the result right away.  */
4736             if (target == 0 || GET_MODE (target) != compute_mode)
4737               target = gen_reg_rtx (compute_mode);
4738
4739             if (rem_flag)
4740               {
4741                 remainder = (REG_P (target)
4742                              ? target : gen_reg_rtx (compute_mode));
4743                 quotient = gen_reg_rtx (compute_mode);
4744               }
4745             else
4746               {
4747                 quotient = (REG_P (target)
4748                             ? target : gen_reg_rtx (compute_mode));
4749                 remainder = gen_reg_rtx (compute_mode);
4750               }
4751
4752             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4753                                      remainder, 1))
4754               {
4755                 /* This could be computed with a branch-less sequence.
4756                    Save that for later.  */
4757                 rtx_code_label *label = gen_label_rtx ();
4758                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4759                                  compute_mode, label);
4760                 expand_inc (quotient, const1_rtx);
4761                 expand_dec (remainder, op1);
4762                 emit_label (label);
4763                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4764               }
4765
4766             /* No luck with division elimination or divmod.  Have to do it
4767                by conditionally adjusting op0 *and* the result.  */
4768             {
4769               rtx_code_label *label1, *label2;
4770               rtx adjusted_op0, tem;
4771
4772               quotient = gen_reg_rtx (compute_mode);
4773               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4774               label1 = gen_label_rtx ();
4775               label2 = gen_label_rtx ();
4776               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4777                                compute_mode, label1);
4778               emit_move_insn  (quotient, const0_rtx);
4779               emit_jump_insn (targetm.gen_jump (label2));
4780               emit_barrier ();
4781               emit_label (label1);
4782               expand_dec (adjusted_op0, const1_rtx);
4783               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4784                                   quotient, 1, OPTAB_LIB_WIDEN);
4785               if (tem != quotient)
4786                 emit_move_insn (quotient, tem);
4787               expand_inc (quotient, const1_rtx);
4788               emit_label (label2);
4789             }
4790           }
4791         else /* signed */
4792           {
4793             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4794                 && INTVAL (op1) >= 0)
4795               {
4796                 /* This is extremely similar to the code for the unsigned case
4797                    above.  For 2.7 we should merge these variants, but for
4798                    2.6.1 I don't want to touch the code for unsigned since that
4799                    get used in C.  The signed case will only be used by other
4800                    languages (Ada).  */
4801
4802                 rtx t1, t2, t3;
4803                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4804                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4805                                    floor_log2 (d), tquotient, 0);
4806                 t2 = expand_binop (compute_mode, and_optab, op0,
4807                                    gen_int_mode (d - 1, compute_mode),
4808                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4809                 t3 = gen_reg_rtx (compute_mode);
4810                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4811                                       compute_mode, 1, 1);
4812                 if (t3 == 0)
4813                   {
4814                     rtx_code_label *lab;
4815                     lab = gen_label_rtx ();
4816                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4817                     expand_inc (t1, const1_rtx);
4818                     emit_label (lab);
4819                     quotient = t1;
4820                   }
4821                 else
4822                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4823                                                           t1, t3),
4824                                             tquotient);
4825                 break;
4826               }
4827
4828             /* Try using an instruction that produces both the quotient and
4829                remainder, using truncation.  We can easily compensate the
4830                quotient or remainder to get ceiling rounding, once we have the
4831                remainder.  Notice that we compute also the final remainder
4832                value here, and return the result right away.  */
4833             if (target == 0 || GET_MODE (target) != compute_mode)
4834               target = gen_reg_rtx (compute_mode);
4835             if (rem_flag)
4836               {
4837                 remainder= (REG_P (target)
4838                             ? target : gen_reg_rtx (compute_mode));
4839                 quotient = gen_reg_rtx (compute_mode);
4840               }
4841             else
4842               {
4843                 quotient = (REG_P (target)
4844                             ? target : gen_reg_rtx (compute_mode));
4845                 remainder = gen_reg_rtx (compute_mode);
4846               }
4847
4848             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4849                                      remainder, 0))
4850               {
4851                 /* This could be computed with a branch-less sequence.
4852                    Save that for later.  */
4853                 rtx tem;
4854                 rtx_code_label *label = gen_label_rtx ();
4855                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4856                                  compute_mode, label);
4857                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4858                                     NULL_RTX, 0, OPTAB_WIDEN);
4859                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4860                 expand_inc (quotient, const1_rtx);
4861                 expand_dec (remainder, op1);
4862                 emit_label (label);
4863                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4864               }
4865
4866             /* No luck with division elimination or divmod.  Have to do it
4867                by conditionally adjusting op0 *and* the result.  */
4868             {
4869               rtx_code_label *label1, *label2, *label3, *label4, *label5;
4870               rtx adjusted_op0;
4871               rtx tem;
4872
4873               quotient = gen_reg_rtx (compute_mode);
4874               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4875               label1 = gen_label_rtx ();
4876               label2 = gen_label_rtx ();
4877               label3 = gen_label_rtx ();
4878               label4 = gen_label_rtx ();
4879               label5 = gen_label_rtx ();
4880               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4881               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4882                                compute_mode, label1);
4883               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4884                                   quotient, 0, OPTAB_LIB_WIDEN);
4885               if (tem != quotient)
4886                 emit_move_insn (quotient, tem);
4887               emit_jump_insn (targetm.gen_jump (label5));
4888               emit_barrier ();
4889               emit_label (label1);
4890               expand_dec (adjusted_op0, const1_rtx);
4891               emit_jump_insn (targetm.gen_jump (label4));
4892               emit_barrier ();
4893               emit_label (label2);
4894               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4895                                compute_mode, label3);
4896               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4897                                   quotient, 0, OPTAB_LIB_WIDEN);
4898               if (tem != quotient)
4899                 emit_move_insn (quotient, tem);
4900               emit_jump_insn (targetm.gen_jump (label5));
4901               emit_barrier ();
4902               emit_label (label3);
4903               expand_inc (adjusted_op0, const1_rtx);
4904               emit_label (label4);
4905               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4906                                   quotient, 0, OPTAB_LIB_WIDEN);
4907               if (tem != quotient)
4908                 emit_move_insn (quotient, tem);
4909               expand_inc (quotient, const1_rtx);
4910               emit_label (label5);
4911             }
4912           }
4913         break;
4914
4915       case EXACT_DIV_EXPR:
4916         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4917           {
4918             HOST_WIDE_INT d = INTVAL (op1);
4919             unsigned HOST_WIDE_INT ml;
4920             int pre_shift;
4921             rtx t1;
4922
4923             pre_shift = floor_log2 (d & -d);
4924             ml = invert_mod2n (d >> pre_shift, size);
4925             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4926                                pre_shift, NULL_RTX, unsignedp);
4927             quotient = expand_mult (compute_mode, t1,
4928                                     gen_int_mode (ml, compute_mode),
4929                                     NULL_RTX, 1);
4930
4931             insn = get_last_insn ();
4932             set_dst_reg_note (insn, REG_EQUAL,
4933                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4934                                               compute_mode, op0, op1),
4935                               quotient);
4936           }
4937         break;
4938
4939       case ROUND_DIV_EXPR:
4940       case ROUND_MOD_EXPR:
4941         if (unsignedp)
4942           {
4943             rtx tem;
4944             rtx_code_label *label;
4945             label = gen_label_rtx ();
4946             quotient = gen_reg_rtx (compute_mode);
4947             remainder = gen_reg_rtx (compute_mode);
4948             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4949               {
4950                 rtx tem;
4951                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4952                                          quotient, 1, OPTAB_LIB_WIDEN);
4953                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4954                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4955                                           remainder, 1, OPTAB_LIB_WIDEN);
4956               }
4957             tem = plus_constant (compute_mode, op1, -1);
4958             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4959             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4960             expand_inc (quotient, const1_rtx);
4961             expand_dec (remainder, op1);
4962             emit_label (label);
4963           }
4964         else
4965           {
4966             rtx abs_rem, abs_op1, tem, mask;
4967             rtx_code_label *label;
4968             label = gen_label_rtx ();
4969             quotient = gen_reg_rtx (compute_mode);
4970             remainder = gen_reg_rtx (compute_mode);
4971             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4972               {
4973                 rtx tem;
4974                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4975                                          quotient, 0, OPTAB_LIB_WIDEN);
4976                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4977                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4978                                           remainder, 0, OPTAB_LIB_WIDEN);
4979               }
4980             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4981             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4982             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4983                                 1, NULL_RTX, 1);
4984             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4985             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4986                                 NULL_RTX, 0, OPTAB_WIDEN);
4987             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4988                                  size - 1, NULL_RTX, 0);
4989             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4990                                 NULL_RTX, 0, OPTAB_WIDEN);
4991             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4992                                 NULL_RTX, 0, OPTAB_WIDEN);
4993             expand_inc (quotient, tem);
4994             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4995                                 NULL_RTX, 0, OPTAB_WIDEN);
4996             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4997                                 NULL_RTX, 0, OPTAB_WIDEN);
4998             expand_dec (remainder, tem);
4999             emit_label (label);
5000           }
5001         return gen_lowpart (mode, rem_flag ? remainder : quotient);
5002
5003       default:
5004         gcc_unreachable ();
5005       }
5006
5007   if (quotient == 0)
5008     {
5009       if (target && GET_MODE (target) != compute_mode)
5010         target = 0;
5011
5012       if (rem_flag)
5013         {
5014           /* Try to produce the remainder without producing the quotient.
5015              If we seem to have a divmod pattern that does not require widening,
5016              don't try widening here.  We should really have a WIDEN argument
5017              to expand_twoval_binop, since what we'd really like to do here is
5018              1) try a mod insn in compute_mode
5019              2) try a divmod insn in compute_mode
5020              3) try a div insn in compute_mode and multiply-subtract to get
5021                 remainder
5022              4) try the same things with widening allowed.  */
5023           remainder
5024             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5025                                  op0, op1, target,
5026                                  unsignedp,
5027                                  ((optab_handler (optab2, compute_mode)
5028                                    != CODE_FOR_nothing)
5029                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
5030           if (remainder == 0)
5031             {
5032               /* No luck there.  Can we do remainder and divide at once
5033                  without a library call?  */
5034               remainder = gen_reg_rtx (compute_mode);
5035               if (! expand_twoval_binop ((unsignedp
5036                                           ? udivmod_optab
5037                                           : sdivmod_optab),
5038                                          op0, op1,
5039                                          NULL_RTX, remainder, unsignedp))
5040                 remainder = 0;
5041             }
5042
5043           if (remainder)
5044             return gen_lowpart (mode, remainder);
5045         }
5046
5047       /* Produce the quotient.  Try a quotient insn, but not a library call.
5048          If we have a divmod in this mode, use it in preference to widening
5049          the div (for this test we assume it will not fail). Note that optab2
5050          is set to the one of the two optabs that the call below will use.  */
5051       quotient
5052         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5053                              op0, op1, rem_flag ? NULL_RTX : target,
5054                              unsignedp,
5055                              ((optab_handler (optab2, compute_mode)
5056                                != CODE_FOR_nothing)
5057                               ? OPTAB_DIRECT : OPTAB_WIDEN));
5058
5059       if (quotient == 0)
5060         {
5061           /* No luck there.  Try a quotient-and-remainder insn,
5062              keeping the quotient alone.  */
5063           quotient = gen_reg_rtx (compute_mode);
5064           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5065                                      op0, op1,
5066                                      quotient, NULL_RTX, unsignedp))
5067             {
5068               quotient = 0;
5069               if (! rem_flag)
5070                 /* Still no luck.  If we are not computing the remainder,
5071                    use a library call for the quotient.  */
5072                 quotient = sign_expand_binop (compute_mode,
5073                                               udiv_optab, sdiv_optab,
5074                                               op0, op1, target,
5075                                               unsignedp, OPTAB_LIB_WIDEN);
5076             }
5077         }
5078     }
5079
5080   if (rem_flag)
5081     {
5082       if (target && GET_MODE (target) != compute_mode)
5083         target = 0;
5084
5085       if (quotient == 0)
5086         {
5087           /* No divide instruction either.  Use library for remainder.  */
5088           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5089                                          op0, op1, target,
5090                                          unsignedp, OPTAB_LIB_WIDEN);
5091           /* No remainder function.  Try a quotient-and-remainder
5092              function, keeping the remainder.  */
5093           if (!remainder)
5094             {
5095               remainder = gen_reg_rtx (compute_mode);
5096               if (!expand_twoval_binop_libfunc
5097                   (unsignedp ? udivmod_optab : sdivmod_optab,
5098                    op0, op1,
5099                    NULL_RTX, remainder,
5100                    unsignedp ? UMOD : MOD))
5101                 remainder = NULL_RTX;
5102             }
5103         }
5104       else
5105         {
5106           /* We divided.  Now finish doing X - Y * (X / Y).  */
5107           remainder = expand_mult (compute_mode, quotient, op1,
5108                                    NULL_RTX, unsignedp);
5109           remainder = expand_binop (compute_mode, sub_optab, op0,
5110                                     remainder, target, unsignedp,
5111                                     OPTAB_LIB_WIDEN);
5112         }
5113     }
5114
5115   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5116 }
5117 \f
5118 /* Return a tree node with data type TYPE, describing the value of X.
5119    Usually this is an VAR_DECL, if there is no obvious better choice.
5120    X may be an expression, however we only support those expressions
5121    generated by loop.c.  */
5122
5123 tree
5124 make_tree (tree type, rtx x)
5125 {
5126   tree t;
5127
5128   switch (GET_CODE (x))
5129     {
5130     case CONST_INT:
5131     case CONST_WIDE_INT:
5132       t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
5133       return t;
5134
5135     case CONST_DOUBLE:
5136       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5137       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5138         t = wide_int_to_tree (type,
5139                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5140                                                     HOST_BITS_PER_WIDE_INT * 2));
5141       else
5142         t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5143
5144       return t;
5145
5146     case CONST_VECTOR:
5147       {
5148         int units = CONST_VECTOR_NUNITS (x);
5149         tree itype = TREE_TYPE (type);
5150         tree *elts;
5151         int i;
5152
5153         /* Build a tree with vector elements.  */
5154         elts = XALLOCAVEC (tree, units);
5155         for (i = units - 1; i >= 0; --i)
5156           {
5157             rtx elt = CONST_VECTOR_ELT (x, i);
5158             elts[i] = make_tree (itype, elt);
5159           }
5160
5161         return build_vector (type, elts);
5162       }
5163
5164     case PLUS:
5165       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5166                           make_tree (type, XEXP (x, 1)));
5167
5168     case MINUS:
5169       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5170                           make_tree (type, XEXP (x, 1)));
5171
5172     case NEG:
5173       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5174
5175     case MULT:
5176       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5177                           make_tree (type, XEXP (x, 1)));
5178
5179     case ASHIFT:
5180       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5181                           make_tree (type, XEXP (x, 1)));
5182
5183     case LSHIFTRT:
5184       t = unsigned_type_for (type);
5185       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5186                                          make_tree (t, XEXP (x, 0)),
5187                                          make_tree (type, XEXP (x, 1))));
5188
5189     case ASHIFTRT:
5190       t = signed_type_for (type);
5191       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5192                                          make_tree (t, XEXP (x, 0)),
5193                                          make_tree (type, XEXP (x, 1))));
5194
5195     case DIV:
5196       if (TREE_CODE (type) != REAL_TYPE)
5197         t = signed_type_for (type);
5198       else
5199         t = type;
5200
5201       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5202                                          make_tree (t, XEXP (x, 0)),
5203                                          make_tree (t, XEXP (x, 1))));
5204     case UDIV:
5205       t = unsigned_type_for (type);
5206       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5207                                          make_tree (t, XEXP (x, 0)),
5208                                          make_tree (t, XEXP (x, 1))));
5209
5210     case SIGN_EXTEND:
5211     case ZERO_EXTEND:
5212       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5213                                           GET_CODE (x) == ZERO_EXTEND);
5214       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5215
5216     case CONST:
5217       return make_tree (type, XEXP (x, 0));
5218
5219     case SYMBOL_REF:
5220       t = SYMBOL_REF_DECL (x);
5221       if (t)
5222         return fold_convert (type, build_fold_addr_expr (t));
5223       /* else fall through.  */
5224
5225     default:
5226       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5227
5228       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5229          address mode to pointer mode.  */
5230       if (POINTER_TYPE_P (type))
5231         x = convert_memory_address_addr_space
5232               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5233
5234       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5235          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5236       t->decl_with_rtl.rtl = x;
5237
5238       return t;
5239     }
5240 }
5241 \f
5242 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5243    and returning TARGET.
5244
5245    If TARGET is 0, a pseudo-register or constant is returned.  */
5246
5247 rtx
5248 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5249 {
5250   rtx tem = 0;
5251
5252   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5253     tem = simplify_binary_operation (AND, mode, op0, op1);
5254   if (tem == 0)
5255     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5256
5257   if (target == 0)
5258     target = tem;
5259   else if (tem != target)
5260     emit_move_insn (target, tem);
5261   return target;
5262 }
5263
5264 /* Helper function for emit_store_flag.  */
5265 rtx
5266 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5267              machine_mode mode, machine_mode compare_mode,
5268              int unsignedp, rtx x, rtx y, int normalizep,
5269              machine_mode target_mode)
5270 {
5271   struct expand_operand ops[4];
5272   rtx op0, comparison, subtarget;
5273   rtx_insn *last;
5274   machine_mode result_mode = targetm.cstore_mode (icode);
5275
5276   last = get_last_insn ();
5277   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5278   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5279   if (!x || !y)
5280     {
5281       delete_insns_since (last);
5282       return NULL_RTX;
5283     }
5284
5285   if (target_mode == VOIDmode)
5286     target_mode = result_mode;
5287   if (!target)
5288     target = gen_reg_rtx (target_mode);
5289
5290   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5291
5292   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5293   create_fixed_operand (&ops[1], comparison);
5294   create_fixed_operand (&ops[2], x);
5295   create_fixed_operand (&ops[3], y);
5296   if (!maybe_expand_insn (icode, 4, ops))
5297     {
5298       delete_insns_since (last);
5299       return NULL_RTX;
5300     }
5301   subtarget = ops[0].value;
5302
5303   /* If we are converting to a wider mode, first convert to
5304      TARGET_MODE, then normalize.  This produces better combining
5305      opportunities on machines that have a SIGN_EXTRACT when we are
5306      testing a single bit.  This mostly benefits the 68k.
5307
5308      If STORE_FLAG_VALUE does not have the sign bit set when
5309      interpreted in MODE, we can do this conversion as unsigned, which
5310      is usually more efficient.  */
5311   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5312     {
5313       convert_move (target, subtarget,
5314                     val_signbit_known_clear_p (result_mode,
5315                                                STORE_FLAG_VALUE));
5316       op0 = target;
5317       result_mode = target_mode;
5318     }
5319   else
5320     op0 = subtarget;
5321
5322   /* If we want to keep subexpressions around, don't reuse our last
5323      target.  */
5324   if (optimize)
5325     subtarget = 0;
5326
5327   /* Now normalize to the proper value in MODE.  Sometimes we don't
5328      have to do anything.  */
5329   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5330     ;
5331   /* STORE_FLAG_VALUE might be the most negative number, so write
5332      the comparison this way to avoid a compiler-time warning.  */
5333   else if (- normalizep == STORE_FLAG_VALUE)
5334     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5335
5336   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5337      it hard to use a value of just the sign bit due to ANSI integer
5338      constant typing rules.  */
5339   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5340     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5341                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5342                         normalizep == 1);
5343   else
5344     {
5345       gcc_assert (STORE_FLAG_VALUE & 1);
5346
5347       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5348       if (normalizep == -1)
5349         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5350     }
5351
5352   /* If we were converting to a smaller mode, do the conversion now.  */
5353   if (target_mode != result_mode)
5354     {
5355       convert_move (target, op0, 0);
5356       return target;
5357     }
5358   else
5359     return op0;
5360 }
5361
5362
5363 /* A subroutine of emit_store_flag only including "tricks" that do not
5364    need a recursive call.  These are kept separate to avoid infinite
5365    loops.  */
5366
5367 static rtx
5368 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5369                    machine_mode mode, int unsignedp, int normalizep,
5370                    machine_mode target_mode)
5371 {
5372   rtx subtarget;
5373   enum insn_code icode;
5374   machine_mode compare_mode;
5375   enum mode_class mclass;
5376   enum rtx_code scode;
5377
5378   if (unsignedp)
5379     code = unsigned_condition (code);
5380   scode = swap_condition (code);
5381
5382   /* If one operand is constant, make it the second one.  Only do this
5383      if the other operand is not constant as well.  */
5384
5385   if (swap_commutative_operands_p (op0, op1))
5386     {
5387       std::swap (op0, op1);
5388       code = swap_condition (code);
5389     }
5390
5391   if (mode == VOIDmode)
5392     mode = GET_MODE (op0);
5393
5394   /* For some comparisons with 1 and -1, we can convert this to
5395      comparisons with zero.  This will often produce more opportunities for
5396      store-flag insns.  */
5397
5398   switch (code)
5399     {
5400     case LT:
5401       if (op1 == const1_rtx)
5402         op1 = const0_rtx, code = LE;
5403       break;
5404     case LE:
5405       if (op1 == constm1_rtx)
5406         op1 = const0_rtx, code = LT;
5407       break;
5408     case GE:
5409       if (op1 == const1_rtx)
5410         op1 = const0_rtx, code = GT;
5411       break;
5412     case GT:
5413       if (op1 == constm1_rtx)
5414         op1 = const0_rtx, code = GE;
5415       break;
5416     case GEU:
5417       if (op1 == const1_rtx)
5418         op1 = const0_rtx, code = NE;
5419       break;
5420     case LTU:
5421       if (op1 == const1_rtx)
5422         op1 = const0_rtx, code = EQ;
5423       break;
5424     default:
5425       break;
5426     }
5427
5428   /* If we are comparing a double-word integer with zero or -1, we can
5429      convert the comparison into one involving a single word.  */
5430   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5431       && GET_MODE_CLASS (mode) == MODE_INT
5432       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5433     {
5434       rtx tem;
5435       if ((code == EQ || code == NE)
5436           && (op1 == const0_rtx || op1 == constm1_rtx))
5437         {
5438           rtx op00, op01;
5439
5440           /* Do a logical OR or AND of the two words and compare the
5441              result.  */
5442           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5443           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5444           tem = expand_binop (word_mode,
5445                               op1 == const0_rtx ? ior_optab : and_optab,
5446                               op00, op01, NULL_RTX, unsignedp,
5447                               OPTAB_DIRECT);
5448
5449           if (tem != 0)
5450             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5451                                    unsignedp, normalizep);
5452         }
5453       else if ((code == LT || code == GE) && op1 == const0_rtx)
5454         {
5455           rtx op0h;
5456
5457           /* If testing the sign bit, can just test on high word.  */
5458           op0h = simplify_gen_subreg (word_mode, op0, mode,
5459                                       subreg_highpart_offset (word_mode,
5460                                                               mode));
5461           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5462                                  unsignedp, normalizep);
5463         }
5464       else
5465         tem = NULL_RTX;
5466
5467       if (tem)
5468         {
5469           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5470             return tem;
5471           if (!target)
5472             target = gen_reg_rtx (target_mode);
5473
5474           convert_move (target, tem,
5475                         !val_signbit_known_set_p (word_mode,
5476                                                   (normalizep ? normalizep
5477                                                    : STORE_FLAG_VALUE)));
5478           return target;
5479         }
5480     }
5481
5482   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5483      complement of A (for GE) and shifting the sign bit to the low bit.  */
5484   if (op1 == const0_rtx && (code == LT || code == GE)
5485       && GET_MODE_CLASS (mode) == MODE_INT
5486       && (normalizep || STORE_FLAG_VALUE == 1
5487           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5488     {
5489       subtarget = target;
5490
5491       if (!target)
5492         target_mode = mode;
5493
5494       /* If the result is to be wider than OP0, it is best to convert it
5495          first.  If it is to be narrower, it is *incorrect* to convert it
5496          first.  */
5497       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5498         {
5499           op0 = convert_modes (target_mode, mode, op0, 0);
5500           mode = target_mode;
5501         }
5502
5503       if (target_mode != mode)
5504         subtarget = 0;
5505
5506       if (code == GE)
5507         op0 = expand_unop (mode, one_cmpl_optab, op0,
5508                            ((STORE_FLAG_VALUE == 1 || normalizep)
5509                             ? 0 : subtarget), 0);
5510
5511       if (STORE_FLAG_VALUE == 1 || normalizep)
5512         /* If we are supposed to produce a 0/1 value, we want to do
5513            a logical shift from the sign bit to the low-order bit; for
5514            a -1/0 value, we do an arithmetic shift.  */
5515         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5516                             GET_MODE_BITSIZE (mode) - 1,
5517                             subtarget, normalizep != -1);
5518
5519       if (mode != target_mode)
5520         op0 = convert_modes (target_mode, mode, op0, 0);
5521
5522       return op0;
5523     }
5524
5525   mclass = GET_MODE_CLASS (mode);
5526   for (compare_mode = mode; compare_mode != VOIDmode;
5527        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5528     {
5529      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5530      icode = optab_handler (cstore_optab, optab_mode);
5531      if (icode != CODE_FOR_nothing)
5532         {
5533           do_pending_stack_adjust ();
5534           rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5535                                  unsignedp, op0, op1, normalizep, target_mode);
5536           if (tem)
5537             return tem;
5538
5539           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5540             {
5541               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5542                                  unsignedp, op1, op0, normalizep, target_mode);
5543               if (tem)
5544                 return tem;
5545             }
5546           break;
5547         }
5548     }
5549
5550   return 0;
5551 }
5552
5553 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5554    and storing in TARGET.  Normally return TARGET.
5555    Return 0 if that cannot be done.
5556
5557    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5558    it is VOIDmode, they cannot both be CONST_INT.
5559
5560    UNSIGNEDP is for the case where we have to widen the operands
5561    to perform the operation.  It says to use zero-extension.
5562
5563    NORMALIZEP is 1 if we should convert the result to be either zero
5564    or one.  Normalize is -1 if we should convert the result to be
5565    either zero or -1.  If NORMALIZEP is zero, the result will be left
5566    "raw" out of the scc insn.  */
5567
5568 rtx
5569 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5570                  machine_mode mode, int unsignedp, int normalizep)
5571 {
5572   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5573   enum rtx_code rcode;
5574   rtx subtarget;
5575   rtx tem, trueval;
5576   rtx_insn *last;
5577
5578   /* If we compare constants, we shouldn't use a store-flag operation,
5579      but a constant load.  We can get there via the vanilla route that
5580      usually generates a compare-branch sequence, but will in this case
5581      fold the comparison to a constant, and thus elide the branch.  */
5582   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5583     return NULL_RTX;
5584
5585   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5586                            target_mode);
5587   if (tem)
5588     return tem;
5589
5590   /* If we reached here, we can't do this with a scc insn, however there
5591      are some comparisons that can be done in other ways.  Don't do any
5592      of these cases if branches are very cheap.  */
5593   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5594     return 0;
5595
5596   /* See what we need to return.  We can only return a 1, -1, or the
5597      sign bit.  */
5598
5599   if (normalizep == 0)
5600     {
5601       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5602         normalizep = STORE_FLAG_VALUE;
5603
5604       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5605         ;
5606       else
5607         return 0;
5608     }
5609
5610   last = get_last_insn ();
5611
5612   /* If optimizing, use different pseudo registers for each insn, instead
5613      of reusing the same pseudo.  This leads to better CSE, but slows
5614      down the compiler, since there are more pseudos */
5615   subtarget = (!optimize
5616                && (target_mode == mode)) ? target : NULL_RTX;
5617   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5618
5619   /* For floating-point comparisons, try the reverse comparison or try
5620      changing the "orderedness" of the comparison.  */
5621   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5622     {
5623       enum rtx_code first_code;
5624       bool and_them;
5625
5626       rcode = reverse_condition_maybe_unordered (code);
5627       if (can_compare_p (rcode, mode, ccp_store_flag)
5628           && (code == ORDERED || code == UNORDERED
5629               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5630               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5631         {
5632           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5633                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5634
5635           /* For the reverse comparison, use either an addition or a XOR.  */
5636           if (want_add
5637               && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5638                            optimize_insn_for_speed_p ()) == 0)
5639             {
5640               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5641                                        STORE_FLAG_VALUE, target_mode);
5642               if (tem)
5643                 return expand_binop (target_mode, add_optab, tem,
5644                                      gen_int_mode (normalizep, target_mode),
5645                                      target, 0, OPTAB_WIDEN);
5646             }
5647           else if (!want_add
5648                    && rtx_cost (trueval, mode, XOR, 1,
5649                                 optimize_insn_for_speed_p ()) == 0)
5650             {
5651               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5652                                        normalizep, target_mode);
5653               if (tem)
5654                 return expand_binop (target_mode, xor_optab, tem, trueval,
5655                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5656             }
5657         }
5658
5659       delete_insns_since (last);
5660
5661       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5662       if (code == ORDERED || code == UNORDERED)
5663         return 0;
5664
5665       and_them = split_comparison (code, mode, &first_code, &code);
5666
5667       /* If there are no NaNs, the first comparison should always fall through.
5668          Effectively change the comparison to the other one.  */
5669       if (!HONOR_NANS (mode))
5670         {
5671           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5672           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5673                                     target_mode);
5674         }
5675
5676       if (!HAVE_conditional_move)
5677         return 0;
5678
5679       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5680          conditional move.  */
5681       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5682                                normalizep, target_mode);
5683       if (tem == 0)
5684         return 0;
5685
5686       if (and_them)
5687         tem = emit_conditional_move (target, code, op0, op1, mode,
5688                                      tem, const0_rtx, GET_MODE (tem), 0);
5689       else
5690         tem = emit_conditional_move (target, code, op0, op1, mode,
5691                                      trueval, tem, GET_MODE (tem), 0);
5692
5693       if (tem == 0)
5694         delete_insns_since (last);
5695       return tem;
5696     }
5697
5698   /* The remaining tricks only apply to integer comparisons.  */
5699
5700   if (GET_MODE_CLASS (mode) != MODE_INT)
5701     return 0;
5702
5703   /* If this is an equality comparison of integers, we can try to exclusive-or
5704      (or subtract) the two operands and use a recursive call to try the
5705      comparison with zero.  Don't do any of these cases if branches are
5706      very cheap.  */
5707
5708   if ((code == EQ || code == NE) && op1 != const0_rtx)
5709     {
5710       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5711                           OPTAB_WIDEN);
5712
5713       if (tem == 0)
5714         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5715                             OPTAB_WIDEN);
5716       if (tem != 0)
5717         tem = emit_store_flag (target, code, tem, const0_rtx,
5718                                mode, unsignedp, normalizep);
5719       if (tem != 0)
5720         return tem;
5721
5722       delete_insns_since (last);
5723     }
5724
5725   /* For integer comparisons, try the reverse comparison.  However, for
5726      small X and if we'd have anyway to extend, implementing "X != 0"
5727      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5728   rcode = reverse_condition (code);
5729   if (can_compare_p (rcode, mode, ccp_store_flag)
5730       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5731             && code == NE
5732             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5733             && op1 == const0_rtx))
5734     {
5735       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5736                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5737
5738       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5739       if (want_add
5740           && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5741                        optimize_insn_for_speed_p ()) == 0)
5742         {
5743           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5744                                    STORE_FLAG_VALUE, target_mode);
5745           if (tem != 0)
5746             tem = expand_binop (target_mode, add_optab, tem,
5747                                 gen_int_mode (normalizep, target_mode),
5748                                 target, 0, OPTAB_WIDEN);
5749         }
5750       else if (!want_add
5751                && rtx_cost (trueval, mode, XOR, 1,
5752                             optimize_insn_for_speed_p ()) == 0)
5753         {
5754           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5755                                    normalizep, target_mode);
5756           if (tem != 0)
5757             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5758                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5759         }
5760
5761       if (tem != 0)
5762         return tem;
5763       delete_insns_since (last);
5764     }
5765
5766   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5767      the constant zero.  Reject all other comparisons at this point.  Only
5768      do LE and GT if branches are expensive since they are expensive on
5769      2-operand machines.  */
5770
5771   if (op1 != const0_rtx
5772       || (code != EQ && code != NE
5773           && (BRANCH_COST (optimize_insn_for_speed_p (),
5774                            false) <= 1 || (code != LE && code != GT))))
5775     return 0;
5776
5777   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5778      do the necessary operation below.  */
5779
5780   tem = 0;
5781
5782   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5783      the sign bit set.  */
5784
5785   if (code == LE)
5786     {
5787       /* This is destructive, so SUBTARGET can't be OP0.  */
5788       if (rtx_equal_p (subtarget, op0))
5789         subtarget = 0;
5790
5791       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5792                           OPTAB_WIDEN);
5793       if (tem)
5794         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5795                             OPTAB_WIDEN);
5796     }
5797
5798   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5799      number of bits in the mode of OP0, minus one.  */
5800
5801   if (code == GT)
5802     {
5803       if (rtx_equal_p (subtarget, op0))
5804         subtarget = 0;
5805
5806       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5807                           GET_MODE_BITSIZE (mode) - 1,
5808                           subtarget, 0);
5809       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5810                           OPTAB_WIDEN);
5811     }
5812
5813   if (code == EQ || code == NE)
5814     {
5815       /* For EQ or NE, one way to do the comparison is to apply an operation
5816          that converts the operand into a positive number if it is nonzero
5817          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5818          for NE we negate.  This puts the result in the sign bit.  Then we
5819          normalize with a shift, if needed.
5820
5821          Two operations that can do the above actions are ABS and FFS, so try
5822          them.  If that doesn't work, and MODE is smaller than a full word,
5823          we can use zero-extension to the wider mode (an unsigned conversion)
5824          as the operation.  */
5825
5826       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5827          that is compensated by the subsequent overflow when subtracting
5828          one / negating.  */
5829
5830       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5831         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5832       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5833         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5834       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5835         {
5836           tem = convert_modes (word_mode, mode, op0, 1);
5837           mode = word_mode;
5838         }
5839
5840       if (tem != 0)
5841         {
5842           if (code == EQ)
5843             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5844                                 0, OPTAB_WIDEN);
5845           else
5846             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5847         }
5848
5849       /* If we couldn't do it that way, for NE we can "or" the two's complement
5850          of the value with itself.  For EQ, we take the one's complement of
5851          that "or", which is an extra insn, so we only handle EQ if branches
5852          are expensive.  */
5853
5854       if (tem == 0
5855           && (code == NE
5856               || BRANCH_COST (optimize_insn_for_speed_p (),
5857                               false) > 1))
5858         {
5859           if (rtx_equal_p (subtarget, op0))
5860             subtarget = 0;
5861
5862           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5863           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5864                               OPTAB_WIDEN);
5865
5866           if (tem && code == EQ)
5867             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5868         }
5869     }
5870
5871   if (tem && normalizep)
5872     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5873                         GET_MODE_BITSIZE (mode) - 1,
5874                         subtarget, normalizep == 1);
5875
5876   if (tem)
5877     {
5878       if (!target)
5879         ;
5880       else if (GET_MODE (tem) != target_mode)
5881         {
5882           convert_move (target, tem, 0);
5883           tem = target;
5884         }
5885       else if (!subtarget)
5886         {
5887           emit_move_insn (target, tem);
5888           tem = target;
5889         }
5890     }
5891   else
5892     delete_insns_since (last);
5893
5894   return tem;
5895 }
5896
5897 /* Like emit_store_flag, but always succeeds.  */
5898
5899 rtx
5900 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5901                        machine_mode mode, int unsignedp, int normalizep)
5902 {
5903   rtx tem;
5904   rtx_code_label *label;
5905   rtx trueval, falseval;
5906
5907   /* First see if emit_store_flag can do the job.  */
5908   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5909   if (tem != 0)
5910     return tem;
5911
5912   if (!target)
5913     target = gen_reg_rtx (word_mode);
5914
5915   /* If this failed, we have to do this with set/compare/jump/set code.
5916      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5917   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5918   if (code == NE
5919       && GET_MODE_CLASS (mode) == MODE_INT
5920       && REG_P (target)
5921       && op0 == target
5922       && op1 == const0_rtx)
5923     {
5924       label = gen_label_rtx ();
5925       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5926                                NULL_RTX, NULL, label, -1);
5927       emit_move_insn (target, trueval);
5928       emit_label (label);
5929       return target;
5930     }
5931
5932   if (!REG_P (target)
5933       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5934     target = gen_reg_rtx (GET_MODE (target));
5935
5936   /* Jump in the right direction if the target cannot implement CODE
5937      but can jump on its reverse condition.  */
5938   falseval = const0_rtx;
5939   if (! can_compare_p (code, mode, ccp_jump)
5940       && (! FLOAT_MODE_P (mode)
5941           || code == ORDERED || code == UNORDERED
5942           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5943           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5944     {
5945       enum rtx_code rcode;
5946       if (FLOAT_MODE_P (mode))
5947         rcode = reverse_condition_maybe_unordered (code);
5948       else
5949         rcode = reverse_condition (code);
5950
5951       /* Canonicalize to UNORDERED for the libcall.  */
5952       if (can_compare_p (rcode, mode, ccp_jump)
5953           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5954         {
5955           falseval = trueval;
5956           trueval = const0_rtx;
5957           code = rcode;
5958         }
5959     }
5960
5961   emit_move_insn (target, trueval);
5962   label = gen_label_rtx ();
5963   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
5964                            label, -1);
5965
5966   emit_move_insn (target, falseval);
5967   emit_label (label);
5968
5969   return target;
5970 }
5971 \f
5972 /* Perform possibly multi-word comparison and conditional jump to LABEL
5973    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5974    now a thin wrapper around do_compare_rtx_and_jump.  */
5975
5976 static void
5977 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5978                  rtx_code_label *label)
5979 {
5980   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5981   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
5982                            NULL, label, -1);
5983 }