gcc/expmed.c
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2015 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "diagnostic-core.h"
27 #include "rtl.h"
28 #include "hash-set.h"
29 #include "machmode.h"
30 #include "vec.h"
31 #include "double-int.h"
32 #include "input.h"
33 #include "alias.h"
34 #include "symtab.h"
35 #include "wide-int.h"
36 #include "inchash.h"
37 #include "tree.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "tm_p.h"
41 #include "flags.h"
42 #include "insn-config.h"
43 #include "hashtab.h"
44 #include "hard-reg-set.h"
45 #include "function.h"
46 #include "statistics.h"
47 #include "real.h"
48 #include "fixed-value.h"
49 #include "expmed.h"
50 #include "dojump.h"
51 #include "explow.h"
52 #include "calls.h"
53 #include "emit-rtl.h"
54 #include "varasm.h"
55 #include "stmt.h"
56 #include "expr.h"
57 #include "insn-codes.h"
58 #include "optabs.h"
59 #include "recog.h"
60 #include "langhooks.h"
61 #include "predict.h"
62 #include "basic-block.h"
63 #include "df.h"
64 #include "target.h"
66 struct target_expmed default_target_expmed;
67 #if SWITCHABLE_TARGET
68 struct target_expmed *this_target_expmed = &default_target_expmed;
69 #endif
71 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
72 unsigned HOST_WIDE_INT,
73 unsigned HOST_WIDE_INT,
74 unsigned HOST_WIDE_INT,
75 rtx, bool);
76 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
77 unsigned HOST_WIDE_INT,
78 rtx, bool);
79 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
80 unsigned HOST_WIDE_INT,
81 unsigned HOST_WIDE_INT,
82 unsigned HOST_WIDE_INT,
83 rtx, bool);
84 static rtx extract_fixed_bit_field (machine_mode, rtx,
85 unsigned HOST_WIDE_INT,
86 unsigned HOST_WIDE_INT, rtx, int, bool);
87 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
88 unsigned HOST_WIDE_INT,
89 unsigned HOST_WIDE_INT, rtx, int, bool);
90 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
91 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
92 unsigned HOST_WIDE_INT, int, bool);
93 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
94 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
95 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
97 /* Return a constant integer mask value of mode MODE with BITSIZE ones
98 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
99 The mask is truncated if necessary to the width of mode MODE. The
100 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
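/* For example, mask_rtx (SImode, 4, 3, false) yields the SImode constant
   0x70 (three one bits starting at bit 4); passing true for COMPLEMENT
   yields its 32-bit complement, 0xffffff8f.  */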
102 static inline rtx
103 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
105 return immed_wide_int_const
106 (wi::shifted_mask (bitpos, bitsize, complement,
107 GET_MODE_PRECISION (mode)), mode);
 110 /* Test whether a value is zero or a power of two.  */
111 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
112 (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
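/* For example, 8 & 7 == 0 and 0 & (0 - 1) == 0, so 8 and 0 pass the test,
   while 12 & 11 == 8, so 12 does not.  */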
114 struct init_expmed_rtl
116 rtx reg;
117 rtx plus;
118 rtx neg;
119 rtx mult;
120 rtx sdiv;
121 rtx udiv;
122 rtx sdiv_32;
123 rtx smod_32;
124 rtx wide_mult;
125 rtx wide_lshr;
126 rtx wide_trunc;
127 rtx shift;
128 rtx shift_mult;
129 rtx shift_add;
130 rtx shift_sub0;
131 rtx shift_sub1;
132 rtx zext;
133 rtx trunc;
135 rtx pow2[MAX_BITS_PER_WORD];
136 rtx cint[MAX_BITS_PER_WORD];
139 static void
140 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
141 machine_mode from_mode, bool speed)
143 int to_size, from_size;
144 rtx which;
146 to_size = GET_MODE_PRECISION (to_mode);
147 from_size = GET_MODE_PRECISION (from_mode);
 149   /* Most partial integers have a precision less than that of the "full"
 150      integer required for storage.  In case one doesn't, for
 151      comparison purposes here, reduce the bit size by one in that
 152      case.  */
153 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
154 && exact_log2 (to_size) != -1)
155 to_size --;
156 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
157 && exact_log2 (from_size) != -1)
158 from_size --;
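/* For example, a partial-integer mode whose precision happened to be
   exactly 32 would be compared as 31 bits here, so a conversion between it
   and the 32-bit full integer mode is still costed below as an extension
   or truncation rather than as a conversion between equal widths.  */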
160 /* Assume cost of zero-extend and sign-extend is the same. */
161 which = (to_size < from_size ? all->trunc : all->zext);
163 PUT_MODE (all->reg, from_mode);
164 set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
167 static void
168 init_expmed_one_mode (struct init_expmed_rtl *all,
169 machine_mode mode, int speed)
171 int m, n, mode_bitsize;
172 machine_mode mode_from;
174 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
176 PUT_MODE (all->reg, mode);
177 PUT_MODE (all->plus, mode);
178 PUT_MODE (all->neg, mode);
179 PUT_MODE (all->mult, mode);
180 PUT_MODE (all->sdiv, mode);
181 PUT_MODE (all->udiv, mode);
182 PUT_MODE (all->sdiv_32, mode);
183 PUT_MODE (all->smod_32, mode);
184 PUT_MODE (all->wide_trunc, mode);
185 PUT_MODE (all->shift, mode);
186 PUT_MODE (all->shift_mult, mode);
187 PUT_MODE (all->shift_add, mode);
188 PUT_MODE (all->shift_sub0, mode);
189 PUT_MODE (all->shift_sub1, mode);
190 PUT_MODE (all->zext, mode);
191 PUT_MODE (all->trunc, mode);
193 set_add_cost (speed, mode, set_src_cost (all->plus, speed));
194 set_neg_cost (speed, mode, set_src_cost (all->neg, speed));
195 set_mul_cost (speed, mode, set_src_cost (all->mult, speed));
196 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, speed));
197 set_udiv_cost (speed, mode, set_src_cost (all->udiv, speed));
199 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, speed)
200 <= 2 * add_cost (speed, mode)));
201 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, speed)
202 <= 4 * add_cost (speed, mode)));
204 set_shift_cost (speed, mode, 0, 0);
206 int cost = add_cost (speed, mode);
207 set_shiftadd_cost (speed, mode, 0, cost);
208 set_shiftsub0_cost (speed, mode, 0, cost);
209 set_shiftsub1_cost (speed, mode, 0, cost);
212 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
213 for (m = 1; m < n; m++)
215 XEXP (all->shift, 1) = all->cint[m];
216 XEXP (all->shift_mult, 1) = all->pow2[m];
218 set_shift_cost (speed, mode, m, set_src_cost (all->shift, speed));
219 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, speed));
220 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, speed));
221 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, speed));
224 if (SCALAR_INT_MODE_P (mode))
226 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
227 mode_from = (machine_mode)(mode_from + 1))
228 init_expmed_one_conv (all, mode, mode_from, speed);
230 if (GET_MODE_CLASS (mode) == MODE_INT)
232 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
233 if (wider_mode != VOIDmode)
235 PUT_MODE (all->zext, wider_mode);
236 PUT_MODE (all->wide_mult, wider_mode);
237 PUT_MODE (all->wide_lshr, wider_mode);
238 XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
240 set_mul_widen_cost (speed, wider_mode,
241 set_src_cost (all->wide_mult, speed));
242 set_mul_highpart_cost (speed, mode,
243 set_src_cost (all->wide_trunc, speed));
248 void
249 init_expmed (void)
251 struct init_expmed_rtl all;
252 machine_mode mode = QImode;
253 int m, speed;
255 memset (&all, 0, sizeof all);
256 for (m = 1; m < MAX_BITS_PER_WORD; m++)
258 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
259 all.cint[m] = GEN_INT (m);
262 /* Avoid using hard regs in ways which may be unsupported. */
263 all.reg = gen_rtx_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
264 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
265 all.neg = gen_rtx_NEG (mode, all.reg);
266 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
267 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
268 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
269 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
270 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
271 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
272 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
273 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
274 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
275 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
276 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
277 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
278 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
279 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
280 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
282 for (speed = 0; speed < 2; speed++)
284 crtl->maybe_hot_insn_p = speed;
285 set_zero_cost (speed, set_src_cost (const0_rtx, speed));
287 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
288 mode = (machine_mode)(mode + 1))
289 init_expmed_one_mode (&all, mode, speed);
291 if (MIN_MODE_PARTIAL_INT != VOIDmode)
292 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
293 mode = (machine_mode)(mode + 1))
294 init_expmed_one_mode (&all, mode, speed);
296 if (MIN_MODE_VECTOR_INT != VOIDmode)
297 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
298 mode = (machine_mode)(mode + 1))
299 init_expmed_one_mode (&all, mode, speed);
302 if (alg_hash_used_p ())
304 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
305 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
307 else
308 set_alg_hash_used_p (true);
309 default_rtl_profile ();
311 ggc_free (all.trunc);
312 ggc_free (all.shift_sub1);
313 ggc_free (all.shift_sub0);
314 ggc_free (all.shift_add);
315 ggc_free (all.shift_mult);
316 ggc_free (all.shift);
317 ggc_free (all.wide_trunc);
318 ggc_free (all.wide_lshr);
319 ggc_free (all.wide_mult);
320 ggc_free (all.zext);
321 ggc_free (all.smod_32);
322 ggc_free (all.sdiv_32);
323 ggc_free (all.udiv);
324 ggc_free (all.sdiv);
325 ggc_free (all.mult);
326 ggc_free (all.neg);
327 ggc_free (all.plus);
328 ggc_free (all.reg);
331 /* Return an rtx representing minus the value of X.
332 MODE is the intended mode of the result,
333 useful if X is a CONST_INT. */
336 negate_rtx (machine_mode mode, rtx x)
338 rtx result = simplify_unary_operation (NEG, mode, x, mode);
340 if (result == 0)
341 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
343 return result;
346 /* Return an rtx representing value of X with reverse storage order.
347 MODE is the intended mode of the result,
348 useful if X is a CONST_INT. */
351 flip_storage_order (enum machine_mode mode, rtx x)
353 enum machine_mode int_mode;
354 rtx result;
356 if (mode == QImode)
357 return x;
359 if (SCALAR_INT_MODE_P (mode))
360 int_mode = mode;
361 else
363 int_mode = int_mode_for_mode (mode);
364 gcc_assert (int_mode != BLKmode);
365 x = gen_lowpart (int_mode, x);
368 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
369 if (result == 0)
370 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
372 if (int_mode != mode)
373 result = gen_lowpart (mode, result);
375 return result;
378 /* Adjust bitfield memory MEM so that it points to the first unit of mode
379 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
380 If MODE is BLKmode, return a reference to every byte in the bitfield.
381 Set *NEW_BITNUM to the bit position of the field within the new memory. */
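/* For example, narrowing a byte-based MEM to HImode with BITNUM == 37
   gives *NEW_BITNUM == 5 and a byte offset of 4, i.e. the 16-bit unit
   covering bits 32-47 of the original reference.  */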
383 static rtx
384 narrow_bit_field_mem (rtx mem, machine_mode mode,
385 unsigned HOST_WIDE_INT bitsize,
386 unsigned HOST_WIDE_INT bitnum,
387 unsigned HOST_WIDE_INT *new_bitnum)
389 if (mode == BLKmode)
391 *new_bitnum = bitnum % BITS_PER_UNIT;
392 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
393 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
394 / BITS_PER_UNIT);
395 return adjust_bitfield_address_size (mem, mode, offset, size);
397 else
399 unsigned int unit = GET_MODE_BITSIZE (mode);
400 *new_bitnum = bitnum % unit;
401 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
402 return adjust_bitfield_address (mem, mode, offset);
406 /* The caller wants to perform insertion or extraction PATTERN on a
407 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
408 BITREGION_START and BITREGION_END are as for store_bit_field
409 and FIELDMODE is the natural mode of the field.
411 Search for a mode that is compatible with the memory access
412 restrictions and (where applicable) with a register insertion or
413 extraction. Return the new memory on success, storing the adjusted
414 bit position in *NEW_BITNUM. Return null otherwise. */
416 static rtx
417 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
418 rtx op0, HOST_WIDE_INT bitsize,
419 HOST_WIDE_INT bitnum,
420 unsigned HOST_WIDE_INT bitregion_start,
421 unsigned HOST_WIDE_INT bitregion_end,
422 machine_mode fieldmode,
423 unsigned HOST_WIDE_INT *new_bitnum)
425 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
426 bitregion_end, MEM_ALIGN (op0),
427 MEM_VOLATILE_P (op0));
428 machine_mode best_mode;
429 if (iter.next_mode (&best_mode))
431 /* We can use a memory in BEST_MODE. See whether this is true for
432 any wider modes. All other things being equal, we prefer to
433 use the widest mode possible because it tends to expose more
434 CSE opportunities. */
435 if (!iter.prefer_smaller_modes ())
437 /* Limit the search to the mode required by the corresponding
438 register insertion or extraction instruction, if any. */
439 machine_mode limit_mode = word_mode;
440 extraction_insn insn;
441 if (get_best_reg_extraction_insn (&insn, pattern,
442 GET_MODE_BITSIZE (best_mode),
443 fieldmode))
444 limit_mode = insn.field_mode;
446 machine_mode wider_mode;
447 while (iter.next_mode (&wider_mode)
448 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
449 best_mode = wider_mode;
451 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
452 new_bitnum);
454 return NULL_RTX;
457 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
458 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
459 offset is then BITNUM / BITS_PER_UNIT. */
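/* For example, with 32-bit words and 8-bit units, an 8-bit field at bit 32
   of a DImode value is a lowpart subreg on a little-endian target (BITNUM
   is word-aligned), while on a big-endian target an 8-bit field at bit 56
   is, since BITNUM + BITSIZE reaches the end of the DImode value.  */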
461 static bool
462 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
463 unsigned HOST_WIDE_INT bitsize,
464 machine_mode struct_mode)
466 if (BYTES_BIG_ENDIAN)
467 return (bitnum % BITS_PER_UNIT == 0
468 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
469 || (bitnum + bitsize) % BITS_PER_WORD == 0));
470 else
471 return bitnum % BITS_PER_WORD == 0;
474 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
475 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
476 Return false if the access would touch memory outside the range
477 BITREGION_START to BITREGION_END for conformance to the C++ memory
478 model. */
480 static bool
481 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
482 unsigned HOST_WIDE_INT bitnum,
483 machine_mode fieldmode,
484 unsigned HOST_WIDE_INT bitregion_start,
485 unsigned HOST_WIDE_INT bitregion_end)
487 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
489 /* -fstrict-volatile-bitfields must be enabled and we must have a
490 volatile MEM. */
491 if (!MEM_P (op0)
492 || !MEM_VOLATILE_P (op0)
493 || flag_strict_volatile_bitfields <= 0)
494 return false;
496 /* Non-integral modes likely only happen with packed structures.
497 Punt. */
498 if (!SCALAR_INT_MODE_P (fieldmode))
499 return false;
501 /* The bit size must not be larger than the field mode, and
502 the field mode must not be larger than a word. */
503 if (bitsize > modesize || modesize > BITS_PER_WORD)
504 return false;
506 /* Check for cases of unaligned fields that must be split. */
507 if (bitnum % modesize + bitsize > modesize)
508 return false;
510 /* The memory must be sufficiently aligned for a MODESIZE access.
 511      This condition guarantees that the memory access will not
512 touch anything after the end of the structure. */
513 if (MEM_ALIGN (op0) < modesize)
514 return false;
516 /* Check for cases where the C++ memory model applies. */
517 if (bitregion_end != 0
518 && (bitnum - bitnum % modesize < bitregion_start
519 || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
520 return false;
522 return true;
525 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
526 bit number BITNUM can be treated as a simple value of mode MODE. */
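/* For example, a 32-bit field that starts on a byte boundary in a MEM
   aligned to at least 32 bits can be accessed with a single SImode load or
   store, so it satisfies this predicate.  */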
528 static bool
529 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
530 unsigned HOST_WIDE_INT bitnum, machine_mode mode)
532 return (MEM_P (op0)
533 && bitnum % BITS_PER_UNIT == 0
534 && bitsize == GET_MODE_BITSIZE (mode)
535 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
536 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
537 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
540 /* Try to use instruction INSV to store VALUE into a field of OP0.
541 BITSIZE and BITNUM are as for store_bit_field. */
543 static bool
544 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
545 unsigned HOST_WIDE_INT bitsize,
546 unsigned HOST_WIDE_INT bitnum,
547 rtx value)
549 struct expand_operand ops[4];
550 rtx value1;
551 rtx xop0 = op0;
552 rtx_insn *last = get_last_insn ();
553 bool copy_back = false;
555 machine_mode op_mode = insv->field_mode;
556 unsigned int unit = GET_MODE_BITSIZE (op_mode);
557 if (bitsize == 0 || bitsize > unit)
558 return false;
560 if (MEM_P (xop0))
561 /* Get a reference to the first byte of the field. */
562 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
563 &bitnum);
564 else
566 /* Convert from counting within OP0 to counting in OP_MODE. */
567 if (BYTES_BIG_ENDIAN)
568 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
570 /* If xop0 is a register, we need it in OP_MODE
571 to make it acceptable to the format of insv. */
572 if (GET_CODE (xop0) == SUBREG)
573 /* We can't just change the mode, because this might clobber op0,
574 and we will need the original value of op0 if insv fails. */
575 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
576 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
577 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
580 /* If the destination is a paradoxical subreg such that we need a
581 truncate to the inner mode, perform the insertion on a temporary and
582 truncate the result to the original destination. Note that we can't
583 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
584 X) 0)) is (reg:N X). */
585 if (GET_CODE (xop0) == SUBREG
586 && REG_P (SUBREG_REG (xop0))
587 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
588 op_mode))
590 rtx tem = gen_reg_rtx (op_mode);
591 emit_move_insn (tem, xop0);
592 xop0 = tem;
593 copy_back = true;
 596   /* There is a similar overflow check at the start of store_bit_field_1,
 597      but it only handles the case where the field lies completely
 598      outside the register.  Here the field may lie partially in the
 599      register, so we need to adjust BITSIZE for that partial overflow.
 600      Without this fix, pr48335-2.c breaks on big-endian targets whose
 601      architectures have a bit-insert instruction, such as arm and
 602      aarch64.  */
603 if (bitsize + bitnum > unit && bitnum < unit)
605 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
606 "destination object, data truncated into %wu-bit",
607 bitsize, unit - bitnum);
608 bitsize = unit - bitnum;
611 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
612 "backwards" from the size of the unit we are inserting into.
613 Otherwise, we count bits from the most significant on a
614 BYTES/BITS_BIG_ENDIAN machine. */
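/* For example, with a 32-bit OP_MODE, an 8-bit field at bit 4 becomes
   bit 20 (32 - 8 - 4), counted from the opposite end of the unit.  */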
616 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
617 bitnum = unit - bitsize - bitnum;
619 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
620 value1 = value;
621 if (GET_MODE (value) != op_mode)
623 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
625 /* Optimization: Don't bother really extending VALUE
626 if it has all the bits we will actually use. However,
627 if we must narrow it, be sure we do it correctly. */
629 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
631 rtx tmp;
633 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
634 if (! tmp)
635 tmp = simplify_gen_subreg (op_mode,
636 force_reg (GET_MODE (value),
637 value1),
638 GET_MODE (value), 0);
639 value1 = tmp;
641 else
642 value1 = gen_lowpart (op_mode, value1);
644 else if (CONST_INT_P (value))
645 value1 = gen_int_mode (INTVAL (value), op_mode);
646 else
647 /* Parse phase is supposed to make VALUE's data type
648 match that of the component reference, which is a type
649 at least as wide as the field; so VALUE should have
650 a mode that corresponds to that type. */
651 gcc_assert (CONSTANT_P (value));
654 create_fixed_operand (&ops[0], xop0);
655 create_integer_operand (&ops[1], bitsize);
656 create_integer_operand (&ops[2], bitnum);
657 create_input_operand (&ops[3], value1, op_mode);
658 if (maybe_expand_insn (insv->icode, 4, ops))
660 if (copy_back)
661 convert_move (op0, xop0, true);
662 return true;
664 delete_insns_since (last);
665 return false;
668 /* A subroutine of store_bit_field, with the same arguments. Return true
669 if the operation could be implemented.
671 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
672 no other way of implementing the operation. If FALLBACK_P is false,
673 return false instead. */
675 static bool
676 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
677 unsigned HOST_WIDE_INT bitnum,
678 unsigned HOST_WIDE_INT bitregion_start,
679 unsigned HOST_WIDE_INT bitregion_end,
680 machine_mode fieldmode,
681 rtx value, bool reverse, bool fallback_p)
683 rtx op0 = str_rtx;
684 rtx orig_value;
686 while (GET_CODE (op0) == SUBREG)
688 /* The following line once was done only if WORDS_BIG_ENDIAN,
689 but I think that is a mistake. WORDS_BIG_ENDIAN is
690 meaningful at a much higher level; when structures are copied
691 between memory and regs, the higher-numbered regs
692 always get higher addresses. */
693 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
694 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
695 int byte_offset = 0;
697 /* Paradoxical subregs need special handling on big endian machines. */
698 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
700 int difference = inner_mode_size - outer_mode_size;
702 if (WORDS_BIG_ENDIAN)
703 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
704 if (BYTES_BIG_ENDIAN)
705 byte_offset += difference % UNITS_PER_WORD;
707 else
708 byte_offset = SUBREG_BYTE (op0);
710 bitnum += byte_offset * BITS_PER_UNIT;
711 op0 = SUBREG_REG (op0);
714 /* No action is needed if the target is a register and if the field
715 lies completely outside that register. This can occur if the source
716 code contains an out-of-bounds access to a small array. */
717 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
718 return true;
720 /* Use vec_set patterns for inserting parts of vectors whenever
721 available. */
722 if (VECTOR_MODE_P (GET_MODE (op0))
723 && !MEM_P (op0)
724 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
725 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
726 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
727 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
729 struct expand_operand ops[3];
730 machine_mode outermode = GET_MODE (op0);
731 machine_mode innermode = GET_MODE_INNER (outermode);
732 enum insn_code icode = optab_handler (vec_set_optab, outermode);
733 int pos = bitnum / GET_MODE_BITSIZE (innermode);
735 create_fixed_operand (&ops[0], op0);
736 create_input_operand (&ops[1], value, innermode);
737 create_integer_operand (&ops[2], pos);
738 if (maybe_expand_insn (icode, 3, ops))
739 return true;
742 /* If the target is a register, overwriting the entire object, or storing
743 a full-word or multi-word field can be done with just a SUBREG. */
744 if (!MEM_P (op0)
745 && bitsize == GET_MODE_BITSIZE (fieldmode)
746 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
747 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
749 /* Use the subreg machinery either to narrow OP0 to the required
750 words or to cope with mode punning between equal-sized modes.
751 In the latter case, use subreg on the rhs side, not lhs. */
752 rtx sub;
754 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
756 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
757 if (sub)
759 if (reverse)
760 sub = flip_storage_order (GET_MODE (op0), sub);
761 emit_move_insn (op0, sub);
762 return true;
765 else
767 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
768 bitnum / BITS_PER_UNIT);
769 if (sub)
771 if (reverse)
772 value = flip_storage_order (fieldmode, value);
773 emit_move_insn (sub, value);
774 return true;
779 /* If the target is memory, storing any naturally aligned field can be
780 done with a simple store. For targets that support fast unaligned
781 memory, any naturally sized, unit aligned field can be done directly. */
782 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
784 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
785 if (reverse)
786 value = flip_storage_order (fieldmode, value);
787 emit_move_insn (op0, value);
788 return true;
791 /* Make sure we are playing with integral modes. Pun with subregs
792 if we aren't. This must come after the entire register case above,
793 since that case is valid for any mode. The following cases are only
794 valid for integral modes. */
796 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
797 if (imode != GET_MODE (op0))
799 if (MEM_P (op0))
800 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
801 else
803 gcc_assert (imode != BLKmode);
804 op0 = gen_lowpart (imode, op0);
809 /* Storing an lsb-aligned field in a register
810 can be done with a movstrict instruction. */
812 if (!MEM_P (op0)
813 && !reverse
814 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
815 && bitsize == GET_MODE_BITSIZE (fieldmode)
816 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
818 struct expand_operand ops[2];
819 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
820 rtx arg0 = op0;
821 unsigned HOST_WIDE_INT subreg_off;
823 if (GET_CODE (arg0) == SUBREG)
825 /* Else we've got some float mode source being extracted into
826 a different float mode destination -- this combination of
827 subregs results in Severe Tire Damage. */
828 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
829 || GET_MODE_CLASS (fieldmode) == MODE_INT
830 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
831 arg0 = SUBREG_REG (arg0);
834 subreg_off = bitnum / BITS_PER_UNIT;
835 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
837 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
839 create_fixed_operand (&ops[0], arg0);
840 /* Shrink the source operand to FIELDMODE. */
841 create_convert_operand_to (&ops[1], value, fieldmode, false);
842 if (maybe_expand_insn (icode, 2, ops))
843 return true;
847 /* Handle fields bigger than a word. */
849 if (bitsize > BITS_PER_WORD)
851 /* Here we transfer the words of the field
852 in the order least significant first.
853 This is because the most significant word is the one which may
854 be less than full.
855 However, only do that if the value is not BLKmode. */
857 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
858 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
859 unsigned int i;
860 rtx_insn *last;
862 /* This is the mode we must force value to, so that there will be enough
863 subwords to extract. Note that fieldmode will often (always?) be
864 VOIDmode, because that is what store_field uses to indicate that this
865 is a bit field, but passing VOIDmode to operand_subword_force
866 is not allowed. */
867 fieldmode = GET_MODE (value);
868 if (fieldmode == VOIDmode)
869 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
871 last = get_last_insn ();
872 for (i = 0; i < nwords; i++)
874 /* If I is 0, use the low-order word in both field and target;
875 if I is 1, use the next to lowest word; and so on. */
876 unsigned int wordnum = (backwards
877 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
878 - i - 1
879 : i);
880 unsigned int bit_offset = (backwards ^ reverse
881 ? MAX ((int) bitsize - ((int) i + 1)
882 * BITS_PER_WORD,
884 : (int) i * BITS_PER_WORD);
885 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
886 unsigned HOST_WIDE_INT new_bitsize =
887 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
889 /* If the remaining chunk doesn't have full wordsize we have
890 to make sure that for big endian machines the higher order
891 bits are used. */
892 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
893 value_word = simplify_expand_binop (word_mode, lshr_optab,
894 value_word,
895 GEN_INT (BITS_PER_WORD
896 - new_bitsize),
897 NULL_RTX, true,
898 OPTAB_LIB_WIDEN);
900 if (!store_bit_field_1 (op0, new_bitsize,
901 bitnum + bit_offset,
902 bitregion_start, bitregion_end,
903 word_mode,
904 value_word, reverse, fallback_p))
906 delete_insns_since (last);
907 return false;
910 return true;
913 /* If VALUE has a floating-point or complex mode, access it as an
914 integer of the corresponding size. This can occur on a machine
915 with 64 bit registers that uses SFmode for float. It can also
916 occur for unaligned float or complex fields. */
917 orig_value = value;
918 if (GET_MODE (value) != VOIDmode
919 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
920 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
922 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
923 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
926 /* If OP0 is a multi-word register, narrow it to the affected word.
927 If the region spans two words, defer to store_split_bit_field. */
928 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
930 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
931 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
932 gcc_assert (op0);
933 bitnum %= BITS_PER_WORD;
934 if (bitnum + bitsize > BITS_PER_WORD)
936 if (!fallback_p)
937 return false;
939 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
940 bitregion_end, value, reverse);
941 return true;
945 /* From here on we can assume that the field to be stored in fits
946 within a word. If the destination is a register, it too fits
947 in a word. */
949 extraction_insn insv;
950 if (!MEM_P (op0)
951 && !reverse
952 && get_best_reg_extraction_insn (&insv, EP_insv,
953 GET_MODE_BITSIZE (GET_MODE (op0)),
954 fieldmode)
955 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
956 return true;
958 /* If OP0 is a memory, try copying it to a register and seeing if a
959 cheap register alternative is available. */
960 if (MEM_P (op0) && !reverse)
962 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
963 fieldmode)
964 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
965 return true;
967 rtx_insn *last = get_last_insn ();
969 /* Try loading part of OP0 into a register, inserting the bitfield
970 into that, and then copying the result back to OP0. */
971 unsigned HOST_WIDE_INT bitpos;
972 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
973 bitregion_start, bitregion_end,
974 fieldmode, &bitpos);
975 if (xop0)
977 rtx tempreg = copy_to_reg (xop0);
978 if (store_bit_field_1 (tempreg, bitsize, bitpos,
979 bitregion_start, bitregion_end,
980 fieldmode, orig_value, reverse, false))
982 emit_move_insn (xop0, tempreg);
983 return true;
985 delete_insns_since (last);
989 if (!fallback_p)
990 return false;
992 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
993 bitregion_end, value, reverse);
994 return true;
997 /* Generate code to store value from rtx VALUE
998 into a bit-field within structure STR_RTX
999 containing BITSIZE bits starting at bit BITNUM.
1001 BITREGION_START is bitpos of the first bitfield in this region.
1002 BITREGION_END is the bitpos of the ending bitfield in this region.
1003    These two fields are 0 if the C++ memory model does not apply,
1004 or we are not interested in keeping track of bitfield regions.
1006 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1008 If REVERSE is true, the store is to be done in reverse order. */
1010 void
1011 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1012 unsigned HOST_WIDE_INT bitnum,
1013 unsigned HOST_WIDE_INT bitregion_start,
1014 unsigned HOST_WIDE_INT bitregion_end,
1015 machine_mode fieldmode,
1016 rtx value, bool reverse)
1018 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1019 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
1020 bitregion_start, bitregion_end))
1022 /* Storing of a full word can be done with a simple store.
1023 We know here that the field can be accessed with one single
1024 instruction. For targets that support unaligned memory,
1025 an unaligned access may be necessary. */
1026 if (bitsize == GET_MODE_BITSIZE (fieldmode))
1028 str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
1029 bitnum / BITS_PER_UNIT);
1030 if (reverse)
1031 value = flip_storage_order (fieldmode, value);
1032 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1033 emit_move_insn (str_rtx, value);
1035 else
1037 rtx temp;
1039 str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
1040 &bitnum);
1041 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
1042 temp = copy_to_reg (str_rtx);
1043 if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1044 fieldmode, value, reverse, true))
1045 gcc_unreachable ();
1047 emit_move_insn (str_rtx, temp);
1050 return;
1053 /* Under the C++0x memory model, we must not touch bits outside the
1054 bit region. Adjust the address to start at the beginning of the
1055 bit region. */
1056 if (MEM_P (str_rtx) && bitregion_start > 0)
1058 machine_mode bestmode;
1059 HOST_WIDE_INT offset, size;
1061 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1063 offset = bitregion_start / BITS_PER_UNIT;
1064 bitnum -= bitregion_start;
1065 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1066 bitregion_end -= bitregion_start;
1067 bitregion_start = 0;
1068 bestmode = get_best_mode (bitsize, bitnum,
1069 bitregion_start, bitregion_end,
1070 MEM_ALIGN (str_rtx), VOIDmode,
1071 MEM_VOLATILE_P (str_rtx));
1072 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1075 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1076 bitregion_start, bitregion_end,
1077 fieldmode, value, reverse, true))
1078 gcc_unreachable ();
1081 /* Use shifts and boolean operations to store VALUE into a bit field of
1082 width BITSIZE in OP0, starting at bit BITNUM.
1084 If REVERSE is true, the store is to be done in reverse order. */
1086 static void
1087 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1088 unsigned HOST_WIDE_INT bitnum,
1089 unsigned HOST_WIDE_INT bitregion_start,
1090 unsigned HOST_WIDE_INT bitregion_end,
1091 rtx value, bool reverse)
1093 /* There is a case not handled here:
1094 a structure with a known alignment of just a halfword
1095 and a field split across two aligned halfwords within the structure.
1096 Or likewise a structure with a known alignment of just a byte
1097 and a field split across two bytes.
1098 Such cases are not supposed to be able to occur. */
1100 if (MEM_P (op0))
1102 machine_mode mode = GET_MODE (op0);
1103 if (GET_MODE_BITSIZE (mode) == 0
1104 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1105 mode = word_mode;
1106 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1107 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1109 if (mode == VOIDmode)
1111 /* The only way this should occur is if the field spans word
1112 boundaries. */
1113 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1114 bitregion_end, value, reverse);
1115 return;
1118 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1121 store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
1124 /* Helper function for store_fixed_bit_field, stores
1125 the bit field always using the MODE of OP0. */
1127 static void
1128 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1129 unsigned HOST_WIDE_INT bitnum,
1130 rtx value, bool reverse)
1132 machine_mode mode;
1133 rtx temp;
1134 int all_zero = 0;
1135 int all_one = 0;
1137 mode = GET_MODE (op0);
1138 gcc_assert (SCALAR_INT_MODE_P (mode));
1140 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1141 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1143 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1144 /* BITNUM is the distance between our msb
1145 and that of the containing datum.
1146 Convert it to the distance from the lsb. */
1147 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1149 /* Now BITNUM is always the distance between our lsb
1150 and that of OP0. */
1152 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1153 we must first convert its mode to MODE. */
1155 if (CONST_INT_P (value))
1157 unsigned HOST_WIDE_INT v = UINTVAL (value);
1159 if (bitsize < HOST_BITS_PER_WIDE_INT)
1160 v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;
1162 if (v == 0)
1163 all_zero = 1;
1164 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1165 && v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
1166 || (bitsize == HOST_BITS_PER_WIDE_INT
1167 && v == (unsigned HOST_WIDE_INT) -1))
1168 all_one = 1;
1170 value = lshift_value (mode, v, bitnum);
1172 else
1174 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1175 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1177 if (GET_MODE (value) != mode)
1178 value = convert_to_mode (mode, value, 1);
1180 if (must_and)
1181 value = expand_binop (mode, and_optab, value,
1182 mask_rtx (mode, 0, bitsize, 0),
1183 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1184 if (bitnum > 0)
1185 value = expand_shift (LSHIFT_EXPR, mode, value,
1186 bitnum, NULL_RTX, 1);
1189 if (reverse)
1190 value = flip_storage_order (mode, value);
1192 /* Now clear the chosen bits in OP0,
1193 except that if VALUE is -1 we need not bother. */
1194 /* We keep the intermediates in registers to allow CSE to combine
1195 consecutive bitfield assignments. */
1197 temp = force_reg (mode, op0);
1199 if (! all_one)
1201 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1202 if (reverse)
1203 mask = flip_storage_order (mode, mask);
1204 temp = expand_binop (mode, and_optab, temp, mask,
1205 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1206 temp = force_reg (mode, temp);
1209 /* Now logical-or VALUE into OP0, unless it is zero. */
1211 if (! all_zero)
1213 temp = expand_binop (mode, ior_optab, temp, value,
1214 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1215 temp = force_reg (mode, temp);
1218 if (op0 != temp)
1220 op0 = copy_rtx (op0);
1221 emit_move_insn (op0, temp);
1225 /* Store a bit field that is split across multiple accessible memory objects.
1227 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1228 BITSIZE is the field width; BITPOS the position of its first bit
1229 (within the word).
1230 VALUE is the value to store.
1232 If REVERSE is true, the store is to be done in reverse order.
1234 This does not yet handle fields wider than BITS_PER_WORD. */
1236 static void
1237 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1238 unsigned HOST_WIDE_INT bitpos,
1239 unsigned HOST_WIDE_INT bitregion_start,
1240 unsigned HOST_WIDE_INT bitregion_end,
1241 rtx value, bool reverse)
1243 unsigned int unit, total_bits, bitsdone = 0;
1245   /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1246 much at a time. */
1247 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1248 unit = BITS_PER_WORD;
1249 else
1250 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1252 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1253 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1254 again, and we will mutually recurse forever. */
1255 if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1256 unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1258 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1259 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1260 that VALUE might be a floating-point constant. */
1261 if (CONSTANT_P (value) && !CONST_INT_P (value))
1263 rtx word = gen_lowpart_common (word_mode, value);
1265 if (word && (value != word))
1266 value = word;
1267 else
1268 value = gen_lowpart_common (word_mode,
1269 force_reg (GET_MODE (value) != VOIDmode
1270 ? GET_MODE (value)
1271 : word_mode, value));
1274 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1276 while (bitsdone < bitsize)
1278 unsigned HOST_WIDE_INT thissize;
1279 unsigned HOST_WIDE_INT thispos;
1280 unsigned HOST_WIDE_INT offset;
1281 rtx part, word;
1283 offset = (bitpos + bitsdone) / unit;
1284 thispos = (bitpos + bitsdone) % unit;
1286 /* When region of bytes we can touch is restricted, decrease
1287 UNIT close to the end of the region as needed. If op0 is a REG
1288 or SUBREG of REG, don't do this, as there can't be data races
1289 on a register and we can expand shorter code in some cases. */
1290 if (bitregion_end
1291 && unit > BITS_PER_UNIT
1292 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1293 && !REG_P (op0)
1294 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1296 unit = unit / 2;
1297 continue;
1300 /* THISSIZE must not overrun a word boundary. Otherwise,
1301 store_fixed_bit_field will call us again, and we will mutually
1302 recurse forever. */
1303 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1304 thissize = MIN (thissize, unit - thispos);
1306 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1308 /* Fetch successively less significant portions. */
1309 if (CONST_INT_P (value))
1310 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1311 >> (bitsize - bitsdone - thissize))
1312 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1313 /* Likewise, but the source is little-endian. */
1314 else if (reverse)
1315 part = extract_fixed_bit_field (word_mode, value, thissize,
1316 bitsize - bitsdone - thissize,
1317 NULL_RTX, 1, false);
1318 else
1320 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1321 /* The args are chosen so that the last part includes the
1322 lsb. Give extract_bit_field the value it needs (with
1323 endianness compensation) to fetch the piece we want. */
1324 part = extract_fixed_bit_field (word_mode, value, thissize,
1325 total_bits - bitsize + bitsdone,
1326 NULL_RTX, 1, false);
1329 else
1331 /* Fetch successively more significant portions. */
1332 if (CONST_INT_P (value))
1333 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1334 >> bitsdone)
1335 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1336 /* Likewise, but the source is big-endian. */
1337 else if (reverse)
1338 part = extract_fixed_bit_field (word_mode, value, thissize,
1339 total_bits - bitsdone - thissize,
1340 NULL_RTX, 1, false);
1341 else
1342 part = extract_fixed_bit_field (word_mode, value, thissize,
1343 bitsdone, NULL_RTX, 1, false);
1346 /* If OP0 is a register, then handle OFFSET here.
1348 When handling multiword bitfields, extract_bit_field may pass
1349 down a word_mode SUBREG of a larger REG for a bitfield that actually
1350 crosses a word boundary. Thus, for a SUBREG, we must find
1351 the current word starting from the base register. */
1352 if (GET_CODE (op0) == SUBREG)
1354 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1355 + (offset * unit / BITS_PER_WORD);
1356 machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1357 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1358 word = word_offset ? const0_rtx : op0;
1359 else
1360 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1361 GET_MODE (SUBREG_REG (op0)));
1362 offset &= BITS_PER_WORD / unit - 1;
1364 else if (REG_P (op0))
1366 machine_mode op0_mode = GET_MODE (op0);
1367 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1368 word = offset ? const0_rtx : op0;
1369 else
1370 word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1371 GET_MODE (op0));
1372 offset &= BITS_PER_WORD / unit - 1;
1374 else
1375 word = op0;
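/* For example, in the REG and SUBREG cases above, with UNIT == 8 and
   32-bit words, OFFSET has been masked to the range 0..3, the index of
   the byte within the selected word.  */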
1377 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1378 it is just an out-of-bounds access. Ignore it. */
1379 if (word != const0_rtx)
1380 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1381 bitregion_start, bitregion_end, part,
1382 reverse);
1383 bitsdone += thissize;
1387 /* A subroutine of extract_bit_field_1 that converts return value X
1388 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1389 to extract_bit_field. */
1391 static rtx
1392 convert_extracted_bit_field (rtx x, machine_mode mode,
1393 machine_mode tmode, bool unsignedp)
1395 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1396 return x;
1398 /* If the x mode is not a scalar integral, first convert to the
1399 integer mode of that size and then access it as a floating-point
1400 value via a SUBREG. */
1401 if (!SCALAR_INT_MODE_P (tmode))
1403 machine_mode smode;
1405 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1406 x = convert_to_mode (smode, x, unsignedp);
1407 x = force_reg (smode, x);
1408 return gen_lowpart (tmode, x);
1411 return convert_to_mode (tmode, x, unsignedp);
1414 /* Try to use an ext(z)v pattern to extract a field from OP0.
1415 Return the extracted value on success, otherwise return null.
1416 EXT_MODE is the mode of the extraction and the other arguments
1417 are as for extract_bit_field. */
1419 static rtx
1420 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1421 unsigned HOST_WIDE_INT bitsize,
1422 unsigned HOST_WIDE_INT bitnum,
1423 int unsignedp, rtx target,
1424 machine_mode mode, machine_mode tmode)
1426 struct expand_operand ops[4];
1427 rtx spec_target = target;
1428 rtx spec_target_subreg = 0;
1429 machine_mode ext_mode = extv->field_mode;
1430 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1432 if (bitsize == 0 || unit < bitsize)
1433 return NULL_RTX;
1435 if (MEM_P (op0))
1436 /* Get a reference to the first byte of the field. */
1437 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1438 &bitnum);
1439 else
1441 /* Convert from counting within OP0 to counting in EXT_MODE. */
1442 if (BYTES_BIG_ENDIAN)
1443 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1445 /* If op0 is a register, we need it in EXT_MODE to make it
1446 acceptable to the format of ext(z)v. */
1447 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1448 return NULL_RTX;
1449 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1450 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1453 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1454 "backwards" from the size of the unit we are extracting from.
1455 Otherwise, we count bits from the most significant on a
1456 BYTES/BITS_BIG_ENDIAN machine. */
1458 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1459 bitnum = unit - bitsize - bitnum;
1461 if (target == 0)
1462 target = spec_target = gen_reg_rtx (tmode);
1464 if (GET_MODE (target) != ext_mode)
1466 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1467 between the mode of the extraction (word_mode) and the target
1468 mode. Instead, create a temporary and use convert_move to set
1469 the target. */
1470 if (REG_P (target)
1471 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1473 target = gen_lowpart (ext_mode, target);
1474 if (GET_MODE_PRECISION (ext_mode)
1475 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1476 spec_target_subreg = target;
1478 else
1479 target = gen_reg_rtx (ext_mode);
1482 create_output_operand (&ops[0], target, ext_mode);
1483 create_fixed_operand (&ops[1], op0);
1484 create_integer_operand (&ops[2], bitsize);
1485 create_integer_operand (&ops[3], bitnum);
1486 if (maybe_expand_insn (extv->icode, 4, ops))
1488 target = ops[0].value;
1489 if (target == spec_target)
1490 return target;
1491 if (target == spec_target_subreg)
1492 return spec_target;
1493 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1495 return NULL_RTX;
1498 /* A subroutine of extract_bit_field, with the same arguments.
1499 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1500 if we can find no other means of implementing the operation.
1501    If FALLBACK_P is false, return NULL instead.  */
1503 static rtx
1504 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1505 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1506 machine_mode mode, machine_mode tmode,
1507 bool reverse, bool fallback_p)
1509 rtx op0 = str_rtx;
1510 machine_mode int_mode;
1511 machine_mode mode1;
1513 if (tmode == VOIDmode)
1514 tmode = mode;
1516 while (GET_CODE (op0) == SUBREG)
1518 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1519 op0 = SUBREG_REG (op0);
1522 /* If we have an out-of-bounds access to a register, just return an
1523 uninitialized register of the required mode. This can occur if the
1524 source code contains an out-of-bounds access to a small array. */
1525 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1526 return gen_reg_rtx (tmode);
1528 if (REG_P (op0)
1529 && mode == GET_MODE (op0)
1530 && bitnum == 0
1531 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1533 if (reverse)
1534 op0 = flip_storage_order (mode, op0);
1535 /* We're trying to extract a full register from itself. */
1536 return op0;
1539 /* See if we can get a better vector mode before extracting. */
1540 if (VECTOR_MODE_P (GET_MODE (op0))
1541 && !MEM_P (op0)
1542 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1544 machine_mode new_mode;
1546 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1547 new_mode = MIN_MODE_VECTOR_FLOAT;
1548 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1549 new_mode = MIN_MODE_VECTOR_FRACT;
1550 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1551 new_mode = MIN_MODE_VECTOR_UFRACT;
1552 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1553 new_mode = MIN_MODE_VECTOR_ACCUM;
1554 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1555 new_mode = MIN_MODE_VECTOR_UACCUM;
1556 else
1557 new_mode = MIN_MODE_VECTOR_INT;
1559 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1560 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1561 && targetm.vector_mode_supported_p (new_mode))
1562 break;
1563 if (new_mode != VOIDmode)
1564 op0 = gen_lowpart (new_mode, op0);
1567 /* Use vec_extract patterns for extracting parts of vectors whenever
1568 available. */
1569 if (VECTOR_MODE_P (GET_MODE (op0))
1570 && !MEM_P (op0)
1571 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1572 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1573 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1575 struct expand_operand ops[3];
1576 machine_mode outermode = GET_MODE (op0);
1577 machine_mode innermode = GET_MODE_INNER (outermode);
1578 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1579 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1581 create_output_operand (&ops[0], target, innermode);
1582 create_input_operand (&ops[1], op0, outermode);
1583 create_integer_operand (&ops[2], pos);
1584 if (maybe_expand_insn (icode, 3, ops))
1586 target = ops[0].value;
1587 if (GET_MODE (target) != mode)
1588 return gen_lowpart (tmode, target);
1589 return target;
1593 /* Make sure we are playing with integral modes. Pun with subregs
1594 if we aren't. */
1596 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1597 if (imode != GET_MODE (op0))
1599 if (MEM_P (op0))
1600 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1601 else if (imode != BLKmode)
1603 op0 = gen_lowpart (imode, op0);
1605 /* If we got a SUBREG, force it into a register since we
1606 aren't going to be able to do another SUBREG on it. */
1607 if (GET_CODE (op0) == SUBREG)
1608 op0 = force_reg (imode, op0);
1610 else if (REG_P (op0))
1612 rtx reg, subreg;
1613 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1614 MODE_INT);
1615 reg = gen_reg_rtx (imode);
1616 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1617 emit_move_insn (subreg, op0);
1618 op0 = reg;
1619 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1621 else
1623 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1624 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1625 emit_move_insn (mem, op0);
1626 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1631 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1632 If that's wrong, the solution is to test for it and set TARGET to 0
1633 if needed. */
1635 /* Get the mode of the field to use for atomic access or subreg
1636 conversion. */
1637 mode1 = mode;
1638 if (SCALAR_INT_MODE_P (tmode))
1640 machine_mode try_mode = mode_for_size (bitsize,
1641 GET_MODE_CLASS (tmode), 0);
1642 if (try_mode != BLKmode)
1643 mode1 = try_mode;
1645 gcc_assert (mode1 != BLKmode);
1647 /* Extraction of a full MODE1 value can be done with a subreg as long
1648 as the least significant bit of the value is the least significant
1649 bit of either OP0 or a word of OP0. */
1650 if (!MEM_P (op0)
1651 && !reverse
1652 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1653 && bitsize == GET_MODE_BITSIZE (mode1)
1654 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1656 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1657 bitnum / BITS_PER_UNIT);
1658 if (sub)
1659 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1662 /* Extraction of a full MODE1 value can be done with a load as long as
1663 the field is on a byte boundary and is sufficiently aligned. */
1664 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1666 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1667 if (reverse)
1668 op0 = flip_storage_order (mode1, op0);
1669 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1672 /* Handle fields bigger than a word. */
1674 if (bitsize > BITS_PER_WORD)
1676 /* Here we transfer the words of the field
1677 in the order least significant first.
1678 This is because the most significant word is the one which may
1679 be less than full. */
1681 const bool backwards = WORDS_BIG_ENDIAN;
1682 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1683 unsigned int i;
1684 rtx_insn *last;
1686 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1687 target = gen_reg_rtx (mode);
1689 /* Indicate for flow that the entire target reg is being set. */
1690 emit_clobber (target);
1692 last = get_last_insn ();
1693 for (i = 0; i < nwords; i++)
1695 /* If I is 0, use the low-order word in both field and target;
1696 if I is 1, use the next to lowest word; and so on. */
1697 /* Word number in TARGET to use. */
1698 unsigned int wordnum
1699 = (backwards
1700 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1701 : i);
1702 /* Offset from start of field in OP0. */
1703 unsigned int bit_offset = (backwards ^ reverse
1704 ? MAX ((int) bitsize - ((int) i + 1)
1705 * BITS_PER_WORD,
1707 : (int) i * BITS_PER_WORD);
1708 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1709 rtx result_part
1710 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1711 bitsize - i * BITS_PER_WORD),
1712 bitnum + bit_offset, 1, target_part,
1713 mode, word_mode, reverse, fallback_p);
1715 gcc_assert (target_part);
1716 if (!result_part)
1718 delete_insns_since (last);
1719 return NULL;
1722 if (result_part != target_part)
1723 emit_move_insn (target_part, result_part);
1726 if (unsignedp)
1728 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1729 need to be zero'd out. */
1730 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1732 unsigned int i, total_words;
1734 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1735 for (i = nwords; i < total_words; i++)
1736 emit_move_insn
1737 (operand_subword (target,
1738 backwards ? total_words - i - 1 : i,
1739 1, VOIDmode),
1740 const0_rtx);
1742 return target;
1745 /* Signed bit field: sign-extend with two arithmetic shifts. */
1746 target = expand_shift (LSHIFT_EXPR, mode, target,
1747 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1748 return expand_shift (RSHIFT_EXPR, mode, target,
1749 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1752 /* If OP0 is a multi-word register, narrow it to the affected word.
1753 If the region spans two words, defer to extract_split_bit_field. */
1754 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1756 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1757 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1758 bitnum %= BITS_PER_WORD;
1759 if (bitnum + bitsize > BITS_PER_WORD)
1761 if (!fallback_p)
1762 return NULL_RTX;
1763 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1764 reverse);
1765 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1769 /* From here on we know the desired field is smaller than a word.
1770 If OP0 is a register, it too fits within a word. */
1771 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1772 extraction_insn extv;
1773 if (!MEM_P (op0)
1774 && !reverse
1775 /* ??? We could limit the structure size to the part of OP0 that
1776 contains the field, with appropriate checks for endianness
1777 and TRULY_NOOP_TRUNCATION. */
1778 && get_best_reg_extraction_insn (&extv, pattern,
1779 GET_MODE_BITSIZE (GET_MODE (op0)),
1780 tmode))
1782 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1783 unsignedp, target, mode,
1784 tmode);
1785 if (result)
1786 return result;
1789 /* If OP0 is a memory, try copying it to a register and seeing if a
1790 cheap register alternative is available. */
1791   if (MEM_P (op0) && !reverse)
1793 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1794 tmode))
1796 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1797 bitnum, unsignedp,
1798 target, mode,
1799 tmode);
1800 if (result)
1801 return result;
1804 rtx_insn *last = get_last_insn ();
1806 /* Try loading part of OP0 into a register and extracting the
1807 bitfield from that. */
1808 unsigned HOST_WIDE_INT bitpos;
1809 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1810 0, 0, tmode, &bitpos);
1811 if (xop0)
1813 xop0 = copy_to_reg (xop0);
1814 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1815 unsignedp, target,
1816 mode, tmode, reverse, false);
1817 if (result)
1818 return result;
1819 delete_insns_since (last);
1823 if (!fallback_p)
1824 return NULL;
1826 /* Find a correspondingly-sized integer field, so we can apply
1827 shifts and masks to it. */
1828 int_mode = int_mode_for_mode (tmode);
1829 if (int_mode == BLKmode)
1830 int_mode = int_mode_for_mode (mode);
1831 /* Should probably push op0 out to memory and then do a load. */
1832 gcc_assert (int_mode != BLKmode);
1834 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1835 target, unsignedp, reverse);
1836 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1839 /* Generate code to extract a byte-field from STR_RTX
1840 containing BITSIZE bits, starting at BITNUM,
1841 and put it in TARGET if possible (if TARGET is nonzero).
1842 Regardless of TARGET, we return the rtx for where the value is placed.
1844 STR_RTX is the structure containing the byte (a REG or MEM).
1845 UNSIGNEDP is nonzero if this is an unsigned bit field.
1846 MODE is the natural mode of the field value once extracted.
1847 TMODE is the mode the caller would like the value to have;
1848 but the value may be returned with type MODE instead.
1850 If REVERSE is true, the extraction is to be done in reverse order.
1852 If a TARGET is specified and we can store in it at no extra cost,
1853 we do so, and return TARGET.
1854 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1855 if they are equally easy. */
1858 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1859 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1860 machine_mode mode, machine_mode tmode, bool reverse)
1862 machine_mode mode1;
1864 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1865 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1866 mode1 = GET_MODE (str_rtx);
1867 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1868 mode1 = GET_MODE (target);
1869 else
1870 mode1 = tmode;
1872 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1874 /* Extraction of a full MODE1 value can be done with a simple load.
1875 We know here that the field can be accessed with one single
1876 instruction. For targets that support unaligned memory,
1877 an unaligned access may be necessary. */
1878 if (bitsize == GET_MODE_BITSIZE (mode1))
1880 rtx result = adjust_bitfield_address (str_rtx, mode1,
1881 bitnum / BITS_PER_UNIT);
1882 if (reverse)
1883 result = flip_storage_order (mode1, result);
1884 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1885 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1888 str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1889 &bitnum);
1890 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1891 str_rtx = copy_to_reg (str_rtx);
1894 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1895 target, mode, tmode, reverse, true);
1898 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1899 from bit BITNUM of OP0.
1901 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1902 If REVERSE is true, the extraction is to be done in reverse order.
1904 If TARGET is nonzero, attempts to store the value there
1905 and return TARGET, but this is not guaranteed.
1906 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1908 static rtx
1909 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1910 unsigned HOST_WIDE_INT bitsize,
1911 unsigned HOST_WIDE_INT bitnum, rtx target,
1912 int unsignedp, bool reverse)
1914 if (MEM_P (op0))
1916 machine_mode mode
1917 = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1918 MEM_VOLATILE_P (op0));
1920 if (mode == VOIDmode)
1921 /* The only way this should occur is if the field spans word
1922 boundaries. */
1923 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1924 reverse);
1926 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1929 return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1930 target, unsignedp, reverse);
1933 /* Helper function for extract_fixed_bit_field; it extracts
1934    the bit field, always using the mode of OP0.  */
1936 static rtx
1937 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1938 unsigned HOST_WIDE_INT bitsize,
1939 unsigned HOST_WIDE_INT bitnum, rtx target,
1940 int unsignedp, bool reverse)
1942 machine_mode mode = GET_MODE (op0);
1943 gcc_assert (SCALAR_INT_MODE_P (mode));
1945 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1946    for invalid input, such as extracting the equivalent of f5 from
1947 gcc.dg/pr48335-2.c. */
1949 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1950 /* BITNUM is the distance between our msb and that of OP0.
1951 Convert it to the distance from the lsb. */
1952 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1954 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1955 We have reduced the big-endian case to the little-endian case. */
1956 if (reverse)
1957 op0 = flip_storage_order (mode, op0);
1959 if (unsignedp)
1961 if (bitnum)
1963 /* If the field does not already start at the lsb,
1964 shift it so it does. */
1965 /* Maybe propagate the target for the shift. */
1966 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1967 if (tmode != mode)
1968 subtarget = 0;
1969 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1971 /* Convert the value to the desired mode. */
1972 if (mode != tmode)
1973 op0 = convert_to_mode (tmode, op0, 1);
1975 /* Unless the msb of the field used to be the msb when we shifted,
1976 mask out the upper bits. */
1978 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1979 return expand_binop (GET_MODE (op0), and_optab, op0,
1980 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1981 target, 1, OPTAB_LIB_WIDEN);
1982 return op0;
1985 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1986 then arithmetic-shift its lsb to the lsb of the word. */
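  /* For instance, an 8-bit signed field whose least significant bit is
     bit 4 of OP0 is extracted in HImode by a left shift of 4 followed by
     an arithmetic right shift of 8.  */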
1987 op0 = force_reg (mode, op0);
1989 /* Find the narrowest integer mode that contains the field. */
1991 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1992 mode = GET_MODE_WIDER_MODE (mode))
1993 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1995 op0 = convert_to_mode (mode, op0, 0);
1996 break;
1999 if (mode != tmode)
2000 target = 0;
2002 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2004 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2005 /* Maybe propagate the target for the shift. */
2006 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2007 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2010 return expand_shift (RSHIFT_EXPR, mode, op0,
2011 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2014 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2015 VALUE << BITPOS. */
2017 static rtx
2018 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2019 int bitpos)
2021 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2024 /* Extract a bit field that is split across two words
2025 and return an RTX for the result.
2027 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2028 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2029 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2031 If REVERSE is true, the extraction is to be done in reverse order. */
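/* For instance, with a 32-bit word size, a 20-bit field starting at bit 24
   is read as the top 8 bits of the first word plus the low 12 bits of the
   second; on a little-endian (and non-reversed) target the second part is
   shifted left by 8 before the two parts are ORed together.  */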
2033 static rtx
2034 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2035 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2036 bool reverse)
2038 unsigned int unit;
2039 unsigned int bitsdone = 0;
2040 rtx result = NULL_RTX;
2041 int first = 1;
2043 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2044 much at a time. */
2045 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2046 unit = BITS_PER_WORD;
2047 else
2048 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2050 while (bitsdone < bitsize)
2052 unsigned HOST_WIDE_INT thissize;
2053 rtx part, word;
2054 unsigned HOST_WIDE_INT thispos;
2055 unsigned HOST_WIDE_INT offset;
2057 offset = (bitpos + bitsdone) / unit;
2058 thispos = (bitpos + bitsdone) % unit;
2060 /* THISSIZE must not overrun a word boundary. Otherwise,
2061 extract_fixed_bit_field will call us again, and we will mutually
2062 recurse forever. */
2063 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2064 thissize = MIN (thissize, unit - thispos);
2066 /* If OP0 is a register, then handle OFFSET here.
2068 When handling multiword bitfields, extract_bit_field may pass
2069 down a word_mode SUBREG of a larger REG for a bitfield that actually
2070 crosses a word boundary. Thus, for a SUBREG, we must find
2071 the current word starting from the base register. */
2072 if (GET_CODE (op0) == SUBREG)
2074 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2075 word = operand_subword_force (SUBREG_REG (op0), word_offset,
2076 GET_MODE (SUBREG_REG (op0)));
2077 offset = 0;
2079 else if (REG_P (op0))
2081 word = operand_subword_force (op0, offset, GET_MODE (op0));
2082 offset = 0;
2084 else
2085 word = op0;
2087 /* Extract the parts in bit-counting order,
2088 whose meaning is determined by BYTES_PER_UNIT.
2089 OFFSET is in UNITs, and UNIT is in bits. */
2090 part = extract_fixed_bit_field (word_mode, word, thissize,
2091 offset * unit + thispos, 0, 1, reverse);
2092 bitsdone += thissize;
2094 /* Shift this part into place for the result. */
2095 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2097 if (bitsize != bitsdone)
2098 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2099 bitsize - bitsdone, 0, 1);
2101 else
2103 if (bitsdone != thissize)
2104 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2105 bitsdone - thissize, 0, 1);
2108 if (first)
2109 result = part;
2110 else
2111 /* Combine the parts with bitwise or. This works
2112 because we extracted each part as an unsigned bit field. */
2113 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2114 OPTAB_LIB_WIDEN);
2116 first = 0;
2119 /* Unsigned bit field: we are done. */
2120 if (unsignedp)
2121 return result;
2122 /* Signed bit field: sign-extend with two arithmetic shifts. */
2123 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2124 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2125 return expand_shift (RSHIFT_EXPR, word_mode, result,
2126 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2129 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2130 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2131 MODE, fill the upper bits with zeros. Fail if the layout of either
2132 mode is unknown (as for CC modes) or if the extraction would involve
2133 unprofitable mode punning. Return the value on success, otherwise
2134 return null.
2136 This is different from gen_lowpart* in these respects:
2138 - the returned value must always be considered an rvalue
2140 - when MODE is wider than SRC_MODE, the extraction involves
2141 a zero extension
2143 - when MODE is smaller than SRC_MODE, the extraction involves
2144 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2146 In other words, this routine performs a computation, whereas the
2147 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2148 operations. */
2151 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2153 machine_mode int_mode, src_int_mode;
2155 if (mode == src_mode)
2156 return src;
2158 if (CONSTANT_P (src))
2160 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2161 fails, it will happily create (subreg (symbol_ref)) or similar
2162 invalid SUBREGs. */
2163 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2164 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2165 if (ret)
2166 return ret;
2168 if (GET_MODE (src) == VOIDmode
2169 || !validate_subreg (mode, src_mode, src, byte))
2170 return NULL_RTX;
2172 src = force_reg (GET_MODE (src), src);
2173 return gen_rtx_SUBREG (mode, src, byte);
2176 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2177 return NULL_RTX;
2179 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2180 && MODES_TIEABLE_P (mode, src_mode))
2182 rtx x = gen_lowpart_common (mode, src);
2183 if (x)
2184 return x;
2187 src_int_mode = int_mode_for_mode (src_mode);
2188 int_mode = int_mode_for_mode (mode);
2189 if (src_int_mode == BLKmode || int_mode == BLKmode)
2190 return NULL_RTX;
2192 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2193 return NULL_RTX;
2194 if (!MODES_TIEABLE_P (int_mode, mode))
2195 return NULL_RTX;
2197 src = gen_lowpart (src_int_mode, src);
2198 src = convert_modes (int_mode, src_int_mode, src, true);
2199 src = gen_lowpart (mode, src);
2200 return src;
2203 /* Add INC into TARGET. */
2205 void
2206 expand_inc (rtx target, rtx inc)
2208 rtx value = expand_binop (GET_MODE (target), add_optab,
2209 target, inc,
2210 target, 0, OPTAB_LIB_WIDEN);
2211 if (value != target)
2212 emit_move_insn (target, value);
2215 /* Subtract DEC from TARGET. */
2217 void
2218 expand_dec (rtx target, rtx dec)
2220 rtx value = expand_binop (GET_MODE (target), sub_optab,
2221 target, dec,
2222 target, 0, OPTAB_LIB_WIDEN);
2223 if (value != target)
2224 emit_move_insn (target, value);
2227 /* Output a shift instruction for expression code CODE,
2228 with SHIFTED being the rtx for the value to shift,
2229 and AMOUNT the rtx for the amount to shift by.
2230 Store the result in the rtx TARGET, if that is convenient.
2231 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2232 Return the rtx for where the value is. */
2234 static rtx
2235 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2236 rtx amount, rtx target, int unsignedp)
2238 rtx op1, temp = 0;
2239 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2240 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2241 optab lshift_optab = ashl_optab;
2242 optab rshift_arith_optab = ashr_optab;
2243 optab rshift_uns_optab = lshr_optab;
2244 optab lrotate_optab = rotl_optab;
2245 optab rrotate_optab = rotr_optab;
2246 machine_mode op1_mode;
2247 machine_mode scalar_mode = mode;
2248 int attempt;
2249 bool speed = optimize_insn_for_speed_p ();
2251 if (VECTOR_MODE_P (mode))
2252 scalar_mode = GET_MODE_INNER (mode);
2253 op1 = amount;
2254 op1_mode = GET_MODE (op1);
2256 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2257 shift amount is a vector, use the vector/vector shift patterns. */
2258 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2260 lshift_optab = vashl_optab;
2261 rshift_arith_optab = vashr_optab;
2262 rshift_uns_optab = vlshr_optab;
2263 lrotate_optab = vrotl_optab;
2264 rrotate_optab = vrotr_optab;
2267   /* We previously detected shift counts computed by NEGATE_EXPR and
2268      shifted in the other direction, but that does not work on all
2269      machines.  */
2271 if (SHIFT_COUNT_TRUNCATED)
2273 if (CONST_INT_P (op1)
2274 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2275 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2276 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2277 % GET_MODE_BITSIZE (scalar_mode));
2278 else if (GET_CODE (op1) == SUBREG
2279 && subreg_lowpart_p (op1)
2280 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2281 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2282 op1 = SUBREG_REG (op1);
2285 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2286 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2287 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2288 amount instead. */
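  /* For instance, in SImode a left rotate by 25 becomes a right rotate
     by 7.  */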
2289 if (rotate
2290 && CONST_INT_P (op1)
2291 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2292 GET_MODE_BITSIZE (scalar_mode) - 1))
2294 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2295 left = !left;
2296 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2299   /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2300 Note that this is not the case for bigger values. For instance a rotation
2301 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2302 0x04030201 (bswapsi). */
2303 if (rotate
2304 && CONST_INT_P (op1)
2305 && INTVAL (op1) == BITS_PER_UNIT
2306 && GET_MODE_SIZE (scalar_mode) == 2
2307 && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2308 return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2309 unsignedp);
2311 if (op1 == const0_rtx)
2312 return shifted;
2314   /* Check whether it's cheaper to implement a left shift by a constant
2315 bit count by a sequence of additions. */
2316 if (code == LSHIFT_EXPR
2317 && CONST_INT_P (op1)
2318 && INTVAL (op1) > 0
2319 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2320 && INTVAL (op1) < MAX_BITS_PER_WORD
2321 && (shift_cost (speed, mode, INTVAL (op1))
2322 > INTVAL (op1) * add_cost (speed, mode))
2323 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2325 int i;
2326 for (i = 0; i < INTVAL (op1); i++)
2328 temp = force_reg (mode, shifted);
2329 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2330 unsignedp, OPTAB_LIB_WIDEN);
2332 return shifted;
2335 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2337 enum optab_methods methods;
2339 if (attempt == 0)
2340 methods = OPTAB_DIRECT;
2341 else if (attempt == 1)
2342 methods = OPTAB_WIDEN;
2343 else
2344 methods = OPTAB_LIB_WIDEN;
2346 if (rotate)
2348 /* Widening does not work for rotation. */
2349 if (methods == OPTAB_WIDEN)
2350 continue;
2351 else if (methods == OPTAB_LIB_WIDEN)
2353 /* If we have been unable to open-code this by a rotation,
2354 do it as the IOR of two shifts. I.e., to rotate A
2355 by N bits, compute
2356 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2357 where C is the bitsize of A.
2359 It is theoretically possible that the target machine might
2360 not be able to perform either shift and hence we would
2361 be making two libcalls rather than just the one for the
2362 shift (similarly if IOR could not be done). We will allow
2363 this extremely unlikely lossage to avoid complicating the
2364 code below. */
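	      /* E.g. rotating the SImode value 0x12345678 left by 8 yields
		 (0x12345678 << 8) | (0x12345678 >> 24) = 0x34567812.  */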
2366 rtx subtarget = target == shifted ? 0 : target;
2367 rtx new_amount, other_amount;
2368 rtx temp1;
2370 new_amount = op1;
2371 if (op1 == const0_rtx)
2372 return shifted;
2373 else if (CONST_INT_P (op1))
2374 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2375 - INTVAL (op1));
2376 else
2378 other_amount
2379 = simplify_gen_unary (NEG, GET_MODE (op1),
2380 op1, GET_MODE (op1));
2381 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2382 other_amount
2383 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2384 gen_int_mode (mask, GET_MODE (op1)));
2387 shifted = force_reg (mode, shifted);
2389 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2390 mode, shifted, new_amount, 0, 1);
2391 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2392 mode, shifted, other_amount,
2393 subtarget, 1);
2394 return expand_binop (mode, ior_optab, temp, temp1, target,
2395 unsignedp, methods);
2398 temp = expand_binop (mode,
2399 left ? lrotate_optab : rrotate_optab,
2400 shifted, op1, target, unsignedp, methods);
2402 else if (unsignedp)
2403 temp = expand_binop (mode,
2404 left ? lshift_optab : rshift_uns_optab,
2405 shifted, op1, target, unsignedp, methods);
2407 /* Do arithmetic shifts.
2408 Also, if we are going to widen the operand, we can just as well
2409 use an arithmetic right-shift instead of a logical one. */
2410 if (temp == 0 && ! rotate
2411 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2413 enum optab_methods methods1 = methods;
2415 /* If trying to widen a log shift to an arithmetic shift,
2416 don't accept an arithmetic shift of the same size. */
2417 if (unsignedp)
2418 methods1 = OPTAB_MUST_WIDEN;
2420 /* Arithmetic shift */
2422 temp = expand_binop (mode,
2423 left ? lshift_optab : rshift_arith_optab,
2424 shifted, op1, target, unsignedp, methods1);
2427 /* We used to try extzv here for logical right shifts, but that was
2428 only useful for one machine, the VAX, and caused poor code
2429 generation there for lshrdi3, so the code was deleted and a
2430 define_expand for lshrsi3 was added to vax.md. */
2433 gcc_assert (temp);
2434 return temp;
2437 /* Output a shift instruction for expression code CODE,
2438 with SHIFTED being the rtx for the value to shift,
2439 and AMOUNT the amount to shift by.
2440 Store the result in the rtx TARGET, if that is convenient.
2441 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2442 Return the rtx for where the value is. */
2445 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2446 int amount, rtx target, int unsignedp)
2448 return expand_shift_1 (code, mode,
2449 shifted, GEN_INT (amount), target, unsignedp);
2452 /* Output a shift instruction for expression code CODE,
2453 with SHIFTED being the rtx for the value to shift,
2454 and AMOUNT the tree for the amount to shift by.
2455 Store the result in the rtx TARGET, if that is convenient.
2456 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2457 Return the rtx for where the value is. */
2460 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2461 tree amount, rtx target, int unsignedp)
2463 return expand_shift_1 (code, mode,
2464 shifted, expand_normal (amount), target, unsignedp);
2468 /* Indicates the type of fixup needed after a constant multiplication.
2469 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2470 the result should be negated, and ADD_VARIANT means that the
2471 multiplicand should be added to the result. */
2472 enum mult_variant {basic_variant, negate_variant, add_variant};
2474 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2475 const struct mult_cost *, machine_mode mode);
2476 static bool choose_mult_variant (machine_mode, HOST_WIDE_INT,
2477 struct algorithm *, enum mult_variant *, int);
2478 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2479 const struct algorithm *, enum mult_variant);
2480 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2481 static rtx extract_high_half (machine_mode, rtx);
2482 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2483 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2484 int, int);
2485 /* Compute and return the best algorithm for multiplying by T.
2486    The algorithm must cost less than COST_LIMIT.
2487    If retval.cost >= COST_LIMIT, no algorithm was found and all
2488    other fields of the returned struct are undefined.
2489 MODE is the machine mode of the multiplication. */
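/* For instance, multiplication by 10 might be synthesized as
   ((x << 2) + x) << 1, i.e. the op sequence alg_m, alg_add_t2_m (log 2),
   alg_shift (log 1).  */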
2491 static void
2492 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2493 const struct mult_cost *cost_limit, machine_mode mode)
2495 int m;
2496 struct algorithm *alg_in, *best_alg;
2497 struct mult_cost best_cost;
2498 struct mult_cost new_limit;
2499 int op_cost, op_latency;
2500 unsigned HOST_WIDE_INT orig_t = t;
2501 unsigned HOST_WIDE_INT q;
2502 int maxm, hash_index;
2503 bool cache_hit = false;
2504 enum alg_code cache_alg = alg_zero;
2505 bool speed = optimize_insn_for_speed_p ();
2506 machine_mode imode;
2507 struct alg_hash_entry *entry_ptr;
2509 /* Indicate that no algorithm is yet found. If no algorithm
2510 is found, this value will be returned and indicate failure. */
2511 alg_out->cost.cost = cost_limit->cost + 1;
2512 alg_out->cost.latency = cost_limit->latency + 1;
2514 if (cost_limit->cost < 0
2515 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2516 return;
2518 /* Be prepared for vector modes. */
2519 imode = GET_MODE_INNER (mode);
2520 if (imode == VOIDmode)
2521 imode = mode;
2523 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2525 /* Restrict the bits of "t" to the multiplication's mode. */
2526 t &= GET_MODE_MASK (imode);
2528 /* t == 1 can be done in zero cost. */
2529 if (t == 1)
2531 alg_out->ops = 1;
2532 alg_out->cost.cost = 0;
2533 alg_out->cost.latency = 0;
2534 alg_out->op[0] = alg_m;
2535 return;
2538 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2539 fail now. */
2540 if (t == 0)
2542 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2543 return;
2544 else
2546 alg_out->ops = 1;
2547 alg_out->cost.cost = zero_cost (speed);
2548 alg_out->cost.latency = zero_cost (speed);
2549 alg_out->op[0] = alg_zero;
2550 return;
2554 /* We'll be needing a couple extra algorithm structures now. */
2556 alg_in = XALLOCA (struct algorithm);
2557 best_alg = XALLOCA (struct algorithm);
2558 best_cost = *cost_limit;
2560 /* Compute the hash index. */
2561 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2563 /* See if we already know what to do for T. */
2564 entry_ptr = alg_hash_entry_ptr (hash_index);
2565 if (entry_ptr->t == t
2566       && entry_ptr->mode == mode
2568 && entry_ptr->speed == speed
2569 && entry_ptr->alg != alg_unknown)
2571 cache_alg = entry_ptr->alg;
2573 if (cache_alg == alg_impossible)
2575 /* The cache tells us that it's impossible to synthesize
2576 multiplication by T within entry_ptr->cost. */
2577 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2578 /* COST_LIMIT is at least as restrictive as the one
2579 recorded in the hash table, in which case we have no
2580 hope of synthesizing a multiplication. Just
2581 return. */
2582 return;
2584 /* If we get here, COST_LIMIT is less restrictive than the
2585 one recorded in the hash table, so we may be able to
2586 synthesize a multiplication. Proceed as if we didn't
2587 have the cache entry. */
2589 else
2591 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2592 /* The cached algorithm shows that this multiplication
2593 requires more cost than COST_LIMIT. Just return. This
2594 way, we don't clobber this cache entry with
2595 alg_impossible but retain useful information. */
2596 return;
2598 cache_hit = true;
2600 switch (cache_alg)
2602 case alg_shift:
2603 goto do_alg_shift;
2605 case alg_add_t_m2:
2606 case alg_sub_t_m2:
2607 goto do_alg_addsub_t_m2;
2609 case alg_add_factor:
2610 case alg_sub_factor:
2611 goto do_alg_addsub_factor;
2613 case alg_add_t2_m:
2614 goto do_alg_add_t2_m;
2616 case alg_sub_t2_m:
2617 goto do_alg_sub_t2_m;
2619 default:
2620 gcc_unreachable ();
2625 /* If we have a group of zero bits at the low-order part of T, try
2626 multiplying by the remaining bits and then doing a shift. */
2628 if ((t & 1) == 0)
2630 do_alg_shift:
2631 m = floor_log2 (t & -t); /* m = number of low zero bits */
2632 if (m < maxm)
2634 q = t >> m;
2635 /* The function expand_shift will choose between a shift and
2636 a sequence of additions, so the observed cost is given as
2637 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2638 op_cost = m * add_cost (speed, mode);
2639 if (shift_cost (speed, mode, m) < op_cost)
2640 op_cost = shift_cost (speed, mode, m);
2641 new_limit.cost = best_cost.cost - op_cost;
2642 new_limit.latency = best_cost.latency - op_cost;
2643 synth_mult (alg_in, q, &new_limit, mode);
2645 alg_in->cost.cost += op_cost;
2646 alg_in->cost.latency += op_cost;
2647 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2649 best_cost = alg_in->cost;
2650 std::swap (alg_in, best_alg);
2651 best_alg->log[best_alg->ops] = m;
2652 best_alg->op[best_alg->ops] = alg_shift;
2655 /* See if treating ORIG_T as a signed number yields a better
2656 sequence. Try this sequence only for a negative ORIG_T
2657 as it would be useless for a non-negative ORIG_T. */
2658 if ((HOST_WIDE_INT) orig_t < 0)
2660 /* Shift ORIG_T as follows because a right shift of a
2661 negative-valued signed type is implementation
2662 defined. */
2663 q = ~(~orig_t >> m);
2664 /* The function expand_shift will choose between a shift
2665 and a sequence of additions, so the observed cost is
2666 given as MIN (m * add_cost(speed, mode),
2667 shift_cost(speed, mode, m)). */
2668 op_cost = m * add_cost (speed, mode);
2669 if (shift_cost (speed, mode, m) < op_cost)
2670 op_cost = shift_cost (speed, mode, m);
2671 new_limit.cost = best_cost.cost - op_cost;
2672 new_limit.latency = best_cost.latency - op_cost;
2673 synth_mult (alg_in, q, &new_limit, mode);
2675 alg_in->cost.cost += op_cost;
2676 alg_in->cost.latency += op_cost;
2677 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2679 best_cost = alg_in->cost;
2680 std::swap (alg_in, best_alg);
2681 best_alg->log[best_alg->ops] = m;
2682 best_alg->op[best_alg->ops] = alg_shift;
2686 if (cache_hit)
2687 goto done;
2690 /* If we have an odd number, add or subtract one. */
2691 if ((t & 1) != 0)
2693 unsigned HOST_WIDE_INT w;
2695 do_alg_addsub_t_m2:
2696 for (w = 1; (w & t) != 0; w <<= 1)
2698 /* If T was -1, then W will be zero after the loop. This is another
2699 	 case where T ends with ...111.  Handling it by multiplying by (T + 1)
2700 	 and subtracting the multiplicand produces slightly better code and
2701 	 makes algorithm selection much faster than treating it like the
2702 	 ...0111 case below.  */
2703 if (w == 0
2704 || (w > 2
2705 /* Reject the case where t is 3.
2706 Thus we prefer addition in that case. */
2707 && t != 3))
2709 	  /* T ends with ...111.  Multiply by (T + 1) and subtract the multiplicand.  */
2711 op_cost = add_cost (speed, mode);
2712 new_limit.cost = best_cost.cost - op_cost;
2713 new_limit.latency = best_cost.latency - op_cost;
2714 synth_mult (alg_in, t + 1, &new_limit, mode);
2716 alg_in->cost.cost += op_cost;
2717 alg_in->cost.latency += op_cost;
2718 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2720 best_cost = alg_in->cost;
2721 std::swap (alg_in, best_alg);
2722 best_alg->log[best_alg->ops] = 0;
2723 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2726 else
2728 	  /* T ends with ...01 or ...011.  Multiply by (T - 1) and add the multiplicand.  */
2730 op_cost = add_cost (speed, mode);
2731 new_limit.cost = best_cost.cost - op_cost;
2732 new_limit.latency = best_cost.latency - op_cost;
2733 synth_mult (alg_in, t - 1, &new_limit, mode);
2735 alg_in->cost.cost += op_cost;
2736 alg_in->cost.latency += op_cost;
2737 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2739 best_cost = alg_in->cost;
2740 std::swap (alg_in, best_alg);
2741 best_alg->log[best_alg->ops] = 0;
2742 best_alg->op[best_alg->ops] = alg_add_t_m2;
2746 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2747 quickly with a - a * n for some appropriate constant n. */
2748 m = exact_log2 (-orig_t + 1);
2749 if (m >= 0 && m < maxm)
2751 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2752 /* If the target has a cheap shift-and-subtract insn use
2753 that in preference to a shift insn followed by a sub insn.
2754 Assume that the shift-and-sub is "atomic" with a latency
2755 	 equal to its cost, otherwise assume that on superscalar
2756 hardware the shift may be executed concurrently with the
2757 earlier steps in the algorithm. */
2758 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2760 op_cost = shiftsub1_cost (speed, mode, m);
2761 op_latency = op_cost;
2763 else
2764 op_latency = add_cost (speed, mode);
2766 new_limit.cost = best_cost.cost - op_cost;
2767 new_limit.latency = best_cost.latency - op_latency;
2768 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2769 &new_limit, mode);
2771 alg_in->cost.cost += op_cost;
2772 alg_in->cost.latency += op_latency;
2773 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2775 best_cost = alg_in->cost;
2776 std::swap (alg_in, best_alg);
2777 best_alg->log[best_alg->ops] = m;
2778 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2782 if (cache_hit)
2783 goto done;
2786 /* Look for factors of t of the form
2787 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2788 If we find such a factor, we can multiply by t using an algorithm that
2789 multiplies by q, shift the result by m and add/subtract it to itself.
2791 We search for large factors first and loop down, even if large factors
2792 are less probable than small; if we find a large factor we will find a
2793 good sequence quickly, and therefore be able to prune (by decreasing
2794 COST_LIMIT) the search. */
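  /* For instance, t = 45 factors as (2**2 + 1) * (2**3 + 1), so x*45 can
     be computed with two shift-and-add steps: (x << 2) + x gives 5*x and
     (5*x << 3) + 5*x gives 45*x.  */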
2796 do_alg_addsub_factor:
2797 for (m = floor_log2 (t - 1); m >= 2; m--)
2799 unsigned HOST_WIDE_INT d;
2801 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2802 if (t % d == 0 && t > d && m < maxm
2803 && (!cache_hit || cache_alg == alg_add_factor))
2805 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2806 if (shiftadd_cost (speed, mode, m) <= op_cost)
2807 op_cost = shiftadd_cost (speed, mode, m);
2809 op_latency = op_cost;
2812 new_limit.cost = best_cost.cost - op_cost;
2813 new_limit.latency = best_cost.latency - op_latency;
2814 synth_mult (alg_in, t / d, &new_limit, mode);
2816 alg_in->cost.cost += op_cost;
2817 alg_in->cost.latency += op_latency;
2818 if (alg_in->cost.latency < op_cost)
2819 alg_in->cost.latency = op_cost;
2820 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2822 best_cost = alg_in->cost;
2823 std::swap (alg_in, best_alg);
2824 best_alg->log[best_alg->ops] = m;
2825 best_alg->op[best_alg->ops] = alg_add_factor;
2827 /* Other factors will have been taken care of in the recursion. */
2828 break;
2831 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2832 if (t % d == 0 && t > d && m < maxm
2833 && (!cache_hit || cache_alg == alg_sub_factor))
2835 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2836 if (shiftsub0_cost (speed, mode, m) <= op_cost)
2837 op_cost = shiftsub0_cost (speed, mode, m);
2839 op_latency = op_cost;
2841 new_limit.cost = best_cost.cost - op_cost;
2842 new_limit.latency = best_cost.latency - op_latency;
2843 synth_mult (alg_in, t / d, &new_limit, mode);
2845 alg_in->cost.cost += op_cost;
2846 alg_in->cost.latency += op_latency;
2847 if (alg_in->cost.latency < op_cost)
2848 alg_in->cost.latency = op_cost;
2849 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2851 best_cost = alg_in->cost;
2852 std::swap (alg_in, best_alg);
2853 best_alg->log[best_alg->ops] = m;
2854 best_alg->op[best_alg->ops] = alg_sub_factor;
2856 break;
2859 if (cache_hit)
2860 goto done;
2862 /* Try shift-and-add (load effective address) instructions,
2863 i.e. do a*3, a*5, a*9. */
2864 if ((t & 1) != 0)
2866 do_alg_add_t2_m:
2867 q = t - 1;
2868 q = q & -q;
2869 m = exact_log2 (q);
2870 if (m >= 0 && m < maxm)
2872 op_cost = shiftadd_cost (speed, mode, m);
2873 new_limit.cost = best_cost.cost - op_cost;
2874 new_limit.latency = best_cost.latency - op_cost;
2875 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2877 alg_in->cost.cost += op_cost;
2878 alg_in->cost.latency += op_cost;
2879 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2881 best_cost = alg_in->cost;
2882 std::swap (alg_in, best_alg);
2883 best_alg->log[best_alg->ops] = m;
2884 best_alg->op[best_alg->ops] = alg_add_t2_m;
2887 if (cache_hit)
2888 goto done;
2890 do_alg_sub_t2_m:
2891 q = t + 1;
2892 q = q & -q;
2893 m = exact_log2 (q);
2894 if (m >= 0 && m < maxm)
2896 op_cost = shiftsub0_cost (speed, mode, m);
2897 new_limit.cost = best_cost.cost - op_cost;
2898 new_limit.latency = best_cost.latency - op_cost;
2899 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2901 alg_in->cost.cost += op_cost;
2902 alg_in->cost.latency += op_cost;
2903 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2905 best_cost = alg_in->cost;
2906 std::swap (alg_in, best_alg);
2907 best_alg->log[best_alg->ops] = m;
2908 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2911 if (cache_hit)
2912 goto done;
2915 done:
2916 /* If best_cost has not decreased, we have not found any algorithm. */
2917 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2919 /* We failed to find an algorithm. Record alg_impossible for
2920 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2921 we are asked to find an algorithm for T within the same or
2922 lower COST_LIMIT, we can immediately return to the
2923 caller. */
2924 entry_ptr->t = t;
2925 entry_ptr->mode = mode;
2926 entry_ptr->speed = speed;
2927 entry_ptr->alg = alg_impossible;
2928 entry_ptr->cost = *cost_limit;
2929 return;
2932 /* Cache the result. */
2933 if (!cache_hit)
2935 entry_ptr->t = t;
2936 entry_ptr->mode = mode;
2937 entry_ptr->speed = speed;
2938 entry_ptr->alg = best_alg->op[best_alg->ops];
2939 entry_ptr->cost.cost = best_cost.cost;
2940 entry_ptr->cost.latency = best_cost.latency;
2943   /* If the sequence we found is too long for `struct algorithm'
2944      to record, make this search fail.  */
2945 if (best_alg->ops == MAX_BITS_PER_WORD)
2946 return;
2948 /* Copy the algorithm from temporary space to the space at alg_out.
2949 We avoid using structure assignment because the majority of
2950 best_alg is normally undefined, and this is a critical function. */
2951 alg_out->ops = best_alg->ops + 1;
2952 alg_out->cost = best_cost;
2953 memcpy (alg_out->op, best_alg->op,
2954 alg_out->ops * sizeof *alg_out->op);
2955 memcpy (alg_out->log, best_alg->log,
2956 alg_out->ops * sizeof *alg_out->log);
2959 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2960 Try three variations:
2962 - a shift/add sequence based on VAL itself
2963 - a shift/add sequence based on -VAL, followed by a negation
2964 - a shift/add sequence based on VAL - 1, followed by an addition.
2966 Return true if the cheapest of these cost less than MULT_COST,
2967 describing the algorithm in *ALG and final fixup in *VARIANT. */
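/* For instance, multiplying by -5 might use the negate_variant
   (synthesize x*5 as (x << 2) + x, then negate), while multiplying by 17
   might use the add_variant (synthesize x*16 as x << 4, then add x).  */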
2969 static bool
2970 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
2971 struct algorithm *alg, enum mult_variant *variant,
2972 int mult_cost)
2974 struct algorithm alg2;
2975 struct mult_cost limit;
2976 int op_cost;
2977 bool speed = optimize_insn_for_speed_p ();
2979 /* Fail quickly for impossible bounds. */
2980 if (mult_cost < 0)
2981 return false;
2983 /* Ensure that mult_cost provides a reasonable upper bound.
2984      Any constant multiplication can be performed with fewer
2985 than 2 * bits additions. */
2986 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2987 if (mult_cost > op_cost)
2988 mult_cost = op_cost;
2990 *variant = basic_variant;
2991 limit.cost = mult_cost;
2992 limit.latency = mult_cost;
2993 synth_mult (alg, val, &limit, mode);
2995 /* This works only if the inverted value actually fits in an
2996      `unsigned int'.  */
2997 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2999 op_cost = neg_cost (speed, mode);
3000 if (MULT_COST_LESS (&alg->cost, mult_cost))
3002 limit.cost = alg->cost.cost - op_cost;
3003 limit.latency = alg->cost.latency - op_cost;
3005 else
3007 limit.cost = mult_cost - op_cost;
3008 limit.latency = mult_cost - op_cost;
3011 synth_mult (&alg2, -val, &limit, mode);
3012 alg2.cost.cost += op_cost;
3013 alg2.cost.latency += op_cost;
3014 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3015 *alg = alg2, *variant = negate_variant;
3018 /* This proves very useful for division-by-constant. */
3019 op_cost = add_cost (speed, mode);
3020 if (MULT_COST_LESS (&alg->cost, mult_cost))
3022 limit.cost = alg->cost.cost - op_cost;
3023 limit.latency = alg->cost.latency - op_cost;
3025 else
3027 limit.cost = mult_cost - op_cost;
3028 limit.latency = mult_cost - op_cost;
3031 synth_mult (&alg2, val - 1, &limit, mode);
3032 alg2.cost.cost += op_cost;
3033 alg2.cost.latency += op_cost;
3034 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3035 *alg = alg2, *variant = add_variant;
3037 return MULT_COST_LESS (&alg->cost, mult_cost);
3040 /* A subroutine of expand_mult, used for constant multiplications.
3041 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3042 convenient. Use the shift/add sequence described by ALG and apply
3043 the final fixup specified by VARIANT. */
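/* For instance, with VAL = 10 and the algorithm alg_m, alg_add_t2_m (log 2),
   alg_shift (log 1), this emits accum = op0; accum = (accum << 2) + op0;
   accum = accum << 1, while VAL_SO_FAR tracks 1, 5, 10 so that the final
   consistency check passes.  */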
3045 static rtx
3046 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3047 rtx target, const struct algorithm *alg,
3048 enum mult_variant variant)
3050 HOST_WIDE_INT val_so_far;
3051 rtx_insn *insn;
3052 rtx accum, tem;
3053 int opno;
3054 machine_mode nmode;
3056 /* Avoid referencing memory over and over and invalid sharing
3057 on SUBREGs. */
3058 op0 = force_reg (mode, op0);
3060 /* ACCUM starts out either as OP0 or as a zero, depending on
3061 the first operation. */
3063 if (alg->op[0] == alg_zero)
3065 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3066 val_so_far = 0;
3068 else if (alg->op[0] == alg_m)
3070 accum = copy_to_mode_reg (mode, op0);
3071 val_so_far = 1;
3073 else
3074 gcc_unreachable ();
3076 for (opno = 1; opno < alg->ops; opno++)
3078 int log = alg->log[opno];
3079 rtx shift_subtarget = optimize ? 0 : accum;
3080 rtx add_target
3081 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3082 && !optimize)
3083 ? target : 0;
3084 rtx accum_target = optimize ? 0 : accum;
3085 rtx accum_inner;
3087 switch (alg->op[opno])
3089 case alg_shift:
3090 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3091 /* REG_EQUAL note will be attached to the following insn. */
3092 emit_move_insn (accum, tem);
3093 val_so_far <<= log;
3094 break;
3096 case alg_add_t_m2:
3097 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3098 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3099 add_target ? add_target : accum_target);
3100 val_so_far += (HOST_WIDE_INT) 1 << log;
3101 break;
3103 case alg_sub_t_m2:
3104 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3105 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3106 add_target ? add_target : accum_target);
3107 val_so_far -= (HOST_WIDE_INT) 1 << log;
3108 break;
3110 case alg_add_t2_m:
3111 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3112 log, shift_subtarget, 0);
3113 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3114 add_target ? add_target : accum_target);
3115 val_so_far = (val_so_far << log) + 1;
3116 break;
3118 case alg_sub_t2_m:
3119 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3120 log, shift_subtarget, 0);
3121 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3122 add_target ? add_target : accum_target);
3123 val_so_far = (val_so_far << log) - 1;
3124 break;
3126 case alg_add_factor:
3127 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3128 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3129 add_target ? add_target : accum_target);
3130 val_so_far += val_so_far << log;
3131 break;
3133 case alg_sub_factor:
3134 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3135 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3136 (add_target
3137 ? add_target : (optimize ? 0 : tem)));
3138 val_so_far = (val_so_far << log) - val_so_far;
3139 break;
3141 default:
3142 gcc_unreachable ();
3145 if (SCALAR_INT_MODE_P (mode))
3147 /* Write a REG_EQUAL note on the last insn so that we can cse
3148 multiplication sequences. Note that if ACCUM is a SUBREG,
3149 we've set the inner register and must properly indicate that. */
3150 tem = op0, nmode = mode;
3151 accum_inner = accum;
3152 if (GET_CODE (accum) == SUBREG)
3154 accum_inner = SUBREG_REG (accum);
3155 nmode = GET_MODE (accum_inner);
3156 tem = gen_lowpart (nmode, op0);
3159 insn = get_last_insn ();
3160 set_dst_reg_note (insn, REG_EQUAL,
3161 gen_rtx_MULT (nmode, tem,
3162 gen_int_mode (val_so_far, nmode)),
3163 accum_inner);
3167 if (variant == negate_variant)
3169 val_so_far = -val_so_far;
3170 accum = expand_unop (mode, neg_optab, accum, target, 0);
3172 else if (variant == add_variant)
3174 val_so_far = val_so_far + 1;
3175 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3178 /* Compare only the bits of val and val_so_far that are significant
3179 in the result mode, to avoid sign-/zero-extension confusion. */
3180 nmode = GET_MODE_INNER (mode);
3181 if (nmode == VOIDmode)
3182 nmode = mode;
3183 val &= GET_MODE_MASK (nmode);
3184 val_so_far &= GET_MODE_MASK (nmode);
3185 gcc_assert (val == val_so_far);
3187 return accum;
3190 /* Perform a multiplication and return an rtx for the result.
3191 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3192 TARGET is a suggestion for where to store the result (an rtx).
3194 We check specially for a constant integer as OP1.
3195 If you want this check for OP0 as well, then before calling
3196 you should swap the two operands if OP0 would be constant. */
3199 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3200 int unsignedp)
3202 enum mult_variant variant;
3203 struct algorithm algorithm;
3204 rtx scalar_op1;
3205 int max_cost;
3206 bool speed = optimize_insn_for_speed_p ();
3207 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3209 if (CONSTANT_P (op0))
3210 std::swap (op0, op1);
3212 /* For vectors, there are several simplifications that can be made if
3213 all elements of the vector constant are identical. */
3214 scalar_op1 = op1;
3215 if (GET_CODE (op1) == CONST_VECTOR)
3217 int i, n = CONST_VECTOR_NUNITS (op1);
3218 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3219 for (i = 1; i < n; ++i)
3220 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3221 goto skip_scalar;
3224 if (INTEGRAL_MODE_P (mode))
3226 rtx fake_reg;
3227 HOST_WIDE_INT coeff;
3228 bool is_neg;
3229 int mode_bitsize;
3231 if (op1 == CONST0_RTX (mode))
3232 return op1;
3233 if (op1 == CONST1_RTX (mode))
3234 return op0;
3235 if (op1 == CONSTM1_RTX (mode))
3236 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3237 op0, target, 0);
3239 if (do_trapv)
3240 goto skip_synth;
3242 /* If mode is integer vector mode, check if the backend supports
3243 vector lshift (by scalar or vector) at all. If not, we can't use
3244 	 a synthesized multiply.  */
3245 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3246 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3247 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3248 goto skip_synth;
3250 /* These are the operations that are potentially turned into
3251 a sequence of shifts and additions. */
3252 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3254 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3255 less than or equal in size to `unsigned int' this doesn't matter.
3256 If the mode is larger than `unsigned int', then synth_mult works
3257 only if the constant value exactly fits in an `unsigned int' without
3258 any truncation. This means that multiplying by negative values does
3259 not work; results are off by 2^32 on a 32 bit machine. */
3260 if (CONST_INT_P (scalar_op1))
3262 coeff = INTVAL (scalar_op1);
3263 is_neg = coeff < 0;
3265 #if TARGET_SUPPORTS_WIDE_INT
3266 else if (CONST_WIDE_INT_P (scalar_op1))
3267 #else
3268 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3269 #endif
3271 int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3272 /* Perfect power of 2 (other than 1, which is handled above). */
3273 if (shift > 0)
3274 return expand_shift (LSHIFT_EXPR, mode, op0,
3275 shift, target, unsignedp);
3276 else
3277 goto skip_synth;
3279 else
3280 goto skip_synth;
3282 /* We used to test optimize here, on the grounds that it's better to
3283 produce a smaller program when -O is not used. But this causes
3284 such a terrible slowdown sometimes that it seems better to always
3285 use synth_mult. */
3287 /* Special case powers of two. */
3288 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3289 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3290 return expand_shift (LSHIFT_EXPR, mode, op0,
3291 floor_log2 (coeff), target, unsignedp);
3293 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3295 /* Attempt to handle multiplication of DImode values by negative
3296 coefficients, by performing the multiplication by a positive
3297 multiplier and then inverting the result. */
3298 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3300 	  /* It's safe to use -coeff even for INT_MIN, as the
3301 result is interpreted as an unsigned coefficient.
3302 Exclude cost of op0 from max_cost to match the cost
3303 calculation of the synth_mult. */
3304 coeff = -(unsigned HOST_WIDE_INT) coeff;
3305 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3306 - neg_cost (speed, mode));
3307 if (max_cost <= 0)
3308 goto skip_synth;
3310 /* Special case powers of two. */
3311 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3313 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3314 floor_log2 (coeff), target, unsignedp);
3315 return expand_unop (mode, neg_optab, temp, target, 0);
3318 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3319 max_cost))
3321 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3322 &algorithm, variant);
3323 return expand_unop (mode, neg_optab, temp, target, 0);
3325 goto skip_synth;
3328 /* Exclude cost of op0 from max_cost to match the cost
3329 calculation of the synth_mult. */
3330 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3331 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3332 return expand_mult_const (mode, op0, coeff, target,
3333 &algorithm, variant);
3335 skip_synth:
3337 /* Expand x*2.0 as x+x. */
3338 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3340 REAL_VALUE_TYPE d;
3341 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3343 if (REAL_VALUES_EQUAL (d, dconst2))
3345 op0 = force_reg (GET_MODE (op0), op0);
3346 return expand_binop (mode, add_optab, op0, op0,
3347 target, unsignedp, OPTAB_LIB_WIDEN);
3350 skip_scalar:
3352 /* This used to use umul_optab if unsigned, but for non-widening multiply
3353 there is no difference between signed and unsigned. */
3354 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3355 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3356 gcc_assert (op0);
3357 return op0;
3360 /* Return a cost estimate for multiplying a register by the given
3361 COEFFicient in the given MODE and SPEED. */
3364 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3366 int max_cost;
3367 struct algorithm algorithm;
3368 enum mult_variant variant;
3370 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3371 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3372 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3373 return algorithm.cost.cost;
3374 else
3375 return max_cost;
3378 /* Perform a widening multiplication and return an rtx for the result.
3379 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3380 TARGET is a suggestion for where to store the result (an rtx).
3381 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3382 or smul_widen_optab.
3384 We check specially for a constant integer as OP1, comparing the
3385 cost of a widening multiply against the cost of a sequence of shifts
3386 and adds. */
3389 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3390 int unsignedp, optab this_optab)
3392 bool speed = optimize_insn_for_speed_p ();
3393 rtx cop1;
3395 if (CONST_INT_P (op1)
3396 && GET_MODE (op0) != VOIDmode
3397 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3398 this_optab == umul_widen_optab))
3399 && CONST_INT_P (cop1)
3400 && (INTVAL (cop1) >= 0
3401 || HWI_COMPUTABLE_MODE_P (mode)))
3403 HOST_WIDE_INT coeff = INTVAL (cop1);
3404 int max_cost;
3405 enum mult_variant variant;
3406 struct algorithm algorithm;
3408 if (coeff == 0)
3409 return CONST0_RTX (mode);
3411 /* Special case powers of two. */
3412 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3414 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3415 return expand_shift (LSHIFT_EXPR, mode, op0,
3416 floor_log2 (coeff), target, unsignedp);
3419 /* Exclude cost of op0 from max_cost to match the cost
3420 calculation of the synth_mult. */
3421 max_cost = mul_widen_cost (speed, mode);
3422 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3423 max_cost))
3425 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3426 return expand_mult_const (mode, op0, coeff, target,
3427 &algorithm, variant);
3430 return expand_binop (mode, this_optab, op0, op1, target,
3431 unsignedp, OPTAB_LIB_WIDEN);
3434 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3435 replace division by D, and put the least significant N bits of the result
3436 in *MULTIPLIER_PTR and return the most significant bit.
3438 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3439 needed precision is in PRECISION (should be <= N).
3441 PRECISION should be as small as possible so this function can choose
3442    the multiplier more freely.
3444 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3445 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3447 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3448 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
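/* For instance, with D = 7, N = 32 and PRECISION = 32 this computes
   lgup = 3 and mhigh = floor ((2**35 + 2**3) / 7) = 0x124924925, so it
   stores 0x24924925 in *MULTIPLIER_PTR, 3 in *POST_SHIFT_PTR and returns
   1 (the bit above the low 32 bits); the full 33-bit multiplier then
   satisfies x/7 == (x * 0x124924925) >> 35, i.e. the high 32-bit half of
   the product shifted right by the post-shift of 3, for every 32-bit
   unsigned x.  */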
3450 unsigned HOST_WIDE_INT
3451 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3452 unsigned HOST_WIDE_INT *multiplier_ptr,
3453 int *post_shift_ptr, int *lgup_ptr)
3455 int lgup, post_shift;
3456 int pow, pow2;
3458 /* lgup = ceil(log2(divisor)); */
3459 lgup = ceil_log2 (d);
3461 gcc_assert (lgup <= n);
3463 pow = n + lgup;
3464 pow2 = n + lgup - precision;
3466 /* mlow = 2^(N + lgup)/d */
3467 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3468 wide_int mlow = wi::udiv_trunc (val, d);
3470 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3471 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3472 wide_int mhigh = wi::udiv_trunc (val, d);
3474 /* If precision == N, then mlow, mhigh exceed 2^N
3475 (but they do not exceed 2^(N+1)). */
3477 /* Reduce to lowest terms. */
3478 for (post_shift = lgup; post_shift > 0; post_shift--)
3480 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3481 HOST_BITS_PER_WIDE_INT);
3482 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3483 HOST_BITS_PER_WIDE_INT);
3484 if (ml_lo >= mh_lo)
3485 break;
3487 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3488 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3491 *post_shift_ptr = post_shift;
3492 *lgup_ptr = lgup;
3493 if (n < HOST_BITS_PER_WIDE_INT)
3495 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3496 *multiplier_ptr = mhigh.to_uhwi () & mask;
3497 return mhigh.to_uhwi () >= mask;
3499 else
3501 *multiplier_ptr = mhigh.to_uhwi ();
3502 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3506 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3507 congruent to 1 (mod 2**N). */
3509 static unsigned HOST_WIDE_INT
3510 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3512 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3514 /* The algorithm notes that the choice y = x satisfies
3515 x*y == 1 mod 2^3, since x is assumed odd.
3516 Each iteration doubles the number of bits of significance in y. */
3518 unsigned HOST_WIDE_INT mask;
3519 unsigned HOST_WIDE_INT y = x;
3520 int nbit = 3;
3522 mask = (n == HOST_BITS_PER_WIDE_INT
3523 ? ~(unsigned HOST_WIDE_INT) 0
3524 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3526 while (nbit < n)
3528 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3529 nbit *= 2;
3531 return y;
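/* For instance, with X == 7 and N == 8 the initial guess y == 7 is already
   correct mod 2^3 (7 * 7 == 49 == 1 mod 8).  One iteration gives
   y == 7 * (2 - 49) == -329 == 183 (mod 256), which is correct mod 2^6,
   and a second iteration leaves it at 183, the inverse mod 2^8:
   7 * 183 == 1281 == 5 * 256 + 1.  */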
3534 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3535 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3536 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3537 to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
3538 become signed.
3540 The result is put in TARGET if that is convenient.
3542 MODE is the mode of operation. */
3545 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3546 rtx op1, rtx target, int unsignedp)
3548 rtx tem;
3549 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3551 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3552 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3553 tem = expand_and (mode, tem, op1, NULL_RTX);
3554 adj_operand
3555 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3556 adj_operand);
3558 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3559 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3560 tem = expand_and (mode, tem, op0, NULL_RTX);
3561 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3562 target);
3564 return target;
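/* The identity being used: if SA and SB are the sign bits of OP0 and OP1
   in an N-bit mode then, modulo 2^N,

       uhigh (op0, op1) == shigh (op0, op1) + (SA ? op1 : 0) + (SB ? op0 : 0)

   and the signed high part is the unsigned one minus the same two terms.
   A 32-bit sketch of the UNSIGNEDP direction (illustrative only; uhigh and
   shigh are not real functions here):

       uint32_t adj = shigh;
       adj += (uint32_t) ((a >> 31) & b);
       adj += (uint32_t) ((b >> 31) & a);

   where a and b are int32_t and the arithmetic right shift replicates the
   sign bit, just as the expand_shift/expand_and pairs above do.  */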
3567 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3569 static rtx
3570 extract_high_half (machine_mode mode, rtx op)
3572 machine_mode wider_mode;
3574 if (mode == word_mode)
3575 return gen_highpart (mode, op);
3577 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3579 wider_mode = GET_MODE_WIDER_MODE (mode);
3580 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3581 GET_MODE_BITSIZE (mode), 0, 1);
3582 return convert_modes (mode, wider_mode, op, 0);
3585 /* Like expmed_mult_highpart, but only consider using a multiplication
3586 optab. OP1 is an rtx for the constant operand. */
3588 static rtx
3589 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3590 rtx target, int unsignedp, int max_cost)
3592 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3593 machine_mode wider_mode;
3594 optab moptab;
3595 rtx tem;
3596 int size;
3597 bool speed = optimize_insn_for_speed_p ();
3599 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3601 wider_mode = GET_MODE_WIDER_MODE (mode);
3602 size = GET_MODE_BITSIZE (mode);
3604 /* Firstly, try using a multiplication insn that only generates the needed
3605 high part of the product, with the signedness given by UNSIGNEDP. */
3606 if (mul_highpart_cost (speed, mode) < max_cost)
3608 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3609 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3610 unsignedp, OPTAB_DIRECT);
3611 if (tem)
3612 return tem;
3615 /* Secondly, the same as above, but using the sign flavor opposite of UNSIGNEDP.
3616 We then need to adjust the result after the multiplication. */
3617 if (size - 1 < BITS_PER_WORD
3618 && (mul_highpart_cost (speed, mode)
3619 + 2 * shift_cost (speed, mode, size-1)
3620 + 4 * add_cost (speed, mode) < max_cost))
3622 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3623 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3624 unsignedp, OPTAB_DIRECT);
3625 if (tem)
3626 /* We used the wrong signedness. Adjust the result. */
3627 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3628 tem, unsignedp);
3631 /* Try widening multiplication. */
3632 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3633 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3634 && mul_widen_cost (speed, wider_mode) < max_cost)
3636 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3637 unsignedp, OPTAB_WIDEN);
3638 if (tem)
3639 return extract_high_half (mode, tem);
3642 /* Try widening the mode and perform a non-widening multiplication. */
3643 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3644 && size - 1 < BITS_PER_WORD
3645 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3646 < max_cost))
3648 rtx_insn *insns;
3649 rtx wop0, wop1;
3651 /* We need to widen the operands, for example to ensure the
3652 constant multiplier is correctly sign or zero extended.
3653 Use a sequence to clean up any instructions emitted by
3654 the conversions if things don't work out. */
3655 start_sequence ();
3656 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3657 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3658 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3659 unsignedp, OPTAB_WIDEN);
3660 insns = get_insns ();
3661 end_sequence ();
3663 if (tem)
3665 emit_insn (insns);
3666 return extract_high_half (mode, tem);
3670 /* Try widening multiplication of opposite signedness, and adjust. */
3671 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3672 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3673 && size - 1 < BITS_PER_WORD
3674 && (mul_widen_cost (speed, wider_mode)
3675 + 2 * shift_cost (speed, mode, size-1)
3676 + 4 * add_cost (speed, mode) < max_cost))
3678 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3679 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3680 if (tem != 0)
3682 tem = extract_high_half (mode, tem);
3683 /* We used the wrong signedness. Adjust the result. */
3684 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3685 target, unsignedp);
3689 return 0;
3692 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3693 putting the high half of the result in TARGET if that is convenient,
3694 and return where the result is. If the operation cannot be performed,
3695 0 is returned.
3697 MODE is the mode of operation and result.
3699 UNSIGNEDP nonzero means unsigned multiply.
3701 MAX_COST is the total allowed cost for the expanded RTL. */
3703 static rtx
3704 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3705 rtx target, int unsignedp, int max_cost)
3707 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3708 unsigned HOST_WIDE_INT cnst1;
3709 int extra_cost;
3710 bool sign_adjust = false;
3711 enum mult_variant variant;
3712 struct algorithm alg;
3713 rtx tem;
3714 bool speed = optimize_insn_for_speed_p ();
3716 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3717 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3718 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3720 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3722 /* We can't optimize modes wider than BITS_PER_WORD.
3723 ??? We might be able to perform double-word arithmetic if
3724 mode == word_mode, however all the cost calculations in
3725 synth_mult etc. assume single-word operations. */
3726 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3727 return expmed_mult_highpart_optab (mode, op0, op1, target,
3728 unsignedp, max_cost);
3730 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3732 /* Check whether we try to multiply by a negative constant. */
3733 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3735 sign_adjust = true;
3736 extra_cost += add_cost (speed, mode);
3739 /* See whether shift/add multiplication is cheap enough. */
3740 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3741 max_cost - extra_cost))
3743 /* See whether the specialized multiplication optabs are
3744 cheaper than the shift/add version. */
3745 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3746 alg.cost.cost + extra_cost);
3747 if (tem)
3748 return tem;
3750 tem = convert_to_mode (wider_mode, op0, unsignedp);
3751 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3752 tem = extract_high_half (mode, tem);
3754 /* Adjust result for signedness. */
3755 if (sign_adjust)
3756 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3758 return tem;
3760 return expmed_mult_highpart_optab (mode, op0, op1, target,
3761 unsignedp, max_cost);
3765 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3767 static rtx
3768 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3770 rtx result, temp, shift;
3771 rtx_code_label *label;
3772 int logd;
3773 int prec = GET_MODE_PRECISION (mode);
3775 logd = floor_log2 (d);
3776 result = gen_reg_rtx (mode);
3778 /* Avoid conditional branches when they're expensive. */
3779 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3780 && optimize_insn_for_speed_p ())
3782 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3783 mode, 0, -1);
3784 if (signmask)
3786 HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3787 signmask = force_reg (mode, signmask);
3788 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3790 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3791 which instruction sequence to use. If logical right shifts
3792 are expensive then use 2 XORs, 2 SUBs and an AND; otherwise
3793 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3795 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3796 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3797 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3798 > COSTS_N_INSNS (2)))
3800 temp = expand_binop (mode, xor_optab, op0, signmask,
3801 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3802 temp = expand_binop (mode, sub_optab, temp, signmask,
3803 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3804 temp = expand_binop (mode, and_optab, temp,
3805 gen_int_mode (masklow, mode),
3806 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3807 temp = expand_binop (mode, xor_optab, temp, signmask,
3808 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3809 temp = expand_binop (mode, sub_optab, temp, signmask,
3810 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3812 else
3814 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3815 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3816 signmask = force_reg (mode, signmask);
3818 temp = expand_binop (mode, add_optab, op0, signmask,
3819 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3820 temp = expand_binop (mode, and_optab, temp,
3821 gen_int_mode (masklow, mode),
3822 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3823 temp = expand_binop (mode, sub_optab, temp, signmask,
3824 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3826 return temp;
3830 /* Mask contains the mode's signbit and the significant bits of the
3831 modulus. By including the signbit in the operation, many targets
3832 can avoid an explicit compare operation in the following comparison
3833 against zero. */
3834 wide_int mask = wi::mask (logd, false, prec);
3835 mask = wi::set_bit (mask, prec - 1);
3837 temp = expand_binop (mode, and_optab, op0,
3838 immed_wide_int_const (mask, mode),
3839 result, 1, OPTAB_LIB_WIDEN);
3840 if (temp != result)
3841 emit_move_insn (result, temp);
3843 label = gen_label_rtx ();
3844 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3846 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3847 0, OPTAB_LIB_WIDEN);
3849 mask = wi::mask (logd, true, prec);
3850 temp = expand_binop (mode, ior_optab, temp,
3851 immed_wide_int_const (mask, mode),
3852 result, 1, OPTAB_LIB_WIDEN);
3853 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3854 0, OPTAB_LIB_WIDEN);
3855 if (temp != result)
3856 emit_move_insn (result, temp);
3857 emit_label (label);
3858 return result;
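/* Both branch-free variants above compute the truncating (C-style)
   remainder.  For 32-bit x, d == 2^logd, mask == d - 1 and
   sign == (x < 0 ? -1 : 0) they correspond roughly to

       r = ((x ^ sign) - sign) & mask;
       r = (r ^ sign) - sign;

   for the 2-XOR/2-SUB/AND form, and to

       t = (uint32_t) sign >> (32 - logd);
       r = ((x + t) & mask) - t;

   when a logical right shift is cheap (t is d - 1 exactly when x is
   negative).  E.g. -5 % 4: t == 3 and ((-5 + 3) & 3) - 3 == -1.
   Illustrative sketches only.  */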
3861 /* Expand signed division of OP0 by a power of two D in mode MODE.
3862 This routine is only called for positive values of D. */
3864 static rtx
3865 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3867 rtx temp;
3868 rtx_code_label *label;
3869 int logd;
3871 logd = floor_log2 (d);
3873 if (d == 2
3874 && BRANCH_COST (optimize_insn_for_speed_p (),
3875 false) >= 1)
3877 temp = gen_reg_rtx (mode);
3878 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3879 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3880 0, OPTAB_LIB_WIDEN);
3881 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3884 #ifdef HAVE_conditional_move
3885 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3886 >= 2)
3888 rtx temp2;
3890 start_sequence ();
3891 temp2 = copy_to_mode_reg (mode, op0);
3892 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3893 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3894 temp = force_reg (mode, temp);
3896 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3897 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3898 mode, temp, temp2, mode, 0);
3899 if (temp2)
3901 rtx_insn *seq = get_insns ();
3902 end_sequence ();
3903 emit_insn (seq);
3904 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3906 end_sequence ();
3908 #endif
3910 if (BRANCH_COST (optimize_insn_for_speed_p (),
3911 false) >= 2)
3913 int ushift = GET_MODE_BITSIZE (mode) - logd;
3915 temp = gen_reg_rtx (mode);
3916 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3917 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3918 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3919 > COSTS_N_INSNS (1))
3920 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3921 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3922 else
3923 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3924 ushift, NULL_RTX, 1);
3925 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3926 0, OPTAB_LIB_WIDEN);
3927 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3930 label = gen_label_rtx ();
3931 temp = copy_to_mode_reg (mode, op0);
3932 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3933 expand_inc (temp, gen_int_mode (d - 1, mode));
3934 emit_label (label);
3935 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
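/* All of the paths above compute the truncating signed quotient by adding
   d - 1 to the dividend only when it is negative and then shifting right
   arithmetically.  A branch-free 32-bit sketch (assuming arithmetic right
   shifts on signed types):

       q = (x + ((x >> 31) & (d - 1))) >> logd;

   e.g. x == -7, d == 4 gives (-7 + 3) >> 2 == -1, while 7 >> 2 == 1
   directly.  The d == 2 special case adds the sign bit itself:
   q == (x + (x < 0)) >> 1.  */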
3938 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3939 if that is convenient, and returning where the result is.
3940 You may request either the quotient or the remainder as the result;
3941 specify REM_FLAG nonzero to get the remainder.
3943 CODE is the expression code for which kind of division this is;
3944 it controls how rounding is done. MODE is the machine mode to use.
3945 UNSIGNEDP nonzero means do unsigned division. */
3947 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3948 and then correct it by or'ing in missing high bits
3949 if result of ANDI is nonzero.
3950 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3951 This could optimize to a bfexts instruction.
3952 But C doesn't use these operations, so their optimizations are
3953 left for later. */
3954 /* ??? For modulo, we don't actually need the highpart of the first product,
3955 the low part will do nicely. And for small divisors, the second multiply
3956 can also be a low-part only multiply or even be completely left out.
3957 E.g. to calculate the remainder of a division by 3 with a 32 bit
3958 multiply, multiply with 0x55555556 and extract the upper two bits;
3959 the result is exact for inputs up to 0x1fffffff.
3960 The input range can be reduced by using cross-sum rules.
3961 For odd divisors >= 3, the following table gives right shift counts
3962 so that if a number is shifted by an integer multiple of the given
3963 amount, the remainder stays the same:
3964 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3965 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3966 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3967 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3968 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3970 Cross-sum rules for even numbers can be derived by leaving as many bits
3971 to the right alone as the divisor has zeros to the right.
3972 E.g. if x is an unsigned 32 bit number:
3973 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
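/* A 32-bit C sanity check of the divide-by-3 remainder trick above
   (0x55555556 is (2^32 + 2) / 3, so the top two bits of the low half of
   the product carry the remainder):

       r = (x * 0x55555556u) >> 30;

   gives x % 3 for all x up to 0x1fffffff and first goes wrong at
   x == 0x20000000, where it returns 3.  */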
3977 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
3978 rtx op0, rtx op1, rtx target, int unsignedp)
3980 machine_mode compute_mode;
3981 rtx tquotient;
3982 rtx quotient = 0, remainder = 0;
3983 rtx_insn *last;
3984 int size;
3985 rtx_insn *insn;
3986 optab optab1, optab2;
3987 int op1_is_constant, op1_is_pow2 = 0;
3988 int max_cost, extra_cost;
3989 static HOST_WIDE_INT last_div_const = 0;
3990 bool speed = optimize_insn_for_speed_p ();
3992 op1_is_constant = CONST_INT_P (op1);
3993 if (op1_is_constant)
3995 unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3996 if (unsignedp)
3997 ext_op1 &= GET_MODE_MASK (mode);
3998 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3999 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
4003 This is the structure of expand_divmod:
4005 First comes code to fix up the operands so we can perform the operations
4006 correctly and efficiently.
4008 Second comes a switch statement with code specific for each rounding mode.
4009 For some special operands this code emits all RTL for the desired
4010 operation, for other cases, it generates only a quotient and stores it in
4011 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4012 to indicate that it has not done anything.
4014 Last comes code that finishes the operation. If QUOTIENT is set and
4015 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4016 QUOTIENT is not set, it is computed using trunc rounding.
4018 We try to generate special code for division and remainder when OP1 is a
4019 constant. If |OP1| = 2**n we can use shifts and some other fast
4020 operations. For other values of OP1, we compute a carefully selected
4021 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4022 by m.
4024 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4025 half of the product. Different strategies for generating the product are
4026 implemented in expmed_mult_highpart.
4028 If what we actually want is the remainder, we generate that by another
4029 by-constant multiplication and a subtraction. */
4031 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4032 code below will malfunction if we are, so check here and handle
4033 the special case if so. */
4034 if (op1 == const1_rtx)
4035 return rem_flag ? const0_rtx : op0;
4037 /* When dividing by -1, we could get an overflow.
4038 negv_optab can handle overflows. */
4039 if (! unsignedp && op1 == constm1_rtx)
4041 if (rem_flag)
4042 return const0_rtx;
4043 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4044 ? negv_optab : neg_optab, op0, target, 0);
4047 if (target
4048 /* Don't use the function value register as a target
4049 since we have to read it as well as write it,
4050 and function-inlining gets confused by this. */
4051 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4052 /* Don't clobber an operand while doing a multi-step calculation. */
4053 || ((rem_flag || op1_is_constant)
4054 && (reg_mentioned_p (target, op0)
4055 || (MEM_P (op0) && MEM_P (target))))
4056 || reg_mentioned_p (target, op1)
4057 || (MEM_P (op1) && MEM_P (target))))
4058 target = 0;
4060 /* Get the mode in which to perform this computation. Normally it will
4061 be MODE, but sometimes we can't do the desired operation in MODE.
4062 If so, pick a wider mode in which we can do the operation. Convert
4063 to that mode at the start to avoid repeated conversions.
4065 First see what operations we need. These depend on the expression
4066 we are evaluating. (We assume that divxx3 insns exist under the
4067 same conditions as modxx3 insns and that these insns don't normally
4068 fail. If these assumptions are not correct, we may generate less
4069 efficient code in some cases.)
4071 Then see if we find a mode in which we can open-code that operation
4072 (either a division, modulus, or shift). Finally, check for the smallest
4073 mode for which we can do the operation with a library call. */
4075 /* We might want to refine this now that we have division-by-constant
4076 optimization. Since expmed_mult_highpart tries so many variants, it is
4077 not straightforward to generalize this. Maybe we should make an array
4078 of possible modes in init_expmed? Save this for GCC 2.7. */
4080 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
4081 ? (unsignedp ? lshr_optab : ashr_optab)
4082 : (unsignedp ? udiv_optab : sdiv_optab));
4083 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
4084 ? optab1
4085 : (unsignedp ? udivmod_optab : sdivmod_optab));
4087 for (compute_mode = mode; compute_mode != VOIDmode;
4088 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4089 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4090 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4091 break;
4093 if (compute_mode == VOIDmode)
4094 for (compute_mode = mode; compute_mode != VOIDmode;
4095 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4096 if (optab_libfunc (optab1, compute_mode)
4097 || optab_libfunc (optab2, compute_mode))
4098 break;
4100 /* If we still couldn't find a mode, use MODE, but expand_binop will
4101 probably die. */
4102 if (compute_mode == VOIDmode)
4103 compute_mode = mode;
4105 if (target && GET_MODE (target) == compute_mode)
4106 tquotient = target;
4107 else
4108 tquotient = gen_reg_rtx (compute_mode);
4110 size = GET_MODE_BITSIZE (compute_mode);
4111 #if 0
4112 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4113 (mode), and thereby get better code when OP1 is a constant. Do that
4114 later. It will require going over all usages of SIZE below. */
4115 size = GET_MODE_BITSIZE (mode);
4116 #endif
4118 /* Only deduct something for a REM if the last divide done was
4119 for a different constant. Then set the constant of the last
4120 divide. */
4121 max_cost = (unsignedp
4122 ? udiv_cost (speed, compute_mode)
4123 : sdiv_cost (speed, compute_mode));
4124 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4125 && INTVAL (op1) == last_div_const))
4126 max_cost -= (mul_cost (speed, compute_mode)
4127 + add_cost (speed, compute_mode));
4129 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4131 /* Now convert to the best mode to use. */
4132 if (compute_mode != mode)
4134 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4135 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4137 /* convert_modes may have placed op1 into a register, so we
4138 must recompute the following. */
4139 op1_is_constant = CONST_INT_P (op1);
4140 op1_is_pow2 = (op1_is_constant
4141 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4142 || (! unsignedp
4143 && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4146 /* If one of the operands is a volatile MEM, copy it into a register. */
4148 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4149 op0 = force_reg (compute_mode, op0);
4150 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4151 op1 = force_reg (compute_mode, op1);
4153 /* If we need the remainder or if OP1 is constant, we need to
4154 put OP0 in a register in case it has any queued subexpressions. */
4155 if (rem_flag || op1_is_constant)
4156 op0 = force_reg (compute_mode, op0);
4158 last = get_last_insn ();
4160 /* Promote floor rounding to trunc rounding for unsigned operations. */
4161 if (unsignedp)
4163 if (code == FLOOR_DIV_EXPR)
4164 code = TRUNC_DIV_EXPR;
4165 if (code == FLOOR_MOD_EXPR)
4166 code = TRUNC_MOD_EXPR;
4167 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4168 code = TRUNC_DIV_EXPR;
4171 if (op1 != const0_rtx)
4172 switch (code)
4174 case TRUNC_MOD_EXPR:
4175 case TRUNC_DIV_EXPR:
4176 if (op1_is_constant)
4178 if (unsignedp)
4180 unsigned HOST_WIDE_INT mh, ml;
4181 int pre_shift, post_shift;
4182 int dummy;
4183 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4184 & GET_MODE_MASK (compute_mode));
4186 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4188 pre_shift = floor_log2 (d);
4189 if (rem_flag)
4191 unsigned HOST_WIDE_INT mask
4192 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4193 remainder
4194 = expand_binop (compute_mode, and_optab, op0,
4195 gen_int_mode (mask, compute_mode),
4196 remainder, 1,
4197 OPTAB_LIB_WIDEN);
4198 if (remainder)
4199 return gen_lowpart (mode, remainder);
4201 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4202 pre_shift, tquotient, 1);
4204 else if (size <= HOST_BITS_PER_WIDE_INT)
4206 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4208 /* Most significant bit of divisor is set; emit an scc
4209 insn. */
4210 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4211 compute_mode, 1, 1);
4213 else
4215 /* Find a suitable multiplier and right shift count
4216 instead of multiplying with D. */
4218 mh = choose_multiplier (d, size, size,
4219 &ml, &post_shift, &dummy);
4221 /* If the suggested multiplier is more than SIZE bits,
4222 we can do better for even divisors, using an
4223 initial right shift. */
4224 if (mh != 0 && (d & 1) == 0)
4226 pre_shift = floor_log2 (d & -d);
4227 mh = choose_multiplier (d >> pre_shift, size,
4228 size - pre_shift,
4229 &ml, &post_shift, &dummy);
4230 gcc_assert (!mh);
4232 else
4233 pre_shift = 0;
4235 if (mh != 0)
4237 rtx t1, t2, t3, t4;
4239 if (post_shift - 1 >= BITS_PER_WORD)
4240 goto fail1;
4242 extra_cost
4243 = (shift_cost (speed, compute_mode, post_shift - 1)
4244 + shift_cost (speed, compute_mode, 1)
4245 + 2 * add_cost (speed, compute_mode));
4246 t1 = expmed_mult_highpart
4247 (compute_mode, op0,
4248 gen_int_mode (ml, compute_mode),
4249 NULL_RTX, 1, max_cost - extra_cost);
4250 if (t1 == 0)
4251 goto fail1;
4252 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4253 op0, t1),
4254 NULL_RTX);
4255 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4256 t2, 1, NULL_RTX, 1);
4257 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4258 t1, t3),
4259 NULL_RTX);
4260 quotient = expand_shift
4261 (RSHIFT_EXPR, compute_mode, t4,
4262 post_shift - 1, tquotient, 1);
4264 else
4266 rtx t1, t2;
4268 if (pre_shift >= BITS_PER_WORD
4269 || post_shift >= BITS_PER_WORD)
4270 goto fail1;
4272 t1 = expand_shift
4273 (RSHIFT_EXPR, compute_mode, op0,
4274 pre_shift, NULL_RTX, 1);
4275 extra_cost
4276 = (shift_cost (speed, compute_mode, pre_shift)
4277 + shift_cost (speed, compute_mode, post_shift));
4278 t2 = expmed_mult_highpart
4279 (compute_mode, t1,
4280 gen_int_mode (ml, compute_mode),
4281 NULL_RTX, 1, max_cost - extra_cost);
4282 if (t2 == 0)
4283 goto fail1;
4284 quotient = expand_shift
4285 (RSHIFT_EXPR, compute_mode, t2,
4286 post_shift, tquotient, 1);
4290 else /* Mode too wide to use the tricky code */
4291 break;
4293 insn = get_last_insn ();
4294 if (insn != last)
4295 set_dst_reg_note (insn, REG_EQUAL,
4296 gen_rtx_UDIV (compute_mode, op0, op1),
4297 quotient);
4299 else /* TRUNC_DIV, signed */
4301 unsigned HOST_WIDE_INT ml;
4302 int lgup, post_shift;
4303 rtx mlr;
4304 HOST_WIDE_INT d = INTVAL (op1);
4305 unsigned HOST_WIDE_INT abs_d;
4307 /* Since d might be INT_MIN, we have to cast to
4308 unsigned HOST_WIDE_INT before negating to avoid
4309 undefined signed overflow. */
4310 abs_d = (d >= 0
4311 ? (unsigned HOST_WIDE_INT) d
4312 : - (unsigned HOST_WIDE_INT) d);
4314 /* n rem d = n rem -d */
4315 if (rem_flag && d < 0)
4317 d = abs_d;
4318 op1 = gen_int_mode (abs_d, compute_mode);
4321 if (d == 1)
4322 quotient = op0;
4323 else if (d == -1)
4324 quotient = expand_unop (compute_mode, neg_optab, op0,
4325 tquotient, 0);
4326 else if (HOST_BITS_PER_WIDE_INT >= size
4327 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4329 /* This case is not handled correctly below. */
4330 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4331 compute_mode, 1, 1);
4332 if (quotient == 0)
4333 goto fail1;
4335 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4336 && (rem_flag
4337 ? smod_pow2_cheap (speed, compute_mode)
4338 : sdiv_pow2_cheap (speed, compute_mode))
4339 /* We assume that the cheap metric is true if the
4340 optab has an expander for this mode. */
4341 && ((optab_handler ((rem_flag ? smod_optab
4342 : sdiv_optab),
4343 compute_mode)
4344 != CODE_FOR_nothing)
4345 || (optab_handler (sdivmod_optab,
4346 compute_mode)
4347 != CODE_FOR_nothing)))
4349 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4351 if (rem_flag)
4353 remainder = expand_smod_pow2 (compute_mode, op0, d);
4354 if (remainder)
4355 return gen_lowpart (mode, remainder);
4358 if (sdiv_pow2_cheap (speed, compute_mode)
4359 && ((optab_handler (sdiv_optab, compute_mode)
4360 != CODE_FOR_nothing)
4361 || (optab_handler (sdivmod_optab, compute_mode)
4362 != CODE_FOR_nothing)))
4363 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4364 compute_mode, op0,
4365 gen_int_mode (abs_d,
4366 compute_mode),
4367 NULL_RTX, 0);
4368 else
4369 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4371 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4372 negate the quotient. */
4373 if (d < 0)
4375 insn = get_last_insn ();
4376 if (insn != last
4377 && abs_d < ((unsigned HOST_WIDE_INT) 1
4378 << (HOST_BITS_PER_WIDE_INT - 1)))
4379 set_dst_reg_note (insn, REG_EQUAL,
4380 gen_rtx_DIV (compute_mode, op0,
4381 gen_int_mode
4382 (abs_d,
4383 compute_mode)),
4384 quotient);
4386 quotient = expand_unop (compute_mode, neg_optab,
4387 quotient, quotient, 0);
4390 else if (size <= HOST_BITS_PER_WIDE_INT)
4392 choose_multiplier (abs_d, size, size - 1,
4393 &ml, &post_shift, &lgup);
4394 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4396 rtx t1, t2, t3;
4398 if (post_shift >= BITS_PER_WORD
4399 || size - 1 >= BITS_PER_WORD)
4400 goto fail1;
4402 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4403 + shift_cost (speed, compute_mode, size - 1)
4404 + add_cost (speed, compute_mode));
4405 t1 = expmed_mult_highpart
4406 (compute_mode, op0, gen_int_mode (ml, compute_mode),
4407 NULL_RTX, 0, max_cost - extra_cost);
4408 if (t1 == 0)
4409 goto fail1;
4410 t2 = expand_shift
4411 (RSHIFT_EXPR, compute_mode, t1,
4412 post_shift, NULL_RTX, 0);
4413 t3 = expand_shift
4414 (RSHIFT_EXPR, compute_mode, op0,
4415 size - 1, NULL_RTX, 0);
4416 if (d < 0)
4417 quotient
4418 = force_operand (gen_rtx_MINUS (compute_mode,
4419 t3, t2),
4420 tquotient);
4421 else
4422 quotient
4423 = force_operand (gen_rtx_MINUS (compute_mode,
4424 t2, t3),
4425 tquotient);
4427 else
4429 rtx t1, t2, t3, t4;
4431 if (post_shift >= BITS_PER_WORD
4432 || size - 1 >= BITS_PER_WORD)
4433 goto fail1;
4435 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4436 mlr = gen_int_mode (ml, compute_mode);
4437 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4438 + shift_cost (speed, compute_mode, size - 1)
4439 + 2 * add_cost (speed, compute_mode));
4440 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4441 NULL_RTX, 0,
4442 max_cost - extra_cost);
4443 if (t1 == 0)
4444 goto fail1;
4445 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4446 t1, op0),
4447 NULL_RTX);
4448 t3 = expand_shift
4449 (RSHIFT_EXPR, compute_mode, t2,
4450 post_shift, NULL_RTX, 0);
4451 t4 = expand_shift
4452 (RSHIFT_EXPR, compute_mode, op0,
4453 size - 1, NULL_RTX, 0);
4454 if (d < 0)
4455 quotient
4456 = force_operand (gen_rtx_MINUS (compute_mode,
4457 t4, t3),
4458 tquotient);
4459 else
4460 quotient
4461 = force_operand (gen_rtx_MINUS (compute_mode,
4462 t3, t4),
4463 tquotient);
4466 else /* Mode too wide to use the tricky code */
4467 break;
4469 insn = get_last_insn ();
4470 if (insn != last)
4471 set_dst_reg_note (insn, REG_EQUAL,
4472 gen_rtx_DIV (compute_mode, op0, op1),
4473 quotient);
4475 break;
4477 fail1:
4478 delete_insns_since (last);
4479 break;
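/* As an illustration of the signed path above: for division by 7 in a
   32-bit mode, choose_multiplier (7, 32, 31, ...) yields ml == 0x92492493
   and post_shift == 2.  Because ml has its top bit set, the second branch
   sign-extends it and adds OP0 back in; a C sketch (assuming
   two's-complement int32_t and arithmetic right shifts) is

       int32_t m  = (int32_t) 0x92492493u;
       int32_t t1 = (int32_t) (((int64_t) x * m) >> 32);
       int32_t q  = ((t1 + x) >> 2) - (x >> 31);

   which truncates towards zero, e.g. -20 / 7 == -2.  */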
4481 case FLOOR_DIV_EXPR:
4482 case FLOOR_MOD_EXPR:
4483 /* We will come here only for signed operations. */
4484 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4486 unsigned HOST_WIDE_INT mh, ml;
4487 int pre_shift, lgup, post_shift;
4488 HOST_WIDE_INT d = INTVAL (op1);
4490 if (d > 0)
4492 /* We could just as easily deal with negative constants here,
4493 but it does not seem worth the trouble for GCC 2.6. */
4494 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4496 pre_shift = floor_log2 (d);
4497 if (rem_flag)
4499 unsigned HOST_WIDE_INT mask
4500 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4501 remainder = expand_binop
4502 (compute_mode, and_optab, op0,
4503 gen_int_mode (mask, compute_mode),
4504 remainder, 0, OPTAB_LIB_WIDEN);
4505 if (remainder)
4506 return gen_lowpart (mode, remainder);
4508 quotient = expand_shift
4509 (RSHIFT_EXPR, compute_mode, op0,
4510 pre_shift, tquotient, 0);
4512 else
4514 rtx t1, t2, t3, t4;
4516 mh = choose_multiplier (d, size, size - 1,
4517 &ml, &post_shift, &lgup);
4518 gcc_assert (!mh);
4520 if (post_shift < BITS_PER_WORD
4521 && size - 1 < BITS_PER_WORD)
4523 t1 = expand_shift
4524 (RSHIFT_EXPR, compute_mode, op0,
4525 size - 1, NULL_RTX, 0);
4526 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4527 NULL_RTX, 0, OPTAB_WIDEN);
4528 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4529 + shift_cost (speed, compute_mode, size - 1)
4530 + 2 * add_cost (speed, compute_mode));
4531 t3 = expmed_mult_highpart
4532 (compute_mode, t2, gen_int_mode (ml, compute_mode),
4533 NULL_RTX, 1, max_cost - extra_cost);
4534 if (t3 != 0)
4536 t4 = expand_shift
4537 (RSHIFT_EXPR, compute_mode, t3,
4538 post_shift, NULL_RTX, 1);
4539 quotient = expand_binop (compute_mode, xor_optab,
4540 t4, t1, tquotient, 0,
4541 OPTAB_WIDEN);
4546 else
4548 rtx nsign, t1, t2, t3, t4;
4549 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4550 op0, constm1_rtx), NULL_RTX);
4551 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4552 0, OPTAB_WIDEN);
4553 nsign = expand_shift
4554 (RSHIFT_EXPR, compute_mode, t2,
4555 size - 1, NULL_RTX, 0);
4556 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4557 NULL_RTX);
4558 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4559 NULL_RTX, 0);
4560 if (t4)
4562 rtx t5;
4563 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4564 NULL_RTX, 0);
4565 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4566 t4, t5),
4567 tquotient);
4572 if (quotient != 0)
4573 break;
4574 delete_insns_since (last);
4576 /* Try using an instruction that produces both the quotient and
4577 remainder, using truncation. We can easily compensate the quotient
4578 or remainder to get floor rounding, once we have the remainder.
4579 Notice that we compute also the final remainder value here,
4580 and return the result right away. */
4581 if (target == 0 || GET_MODE (target) != compute_mode)
4582 target = gen_reg_rtx (compute_mode);
4584 if (rem_flag)
4586 remainder
4587 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4588 quotient = gen_reg_rtx (compute_mode);
4590 else
4592 quotient
4593 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4594 remainder = gen_reg_rtx (compute_mode);
4597 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4598 quotient, remainder, 0))
4600 /* This could be computed with a branch-less sequence.
4601 Save that for later. */
4602 rtx tem;
4603 rtx_code_label *label = gen_label_rtx ();
4604 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4605 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4606 NULL_RTX, 0, OPTAB_WIDEN);
4607 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4608 expand_dec (quotient, const1_rtx);
4609 expand_inc (remainder, op1);
4610 emit_label (label);
4611 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4614 /* No luck with division elimination or divmod. Have to do it
4615 by conditionally adjusting op0 *and* the result. */
4617 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4618 rtx adjusted_op0;
4619 rtx tem;
4621 quotient = gen_reg_rtx (compute_mode);
4622 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4623 label1 = gen_label_rtx ();
4624 label2 = gen_label_rtx ();
4625 label3 = gen_label_rtx ();
4626 label4 = gen_label_rtx ();
4627 label5 = gen_label_rtx ();
4628 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4629 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4630 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4631 quotient, 0, OPTAB_LIB_WIDEN);
4632 if (tem != quotient)
4633 emit_move_insn (quotient, tem);
4634 emit_jump_insn (gen_jump (label5));
4635 emit_barrier ();
4636 emit_label (label1);
4637 expand_inc (adjusted_op0, const1_rtx);
4638 emit_jump_insn (gen_jump (label4));
4639 emit_barrier ();
4640 emit_label (label2);
4641 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4642 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4643 quotient, 0, OPTAB_LIB_WIDEN);
4644 if (tem != quotient)
4645 emit_move_insn (quotient, tem);
4646 emit_jump_insn (gen_jump (label5));
4647 emit_barrier ();
4648 emit_label (label3);
4649 expand_dec (adjusted_op0, const1_rtx);
4650 emit_label (label4);
4651 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4652 quotient, 0, OPTAB_LIB_WIDEN);
4653 if (tem != quotient)
4654 emit_move_insn (quotient, tem);
4655 expand_dec (quotient, const1_rtx);
4656 emit_label (label5);
4658 break;
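/* The adjustment used throughout this case: floor rounding differs from
   truncation only when the remainder is nonzero and the operands have
   opposite signs; then the quotient is one smaller and the remainder is
   shifted by the divisor.  As a C sketch:

       q = a / b;  r = a % b;
       if (r != 0 && (a ^ b) < 0)
         q -= 1, r += b;

   e.g. -7 / 2 truncates to -3 remainder -1, and floor rounding turns that
   into -4 remainder 1.  */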
4660 case CEIL_DIV_EXPR:
4661 case CEIL_MOD_EXPR:
4662 if (unsignedp)
4664 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4666 rtx t1, t2, t3;
4667 unsigned HOST_WIDE_INT d = INTVAL (op1);
4668 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4669 floor_log2 (d), tquotient, 1);
4670 t2 = expand_binop (compute_mode, and_optab, op0,
4671 gen_int_mode (d - 1, compute_mode),
4672 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4673 t3 = gen_reg_rtx (compute_mode);
4674 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4675 compute_mode, 1, 1);
4676 if (t3 == 0)
4678 rtx_code_label *lab;
4679 lab = gen_label_rtx ();
4680 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4681 expand_inc (t1, const1_rtx);
4682 emit_label (lab);
4683 quotient = t1;
4685 else
4686 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4687 t1, t3),
4688 tquotient);
4689 break;
4692 /* Try using an instruction that produces both the quotient and
4693 remainder, using truncation. We can easily compensate the
4694 quotient or remainder to get ceiling rounding, once we have the
4695 remainder. Notice that we compute also the final remainder
4696 value here, and return the result right away. */
4697 if (target == 0 || GET_MODE (target) != compute_mode)
4698 target = gen_reg_rtx (compute_mode);
4700 if (rem_flag)
4702 remainder = (REG_P (target)
4703 ? target : gen_reg_rtx (compute_mode));
4704 quotient = gen_reg_rtx (compute_mode);
4706 else
4708 quotient = (REG_P (target)
4709 ? target : gen_reg_rtx (compute_mode));
4710 remainder = gen_reg_rtx (compute_mode);
4713 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4714 remainder, 1))
4716 /* This could be computed with a branch-less sequence.
4717 Save that for later. */
4718 rtx_code_label *label = gen_label_rtx ();
4719 do_cmp_and_jump (remainder, const0_rtx, EQ,
4720 compute_mode, label);
4721 expand_inc (quotient, const1_rtx);
4722 expand_dec (remainder, op1);
4723 emit_label (label);
4724 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4727 /* No luck with division elimination or divmod. Have to do it
4728 by conditionally adjusting op0 *and* the result. */
4730 rtx_code_label *label1, *label2;
4731 rtx adjusted_op0, tem;
4733 quotient = gen_reg_rtx (compute_mode);
4734 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4735 label1 = gen_label_rtx ();
4736 label2 = gen_label_rtx ();
4737 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4738 compute_mode, label1);
4739 emit_move_insn (quotient, const0_rtx);
4740 emit_jump_insn (gen_jump (label2));
4741 emit_barrier ();
4742 emit_label (label1);
4743 expand_dec (adjusted_op0, const1_rtx);
4744 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4745 quotient, 1, OPTAB_LIB_WIDEN);
4746 if (tem != quotient)
4747 emit_move_insn (quotient, tem);
4748 expand_inc (quotient, const1_rtx);
4749 emit_label (label2);
4752 else /* signed */
4754 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4755 && INTVAL (op1) >= 0)
4757 /* This is extremely similar to the code for the unsigned case
4758 above. For 2.7 we should merge these variants, but for
4759 2.6.1 I don't want to touch the code for unsigned since that
4761 gets used in C. The signed case will only be used by other
4761 languages (Ada). */
4763 rtx t1, t2, t3;
4764 unsigned HOST_WIDE_INT d = INTVAL (op1);
4765 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4766 floor_log2 (d), tquotient, 0);
4767 t2 = expand_binop (compute_mode, and_optab, op0,
4768 gen_int_mode (d - 1, compute_mode),
4769 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4770 t3 = gen_reg_rtx (compute_mode);
4771 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4772 compute_mode, 1, 1);
4773 if (t3 == 0)
4775 rtx_code_label *lab;
4776 lab = gen_label_rtx ();
4777 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4778 expand_inc (t1, const1_rtx);
4779 emit_label (lab);
4780 quotient = t1;
4782 else
4783 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4784 t1, t3),
4785 tquotient);
4786 break;
4789 /* Try using an instruction that produces both the quotient and
4790 remainder, using truncation. We can easily compensate the
4791 quotient or remainder to get ceiling rounding, once we have the
4792 remainder. Notice that we compute also the final remainder
4793 value here, and return the result right away. */
4794 if (target == 0 || GET_MODE (target) != compute_mode)
4795 target = gen_reg_rtx (compute_mode);
4796 if (rem_flag)
4798 remainder = (REG_P (target)
4799 ? target : gen_reg_rtx (compute_mode));
4800 quotient = gen_reg_rtx (compute_mode);
4802 else
4804 quotient = (REG_P (target)
4805 ? target : gen_reg_rtx (compute_mode));
4806 remainder = gen_reg_rtx (compute_mode);
4809 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4810 remainder, 0))
4812 /* This could be computed with a branch-less sequence.
4813 Save that for later. */
4814 rtx tem;
4815 rtx_code_label *label = gen_label_rtx ();
4816 do_cmp_and_jump (remainder, const0_rtx, EQ,
4817 compute_mode, label);
4818 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4819 NULL_RTX, 0, OPTAB_WIDEN);
4820 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4821 expand_inc (quotient, const1_rtx);
4822 expand_dec (remainder, op1);
4823 emit_label (label);
4824 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4827 /* No luck with division elimination or divmod. Have to do it
4828 by conditionally adjusting op0 *and* the result. */
4830 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4831 rtx adjusted_op0;
4832 rtx tem;
4834 quotient = gen_reg_rtx (compute_mode);
4835 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4836 label1 = gen_label_rtx ();
4837 label2 = gen_label_rtx ();
4838 label3 = gen_label_rtx ();
4839 label4 = gen_label_rtx ();
4840 label5 = gen_label_rtx ();
4841 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4842 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4843 compute_mode, label1);
4844 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4845 quotient, 0, OPTAB_LIB_WIDEN);
4846 if (tem != quotient)
4847 emit_move_insn (quotient, tem);
4848 emit_jump_insn (gen_jump (label5));
4849 emit_barrier ();
4850 emit_label (label1);
4851 expand_dec (adjusted_op0, const1_rtx);
4852 emit_jump_insn (gen_jump (label4));
4853 emit_barrier ();
4854 emit_label (label2);
4855 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4856 compute_mode, label3);
4857 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4858 quotient, 0, OPTAB_LIB_WIDEN);
4859 if (tem != quotient)
4860 emit_move_insn (quotient, tem);
4861 emit_jump_insn (gen_jump (label5));
4862 emit_barrier ();
4863 emit_label (label3);
4864 expand_inc (adjusted_op0, const1_rtx);
4865 emit_label (label4);
4866 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4867 quotient, 0, OPTAB_LIB_WIDEN);
4868 if (tem != quotient)
4869 emit_move_insn (quotient, tem);
4870 expand_inc (quotient, const1_rtx);
4871 emit_label (label5);
4874 break;
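/* Ceiling rounding is the mirror image: the quotient grows by one whenever
   a nonzero remainder would otherwise be dropped.  For the unsigned
   power-of-two path above that is simply

       q = (x >> logd) + ((x & (d - 1)) != 0);

   and after a truncating divmod the generic fixup is q += 1, r -= d
   whenever r != 0 (in the signed case only when the operands have the same
   sign, which is what the XOR test above checks).  Sketches only.  */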
4876 case EXACT_DIV_EXPR:
4877 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4879 HOST_WIDE_INT d = INTVAL (op1);
4880 unsigned HOST_WIDE_INT ml;
4881 int pre_shift;
4882 rtx t1;
4884 pre_shift = floor_log2 (d & -d);
4885 ml = invert_mod2n (d >> pre_shift, size);
4886 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4887 pre_shift, NULL_RTX, unsignedp);
4888 quotient = expand_mult (compute_mode, t1,
4889 gen_int_mode (ml, compute_mode),
4890 NULL_RTX, 1);
4892 insn = get_last_insn ();
4893 set_dst_reg_note (insn, REG_EQUAL,
4894 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4895 compute_mode, op0, op1),
4896 quotient);
4898 break;
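/* Exact division needs no high-part multiply at all: for an odd divisor,
   the low product with its inverse mod 2^N already is the quotient.  For
   example 7 * 0xB6DB6DB7 == 5 * 2^32 + 1, so in 32-bit arithmetic

       q = x * 0xB6DB6DB7u;

   gives x / 7 whenever x is a multiple of 7 (e.g. 21 * 0xB6DB6DB7 == 3
   mod 2^32).  An even divisor such as 28 is handled by the pre_shift
   above: shift right by 2 first, then multiply by the inverse of 7.  */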
4900 case ROUND_DIV_EXPR:
4901 case ROUND_MOD_EXPR:
4902 if (unsignedp)
4904 rtx tem;
4905 rtx_code_label *label;
4906 label = gen_label_rtx ();
4907 quotient = gen_reg_rtx (compute_mode);
4908 remainder = gen_reg_rtx (compute_mode);
4909 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4911 rtx tem;
4912 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4913 quotient, 1, OPTAB_LIB_WIDEN);
4914 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4915 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4916 remainder, 1, OPTAB_LIB_WIDEN);
4918 tem = plus_constant (compute_mode, op1, -1);
4919 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4920 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4921 expand_inc (quotient, const1_rtx);
4922 expand_dec (remainder, op1);
4923 emit_label (label);
4925 else
4927 rtx abs_rem, abs_op1, tem, mask;
4928 rtx_code_label *label;
4929 label = gen_label_rtx ();
4930 quotient = gen_reg_rtx (compute_mode);
4931 remainder = gen_reg_rtx (compute_mode);
4932 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4934 rtx tem;
4935 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4936 quotient, 0, OPTAB_LIB_WIDEN);
4937 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4938 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4939 remainder, 0, OPTAB_LIB_WIDEN);
4941 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4942 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4943 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4944 1, NULL_RTX, 1);
4945 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4946 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4947 NULL_RTX, 0, OPTAB_WIDEN);
4948 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4949 size - 1, NULL_RTX, 0);
4950 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4951 NULL_RTX, 0, OPTAB_WIDEN);
4952 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4953 NULL_RTX, 0, OPTAB_WIDEN);
4954 expand_inc (quotient, tem);
4955 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4956 NULL_RTX, 0, OPTAB_WIDEN);
4957 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4958 NULL_RTX, 0, OPTAB_WIDEN);
4959 expand_dec (remainder, tem);
4960 emit_label (label);
4962 return gen_lowpart (mode, rem_flag ? remainder : quotient);
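/* The unsigned variant above rounds to nearest by comparing the remainder
   against (op1 - 1) >> 1: e.g. 10 ROUND_DIV 4 first gives q == 2, r == 2,
   and since 2 > 1 it is adjusted to q == 3, r == -2, i.e. ties are rounded
   away from zero.  The signed variant makes the same decision on absolute
   values (2 * |r| >= |op1|) and then applies the sign of op0 ^ op1 through
   the mask sequence above.  */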
4964 default:
4965 gcc_unreachable ();
4968 if (quotient == 0)
4970 if (target && GET_MODE (target) != compute_mode)
4971 target = 0;
4973 if (rem_flag)
4975 /* Try to produce the remainder without producing the quotient.
4976 If we seem to have a divmod pattern that does not require widening,
4977 don't try widening here. We should really have a WIDEN argument
4978 to expand_twoval_binop, since what we'd really like to do here is
4979 1) try a mod insn in compute_mode
4980 2) try a divmod insn in compute_mode
4981 3) try a div insn in compute_mode and multiply-subtract to get
4982 remainder
4983 4) try the same things with widening allowed. */
4984 remainder
4985 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4986 op0, op1, target,
4987 unsignedp,
4988 ((optab_handler (optab2, compute_mode)
4989 != CODE_FOR_nothing)
4990 ? OPTAB_DIRECT : OPTAB_WIDEN));
4991 if (remainder == 0)
4993 /* No luck there. Can we do remainder and divide at once
4994 without a library call? */
4995 remainder = gen_reg_rtx (compute_mode);
4996 if (! expand_twoval_binop ((unsignedp
4997 ? udivmod_optab
4998 : sdivmod_optab),
4999 op0, op1,
5000 NULL_RTX, remainder, unsignedp))
5001 remainder = 0;
5004 if (remainder)
5005 return gen_lowpart (mode, remainder);
5008 /* Produce the quotient. Try a quotient insn, but not a library call.
5009 If we have a divmod in this mode, use it in preference to widening
5010 the div (for this test we assume it will not fail). Note that optab2
5011 is set to whichever of the two optabs the call below will use. */
5012 quotient
5013 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5014 op0, op1, rem_flag ? NULL_RTX : target,
5015 unsignedp,
5016 ((optab_handler (optab2, compute_mode)
5017 != CODE_FOR_nothing)
5018 ? OPTAB_DIRECT : OPTAB_WIDEN));
5020 if (quotient == 0)
5022 /* No luck there. Try a quotient-and-remainder insn,
5023 keeping the quotient alone. */
5024 quotient = gen_reg_rtx (compute_mode);
5025 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5026 op0, op1,
5027 quotient, NULL_RTX, unsignedp))
5029 quotient = 0;
5030 if (! rem_flag)
5031 /* Still no luck. If we are not computing the remainder,
5032 use a library call for the quotient. */
5033 quotient = sign_expand_binop (compute_mode,
5034 udiv_optab, sdiv_optab,
5035 op0, op1, target,
5036 unsignedp, OPTAB_LIB_WIDEN);
5041 if (rem_flag)
5043 if (target && GET_MODE (target) != compute_mode)
5044 target = 0;
5046 if (quotient == 0)
5048 /* No divide instruction either. Use library for remainder. */
5049 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5050 op0, op1, target,
5051 unsignedp, OPTAB_LIB_WIDEN);
5052 /* No remainder function. Try a quotient-and-remainder
5053 function, keeping the remainder. */
5054 if (!remainder)
5056 remainder = gen_reg_rtx (compute_mode);
5057 if (!expand_twoval_binop_libfunc
5058 (unsignedp ? udivmod_optab : sdivmod_optab,
5059 op0, op1,
5060 NULL_RTX, remainder,
5061 unsignedp ? UMOD : MOD))
5062 remainder = NULL_RTX;
5065 else
5067 /* We divided. Now finish doing X - Y * (X / Y). */
5068 remainder = expand_mult (compute_mode, quotient, op1,
5069 NULL_RTX, unsignedp);
5070 remainder = expand_binop (compute_mode, sub_optab, op0,
5071 remainder, target, unsignedp,
5072 OPTAB_LIB_WIDEN);
5076 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5079 /* Return a tree node with data type TYPE, describing the value of X.
5080 Usually this is a VAR_DECL, if there is no obvious better choice.
5081 X may be an expression; however, we only support those expressions
5082 generated by loop.c. */
5084 tree
5085 make_tree (tree type, rtx x)
5087 tree t;
5089 switch (GET_CODE (x))
5091 case CONST_INT:
5092 case CONST_WIDE_INT:
5093 t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
5094 return t;
5096 case CONST_DOUBLE:
5097 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5098 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5099 t = wide_int_to_tree (type,
5100 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5101 HOST_BITS_PER_WIDE_INT * 2));
5102 else
5104 REAL_VALUE_TYPE d;
5106 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5107 t = build_real (type, d);
5110 return t;
5112 case CONST_VECTOR:
5114 int units = CONST_VECTOR_NUNITS (x);
5115 tree itype = TREE_TYPE (type);
5116 tree *elts;
5117 int i;
5119 /* Build a tree with vector elements. */
5120 elts = XALLOCAVEC (tree, units);
5121 for (i = units - 1; i >= 0; --i)
5123 rtx elt = CONST_VECTOR_ELT (x, i);
5124 elts[i] = make_tree (itype, elt);
5127 return build_vector (type, elts);
5130 case PLUS:
5131 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5132 make_tree (type, XEXP (x, 1)));
5134 case MINUS:
5135 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5136 make_tree (type, XEXP (x, 1)));
5138 case NEG:
5139 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5141 case MULT:
5142 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5143 make_tree (type, XEXP (x, 1)));
5145 case ASHIFT:
5146 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5147 make_tree (type, XEXP (x, 1)));
5149 case LSHIFTRT:
5150 t = unsigned_type_for (type);
5151 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5152 make_tree (t, XEXP (x, 0)),
5153 make_tree (type, XEXP (x, 1))));
5155 case ASHIFTRT:
5156 t = signed_type_for (type);
5157 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5158 make_tree (t, XEXP (x, 0)),
5159 make_tree (type, XEXP (x, 1))));
5161 case DIV:
5162 if (TREE_CODE (type) != REAL_TYPE)
5163 t = signed_type_for (type);
5164 else
5165 t = type;
5167 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5168 make_tree (t, XEXP (x, 0)),
5169 make_tree (t, XEXP (x, 1))));
5170 case UDIV:
5171 t = unsigned_type_for (type);
5172 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5173 make_tree (t, XEXP (x, 0)),
5174 make_tree (t, XEXP (x, 1))));
5176 case SIGN_EXTEND:
5177 case ZERO_EXTEND:
5178 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5179 GET_CODE (x) == ZERO_EXTEND);
5180 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5182 case CONST:
5183 return make_tree (type, XEXP (x, 0));
5185 case SYMBOL_REF:
5186 t = SYMBOL_REF_DECL (x);
5187 if (t)
5188 return fold_convert (type, build_fold_addr_expr (t));
5189 /* else fall through. */
5191 default:
5192 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5194 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5195 address mode to pointer mode. */
5196 if (POINTER_TYPE_P (type))
5197 x = convert_memory_address_addr_space
5198 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5200 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5201 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5202 t->decl_with_rtl.rtl = x;
5204 return t;
5208 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5209 and returning TARGET.
5211 If TARGET is 0, a pseudo-register or constant is returned. */
5214 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5216 rtx tem = 0;
5218 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5219 tem = simplify_binary_operation (AND, mode, op0, op1);
5220 if (tem == 0)
5221 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5223 if (target == 0)
5224 target = tem;
5225 else if (tem != target)
5226 emit_move_insn (target, tem);
5227 return target;
5230 /* Helper function for emit_store_flag. */
5232 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5233 machine_mode mode, machine_mode compare_mode,
5234 int unsignedp, rtx x, rtx y, int normalizep,
5235 machine_mode target_mode)
5237 struct expand_operand ops[4];
5238 rtx op0, comparison, subtarget;
5239 rtx_insn *last;
5240 machine_mode result_mode = targetm.cstore_mode (icode);
5242 last = get_last_insn ();
5243 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5244 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5245 if (!x || !y)
5247 delete_insns_since (last);
5248 return NULL_RTX;
5251 if (target_mode == VOIDmode)
5252 target_mode = result_mode;
5253 if (!target)
5254 target = gen_reg_rtx (target_mode);
5256 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5258 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5259 create_fixed_operand (&ops[1], comparison);
5260 create_fixed_operand (&ops[2], x);
5261 create_fixed_operand (&ops[3], y);
5262 if (!maybe_expand_insn (icode, 4, ops))
5264 delete_insns_since (last);
5265 return NULL_RTX;
5267 subtarget = ops[0].value;
5269 /* If we are converting to a wider mode, first convert to
5270 TARGET_MODE, then normalize. This produces better combining
5271 opportunities on machines that have a SIGN_EXTRACT when we are
5272 testing a single bit. This mostly benefits the 68k.
5274 If STORE_FLAG_VALUE does not have the sign bit set when
5275 interpreted in MODE, we can do this conversion as unsigned, which
5276 is usually more efficient. */
5277 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5279 convert_move (target, subtarget,
5280 val_signbit_known_clear_p (result_mode,
5281 STORE_FLAG_VALUE));
5282 op0 = target;
5283 result_mode = target_mode;
5285 else
5286 op0 = subtarget;
5288 /* If we want to keep subexpressions around, don't reuse our last
5289 target. */
5290 if (optimize)
5291 subtarget = 0;
5293 /* Now normalize to the proper value in MODE. Sometimes we don't
5294 have to do anything. */
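/* Illustration, assuming a hypothetical target whose scc pattern produces
   STORE_FLAG_VALUE == -1: a request for NORMALIZEP == 1 takes the negation
   branch below (-NORMALIZEP == STORE_FLAG_VALUE), mapping the raw -1/0
   result to 1/0.  If STORE_FLAG_VALUE were instead just the sign bit
   (e.g. 0x80000000 in SImode), the shift branch moves that bit down to
   bit 0, giving 0/1.  */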
5295 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5297 /* STORE_FLAG_VALUE might be the most negative number, so write
5298 the comparison this way to avoid a compile-time warning. */
5299 else if (- normalizep == STORE_FLAG_VALUE)
5300 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5302 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5303 it hard to use a value of just the sign bit due to ANSI integer
5304 constant typing rules. */
5305 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5306 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5307 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5308 normalizep == 1);
5309 else
5311 gcc_assert (STORE_FLAG_VALUE & 1);
5313 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5314 if (normalizep == -1)
5315 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5318 /* If we were converting to a smaller mode, do the conversion now. */
5319 if (target_mode != result_mode)
5321 convert_move (target, op0, 0);
5322 return target;
5324 else
5325 return op0;
5329 /* A subroutine of emit_store_flag only including "tricks" that do not
5330 need a recursive call. These are kept separate to avoid infinite
5331 loops. */
5333 static rtx
5334 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5335 machine_mode mode, int unsignedp, int normalizep,
5336 machine_mode target_mode)
5338 rtx subtarget;
5339 enum insn_code icode;
5340 machine_mode compare_mode;
5341 enum mode_class mclass;
5342 enum rtx_code scode;
5343 rtx tem;
5345 if (unsignedp)
5346 code = unsigned_condition (code);
5347 scode = swap_condition (code);
5349 /* If one operand is constant, make it the second one. Only do this
5350 if the other operand is not constant as well. */
5352 if (swap_commutative_operands_p (op0, op1))
5354 tem = op0;
5355 op0 = op1;
5356 op1 = tem;
5357 code = swap_condition (code);
5360 if (mode == VOIDmode)
5361 mode = GET_MODE (op0);
5363 /* For some comparisons with 1 and -1, we can convert this to
5364 comparisons with zero. This will often produce more opportunities for
5365 store-flag insns. */
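/* Examples of the rewrites below: "x < 1" becomes "x <= 0" and "x > -1"
   becomes "x >= 0"; for unsigned operands, "x >= 1" becomes "x != 0" and
   "x < 1" becomes "x == 0", since no unsigned value is below zero.  */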
5367 switch (code)
5369 case LT:
5370 if (op1 == const1_rtx)
5371 op1 = const0_rtx, code = LE;
5372 break;
5373 case LE:
5374 if (op1 == constm1_rtx)
5375 op1 = const0_rtx, code = LT;
5376 break;
5377 case GE:
5378 if (op1 == const1_rtx)
5379 op1 = const0_rtx, code = GT;
5380 break;
5381 case GT:
5382 if (op1 == constm1_rtx)
5383 op1 = const0_rtx, code = GE;
5384 break;
5385 case GEU:
5386 if (op1 == const1_rtx)
5387 op1 = const0_rtx, code = NE;
5388 break;
5389 case LTU:
5390 if (op1 == const1_rtx)
5391 op1 = const0_rtx, code = EQ;
5392 break;
5393 default:
5394 break;
5397 /* If we are comparing a double-word integer with zero or -1, we can
5398 convert the comparison into one involving a single word. */
5399 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5400 && GET_MODE_CLASS (mode) == MODE_INT
5401 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5403 if ((code == EQ || code == NE)
5404 && (op1 == const0_rtx || op1 == constm1_rtx))
5406 rtx op00, op01;
5408 /* Do a logical OR or AND of the two words and compare the
5409 result. */
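/* E.g. with a DImode X on a 32-bit-word target, "X == 0" holds exactly when
   (low_word | high_word) == 0, and "X == -1" exactly when
   (low_word & high_word) == -1, so a single word_mode store-flag suffices.  */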
5410 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5411 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5412 tem = expand_binop (word_mode,
5413 op1 == const0_rtx ? ior_optab : and_optab,
5414 op00, op01, NULL_RTX, unsignedp,
5415 OPTAB_DIRECT);
5417 if (tem != 0)
5418 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5419 unsignedp, normalizep);
5421 else if ((code == LT || code == GE) && op1 == const0_rtx)
5423 rtx op0h;
5425 /* If testing the sign bit, can just test on high word. */
5426 op0h = simplify_gen_subreg (word_mode, op0, mode,
5427 subreg_highpart_offset (word_mode,
5428 mode));
5429 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5430 unsignedp, normalizep);
5432 else
5433 tem = NULL_RTX;
5435 if (tem)
5437 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5438 return tem;
5439 if (!target)
5440 target = gen_reg_rtx (target_mode);
5442 convert_move (target, tem,
5443 !val_signbit_known_set_p (word_mode,
5444 (normalizep ? normalizep
5445 : STORE_FLAG_VALUE)));
5446 return target;
5450 /* If this is A < 0 or A >= 0, we can do this by taking the one's
5451 complement of A (for GE) and shifting the sign bit to the low bit. */
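/* Worked example in an 8-bit mode: for A < 0 with A = -3 (0xfd), a logical
   right shift of A by 7 yields 1, and A = 5 yields 0.  For A >= 0 the one's
   complement is shifted instead: ~5 = 0xfa yields 1, ~(-3) = 0x02 yields 0.  */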
5452 if (op1 == const0_rtx && (code == LT || code == GE)
5453 && GET_MODE_CLASS (mode) == MODE_INT
5454 && (normalizep || STORE_FLAG_VALUE == 1
5455 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5457 subtarget = target;
5459 if (!target)
5460 target_mode = mode;
5462 /* If the result is to be wider than OP0, it is best to convert it
5463 first. If it is to be narrower, it is *incorrect* to convert it
5464 first. */
5465 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5467 op0 = convert_modes (target_mode, mode, op0, 0);
5468 mode = target_mode;
5471 if (target_mode != mode)
5472 subtarget = 0;
5474 if (code == GE)
5475 op0 = expand_unop (mode, one_cmpl_optab, op0,
5476 ((STORE_FLAG_VALUE == 1 || normalizep)
5477 ? 0 : subtarget), 0);
5479 if (STORE_FLAG_VALUE == 1 || normalizep)
5480 /* If we are supposed to produce a 0/1 value, we want to do
5481 a logical shift from the sign bit to the low-order bit; for
5482 a -1/0 value, we do an arithmetic shift. */
5483 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5484 GET_MODE_BITSIZE (mode) - 1,
5485 subtarget, normalizep != -1);
5487 if (mode != target_mode)
5488 op0 = convert_modes (target_mode, mode, op0, 0);
5490 return op0;
5493 mclass = GET_MODE_CLASS (mode);
5494 for (compare_mode = mode; compare_mode != VOIDmode;
5495 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5497 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5498 icode = optab_handler (cstore_optab, optab_mode);
5499 if (icode != CODE_FOR_nothing)
5501 do_pending_stack_adjust ();
5502 tem = emit_cstore (target, icode, code, mode, compare_mode,
5503 unsignedp, op0, op1, normalizep, target_mode);
5504 if (tem)
5505 return tem;
5507 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5509 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5510 unsignedp, op1, op0, normalizep, target_mode);
5511 if (tem)
5512 return tem;
5514 break;
5518 return 0;
5521 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5522 and storing in TARGET. Normally return TARGET.
5523 Return 0 if that cannot be done.
5525 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5526 it is VOIDmode, they cannot both be CONST_INT.
5528 UNSIGNEDP is for the case where we have to widen the operands
5529 to perform the operation. It says to use zero-extension.
5531 NORMALIZEP is 1 if we should convert the result to be either zero
5532 or one. NORMALIZEP is -1 if we should convert the result to be
5533 either zero or -1. If NORMALIZEP is zero, the result will be left
5534 "raw" out of the scc insn. */
5536 rtx
5537 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5538 machine_mode mode, int unsignedp, int normalizep)
5540 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5541 enum rtx_code rcode;
5542 rtx subtarget;
5543 rtx tem, trueval;
5544 rtx_insn *last;
5546 /* If we compare constants, we shouldn't use a store-flag operation,
5547 but a constant load. We can get there via the vanilla route that
5548 usually generates a compare-branch sequence, but will in this case
5549 fold the comparison to a constant, and thus elide the branch. */
5550 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5551 return NULL_RTX;
5553 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5554 target_mode);
5555 if (tem)
5556 return tem;
5558 /* If we reached here, we can't do this with a scc insn, however there
5559 are some comparisons that can be done in other ways. Don't do any
5560 of these cases if branches are very cheap. */
5561 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5562 return 0;
5564 /* See what we need to return. We can only return a 1, -1, or the
5565 sign bit. */
5567 if (normalizep == 0)
5569 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5570 normalizep = STORE_FLAG_VALUE;
5572 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5574 else
5575 return 0;
5578 last = get_last_insn ();
5580 /* If optimizing, use different pseudo registers for each insn, instead
5581 of reusing the same pseudo. This leads to better CSE, but slows
5582 down the compiler, since there are more pseudos. */
5583 subtarget = (!optimize
5584 && (target_mode == mode)) ? target : NULL_RTX;
5585 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5587 /* For floating-point comparisons, try the reverse comparison or try
5588 changing the "orderedness" of the comparison. */
5589 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5591 enum rtx_code first_code;
5592 bool and_them;
5594 rcode = reverse_condition_maybe_unordered (code);
5595 if (can_compare_p (rcode, mode, ccp_store_flag)
5596 && (code == ORDERED || code == UNORDERED
5597 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5598 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5600 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5601 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5603 /* For the reverse comparison, use either an addition or a XOR. */
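/* Illustration, assuming STORE_FLAG_VALUE == 1 and NORMALIZEP == -1: the
   reverse comparison yields 1 when the original comparison is false, so
   adding -1 maps that 1/0 result to the wanted 0/-1.  In the XOR case the
   reverse comparison is normalized to the true value itself, so XORing with
   it yields 0 when the original is false and the true value when it holds.  */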
5604 if (want_add
5605 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5606 optimize_insn_for_speed_p ()) == 0)
5608 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5609 STORE_FLAG_VALUE, target_mode);
5610 if (tem)
5611 return expand_binop (target_mode, add_optab, tem,
5612 gen_int_mode (normalizep, target_mode),
5613 target, 0, OPTAB_WIDEN);
5615 else if (!want_add
5616 && rtx_cost (trueval, XOR, 1,
5617 optimize_insn_for_speed_p ()) == 0)
5619 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5620 normalizep, target_mode);
5621 if (tem)
5622 return expand_binop (target_mode, xor_optab, tem, trueval,
5623 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5627 delete_insns_since (last);
5629 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5630 if (code == ORDERED || code == UNORDERED)
5631 return 0;
5633 and_them = split_comparison (code, mode, &first_code, &code);
5635 /* If there are no NaNs, the first comparison should always fall through.
5636 Effectively change the comparison to the other one. */
5637 if (!HONOR_NANS (mode))
5639 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5640 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5641 target_mode);
5644 #ifdef HAVE_conditional_move
5645 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5646 conditional move. */
5647 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5648 normalizep, target_mode);
5649 if (tem == 0)
5650 return 0;
5652 if (and_them)
5653 tem = emit_conditional_move (target, code, op0, op1, mode,
5654 tem, const0_rtx, GET_MODE (tem), 0);
5655 else
5656 tem = emit_conditional_move (target, code, op0, op1, mode,
5657 trueval, tem, GET_MODE (tem), 0);
5659 if (tem == 0)
5660 delete_insns_since (last);
5661 return tem;
5662 #else
5663 return 0;
5664 #endif
5667 /* The remaining tricks only apply to integer comparisons. */
5669 if (GET_MODE_CLASS (mode) != MODE_INT)
5670 return 0;
5672 /* If this is an equality comparison of integers, we can try to exclusive-or
5673 (or subtract) the two operands and use a recursive call to try the
5674 comparison with zero. Don't do any of these cases if branches are
5675 very cheap. */
5677 if ((code == EQ || code == NE) && op1 != const0_rtx)
5679 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5680 OPTAB_WIDEN);
5682 if (tem == 0)
5683 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5684 OPTAB_WIDEN);
5685 if (tem != 0)
5686 tem = emit_store_flag (target, code, tem, const0_rtx,
5687 mode, unsignedp, normalizep);
5688 if (tem != 0)
5689 return tem;
5691 delete_insns_since (last);
5694 /* For integer comparisons, try the reverse comparison. However, for
5695 small X, if we would have to extend it anyway, implementing "X != 0"
5696 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5697 rcode = reverse_condition (code);
5698 if (can_compare_p (rcode, mode, ccp_store_flag)
5699 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5700 && code == NE
5701 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5702 && op1 == const0_rtx))
5704 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5705 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5707 /* Again, for the reverse comparison, use either an addition or a XOR. */
5708 if (want_add
5709 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5710 optimize_insn_for_speed_p ()) == 0)
5712 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5713 STORE_FLAG_VALUE, target_mode);
5714 if (tem != 0)
5715 tem = expand_binop (target_mode, add_optab, tem,
5716 gen_int_mode (normalizep, target_mode),
5717 target, 0, OPTAB_WIDEN);
5719 else if (!want_add
5720 && rtx_cost (trueval, XOR, 1,
5721 optimize_insn_for_speed_p ()) == 0)
5723 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5724 normalizep, target_mode);
5725 if (tem != 0)
5726 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5727 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5730 if (tem != 0)
5731 return tem;
5732 delete_insns_since (last);
5735 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5736 the constant zero. Reject all other comparisons at this point. Only
5737 do LE and GT if branches are expensive since they are expensive on
5738 2-operand machines. */
5740 if (op1 != const0_rtx
5741 || (code != EQ && code != NE
5742 && (BRANCH_COST (optimize_insn_for_speed_p (),
5743 false) <= 1 || (code != LE && code != GT))))
5744 return 0;
5746 /* Try to put the result of the comparison in the sign bit. Assume we can't
5747 do the necessary operation below. */
5749 tem = 0;
5751 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5752 the sign bit set. */
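/* E.g. in an 8-bit mode: A = 3 gives 3 | 2 = 3, sign bit clear, so A <= 0 is
   false; A = 0 gives 0 | -1 = -1 and A = -4 gives -4 | -5 = -1, both with the
   sign bit set, so A <= 0 holds.  */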
5754 if (code == LE)
5756 /* This is destructive, so SUBTARGET can't be OP0. */
5757 if (rtx_equal_p (subtarget, op0))
5758 subtarget = 0;
5760 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5761 OPTAB_WIDEN);
5762 if (tem)
5763 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5764 OPTAB_WIDEN);
5767 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5768 number of bits in the mode of OP0, minus one. */
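/* E.g. in an 8-bit mode BITS is 7: A = 5 gives (5 >> 7) - 5 = -5, sign bit
   set, so A > 0 holds; A = 0 gives 0 and A = -3 gives -1 - (-3) = 2, sign bit
   clear, so A > 0 is false.  The shift is arithmetic, so -3 >> 7 is -1.  */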
5770 if (code == GT)
5772 if (rtx_equal_p (subtarget, op0))
5773 subtarget = 0;
5775 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5776 GET_MODE_BITSIZE (mode) - 1,
5777 subtarget, 0);
5778 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5779 OPTAB_WIDEN);
5782 if (code == EQ || code == NE)
5784 /* For EQ or NE, one way to do the comparison is to apply an operation
5785 that converts the operand into a positive number if it is nonzero
5786 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5787 for NE we negate. This puts the result in the sign bit. Then we
5788 normalize with a shift, if needed.
5790 Two operations that can do the above actions are ABS and FFS, so try
5791 them. If that doesn't work, and MODE is smaller than a full word,
5792 we can use zero-extension to the wider mode (an unsigned conversion)
5793 as the operation. */
5795 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5796 that is compensated by the subsequent overflow when subtracting
5797 one / negating. */
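/* Illustration with ABS: for A = 7, ABS gives 7; subtracting 1 for EQ leaves
   6 (sign clear, so A == 0 is false), while negating for NE leaves -7 (sign
   set, so A != 0 holds).  For A = 0 the results are -1 and 0 respectively.
   For A = INT_MIN, ABS is still INT_MIN, but INT_MIN - 1 wraps to INT_MAX
   and -INT_MIN wraps to INT_MIN, which is still the correct sign bit in each
   case.  */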
5799 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5800 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5801 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5802 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5803 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5805 tem = convert_modes (word_mode, mode, op0, 1);
5806 mode = word_mode;
5809 if (tem != 0)
5811 if (code == EQ)
5812 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5813 0, OPTAB_WIDEN);
5814 else
5815 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5818 /* If we couldn't do it that way, for NE we can "or" the two's complement
5819 of the value with itself. For EQ, we take the one's complement of
5820 that "or", which is an extra insn, so we only handle EQ if branches
5821 are expensive. */
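/* E.g. in an 8-bit mode: A = 5 gives -5 | 5 = 0xff, sign bit set, so A != 0
   holds; A = 0 gives 0 | 0 = 0.  For EQ the extra one's complement flips
   these to 0 and -1 respectively.  */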
5823 if (tem == 0
5824 && (code == NE
5825 || BRANCH_COST (optimize_insn_for_speed_p (),
5826 false) > 1))
5828 if (rtx_equal_p (subtarget, op0))
5829 subtarget = 0;
5831 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5832 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5833 OPTAB_WIDEN);
5835 if (tem && code == EQ)
5836 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5840 if (tem && normalizep)
5841 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5842 GET_MODE_BITSIZE (mode) - 1,
5843 subtarget, normalizep == 1);
5845 if (tem)
5847 if (!target)
5849 else if (GET_MODE (tem) != target_mode)
5851 convert_move (target, tem, 0);
5852 tem = target;
5854 else if (!subtarget)
5856 emit_move_insn (target, tem);
5857 tem = target;
5860 else
5861 delete_insns_since (last);
5863 return tem;
5866 /* Like emit_store_flag, but always succeeds. */
5868 rtx
5869 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5870 machine_mode mode, int unsignedp, int normalizep)
5872 rtx tem;
5873 rtx_code_label *label;
5874 rtx trueval, falseval;
5876 /* First see if emit_store_flag can do the job. */
5877 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5878 if (tem != 0)
5879 return tem;
5881 if (!target)
5882 target = gen_reg_rtx (word_mode);
5884 /* If this failed, we have to do this with set/compare/jump/set code.
5885 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5886 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5887 if (code == NE
5888 && GET_MODE_CLASS (mode) == MODE_INT
5889 && REG_P (target)
5890 && op0 == target
5891 && op1 == const0_rtx)
5893 label = gen_label_rtx ();
5894 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5895 mode, NULL_RTX, NULL_RTX, label, -1);
5896 emit_move_insn (target, trueval);
5897 emit_label (label);
5898 return target;
5901 if (!REG_P (target)
5902 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5903 target = gen_reg_rtx (GET_MODE (target));
5905 /* Jump in the right direction if the target cannot implement CODE
5906 but can jump on its reverse condition. */
5907 falseval = const0_rtx;
5908 if (! can_compare_p (code, mode, ccp_jump)
5909 && (! FLOAT_MODE_P (mode)
5910 || code == ORDERED || code == UNORDERED
5911 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5912 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5914 enum rtx_code rcode;
5915 if (FLOAT_MODE_P (mode))
5916 rcode = reverse_condition_maybe_unordered (code);
5917 else
5918 rcode = reverse_condition (code);
5920 /* Canonicalize to UNORDERED for the libcall. */
5921 if (can_compare_p (rcode, mode, ccp_jump)
5922 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5924 falseval = trueval;
5925 trueval = const0_rtx;
5926 code = rcode;
5930 emit_move_insn (target, trueval);
5931 label = gen_label_rtx ();
5932 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5933 NULL_RTX, label, -1);
5935 emit_move_insn (target, falseval);
5936 emit_label (label);
5938 return target;
5941 /* Perform possibly multi-word comparison and conditional jump to LABEL
5942 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5943 now a thin wrapper around do_compare_rtx_and_jump. */
5945 static void
5946 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5947 rtx_code_label *label)
5949 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5950 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5951 NULL_RTX, NULL_RTX, label, -1);