Linaro GCC 4.9-2015.06 snapshot
official-gcc.git / gcc-4_9-branch / gcc / expmed.c
blob 4d269fefd516564c7dec72ea81a80dbd0cf4812d
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2014 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "diagnostic-core.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "stor-layout.h"
30 #include "tm_p.h"
31 #include "flags.h"
32 #include "insn-config.h"
33 #include "expr.h"
34 #include "optabs.h"
35 #include "recog.h"
36 #include "langhooks.h"
37 #include "df.h"
38 #include "target.h"
39 #include "expmed.h"
41 struct target_expmed default_target_expmed;
42 #if SWITCHABLE_TARGET
43 struct target_expmed *this_target_expmed = &default_target_expmed;
44 #endif
46 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
47 unsigned HOST_WIDE_INT,
48 unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 rtx);
51 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 rtx);
54 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 rtx);
59 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
60 unsigned HOST_WIDE_INT,
61 unsigned HOST_WIDE_INT, rtx, int);
62 static rtx extract_fixed_bit_field_1 (enum machine_mode, rtx,
63 unsigned HOST_WIDE_INT,
64 unsigned HOST_WIDE_INT, rtx, int);
65 static rtx mask_rtx (enum machine_mode, int, int, int);
66 static rtx lshift_value (enum machine_mode, unsigned HOST_WIDE_INT, int);
67 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
68 unsigned HOST_WIDE_INT, int);
69 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
70 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
71 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
 73 /* Test whether a value is zero or a power of two. */
74 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
75 (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
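   /* For example, x == 8 gives 8 & 7 == 0 (a power of two), x == 6 gives
      6 & 5 == 4 (not a power of two), and x == 0 gives 0 & (unsigned) -1
      == 0, which is why zero also passes the test.  */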
77 struct init_expmed_rtl
79 struct rtx_def reg;
80 struct rtx_def plus;
81 struct rtx_def neg;
82 struct rtx_def mult;
83 struct rtx_def sdiv;
84 struct rtx_def udiv;
85 struct rtx_def sdiv_32;
86 struct rtx_def smod_32;
87 struct rtx_def wide_mult;
88 struct rtx_def wide_lshr;
89 struct rtx_def wide_trunc;
90 struct rtx_def shift;
91 struct rtx_def shift_mult;
92 struct rtx_def shift_add;
93 struct rtx_def shift_sub0;
94 struct rtx_def shift_sub1;
95 struct rtx_def zext;
96 struct rtx_def trunc;
98 rtx pow2[MAX_BITS_PER_WORD];
99 rtx cint[MAX_BITS_PER_WORD];
102 static void
103 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
104 enum machine_mode from_mode, bool speed)
106 int to_size, from_size;
107 rtx which;
109 /* We're given no information about the true size of a partial integer,
110 only the size of the "full" integer it requires for storage. For
111 comparison purposes here, reduce the bit size by one in that case. */
112 to_size = (GET_MODE_BITSIZE (to_mode)
113 - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
114 from_size = (GET_MODE_BITSIZE (from_mode)
115 - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
117 /* Assume cost of zero-extend and sign-extend is the same. */
118 which = (to_size < from_size ? &all->trunc : &all->zext);
120 PUT_MODE (&all->reg, from_mode);
121 set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
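   /* Example of the PARTIAL_INT adjustment above: if PSImode occupies the
      same 32 bits of storage as SImode, to_size for PSImode is 31 while
      from_size for SImode is 32, so an SImode-to-PSImode conversion is
      costed using the TRUNCATE rtx and a PSImode-to-SImode conversion is
      costed using the ZERO_EXTEND rtx.  */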
124 static void
125 init_expmed_one_mode (struct init_expmed_rtl *all,
126 enum machine_mode mode, int speed)
128 int m, n, mode_bitsize;
129 enum machine_mode mode_from;
131 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
133 PUT_MODE (&all->reg, mode);
134 PUT_MODE (&all->plus, mode);
135 PUT_MODE (&all->neg, mode);
136 PUT_MODE (&all->mult, mode);
137 PUT_MODE (&all->sdiv, mode);
138 PUT_MODE (&all->udiv, mode);
139 PUT_MODE (&all->sdiv_32, mode);
140 PUT_MODE (&all->smod_32, mode);
141 PUT_MODE (&all->wide_trunc, mode);
142 PUT_MODE (&all->shift, mode);
143 PUT_MODE (&all->shift_mult, mode);
144 PUT_MODE (&all->shift_add, mode);
145 PUT_MODE (&all->shift_sub0, mode);
146 PUT_MODE (&all->shift_sub1, mode);
147 PUT_MODE (&all->zext, mode);
148 PUT_MODE (&all->trunc, mode);
150 set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
151 set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
152 set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
153 set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
154 set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
156 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
157 <= 2 * add_cost (speed, mode)));
158 set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
159 <= 4 * add_cost (speed, mode)));
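   /* The thresholds above mean division by a power of two is considered
      "cheap" when the target's cost for a division by the constant 32 is
      at most two additions (four for modulus); these flags are consulted
      later when expanding division and modulus by powers of two, to
      decide whether a real division instruction beats the usual
      shift/mask expansion.  */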
161 set_shift_cost (speed, mode, 0, 0);
163 int cost = add_cost (speed, mode);
164 set_shiftadd_cost (speed, mode, 0, cost);
165 set_shiftsub0_cost (speed, mode, 0, cost);
166 set_shiftsub1_cost (speed, mode, 0, cost);
169 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
170 for (m = 1; m < n; m++)
172 XEXP (&all->shift, 1) = all->cint[m];
173 XEXP (&all->shift_mult, 1) = all->pow2[m];
175 set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
176 set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
177 set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
178 set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
181 if (SCALAR_INT_MODE_P (mode))
183 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
184 mode_from = (enum machine_mode)(mode_from + 1))
185 init_expmed_one_conv (all, mode, mode_from, speed);
187 if (GET_MODE_CLASS (mode) == MODE_INT)
189 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
190 if (wider_mode != VOIDmode)
192 PUT_MODE (&all->zext, wider_mode);
193 PUT_MODE (&all->wide_mult, wider_mode);
194 PUT_MODE (&all->wide_lshr, wider_mode);
195 XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
197 set_mul_widen_cost (speed, wider_mode,
198 set_src_cost (&all->wide_mult, speed));
199 set_mul_highpart_cost (speed, mode,
200 set_src_cost (&all->wide_trunc, speed));
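   /* The high-part multiply cost set above is modelled on the RTL that
      would compute it, e.g. for a 32-bit MODE with a 64-bit WIDER_MODE:
        (truncate:SI (lshiftrt:DI (mult:DI (zero_extend:DI (reg:SI))
                                           (zero_extend:DI (reg:SI)))
                                  (const_int 32)))  */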
205 void
206 init_expmed (void)
208 struct init_expmed_rtl all;
209 enum machine_mode mode;
210 int m, speed;
212 memset (&all, 0, sizeof all);
213 for (m = 1; m < MAX_BITS_PER_WORD; m++)
215 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
216 all.cint[m] = GEN_INT (m);
219 PUT_CODE (&all.reg, REG);
220 /* Avoid using hard regs in ways which may be unsupported. */
221 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
223 PUT_CODE (&all.plus, PLUS);
224 XEXP (&all.plus, 0) = &all.reg;
225 XEXP (&all.plus, 1) = &all.reg;
227 PUT_CODE (&all.neg, NEG);
228 XEXP (&all.neg, 0) = &all.reg;
230 PUT_CODE (&all.mult, MULT);
231 XEXP (&all.mult, 0) = &all.reg;
232 XEXP (&all.mult, 1) = &all.reg;
234 PUT_CODE (&all.sdiv, DIV);
235 XEXP (&all.sdiv, 0) = &all.reg;
236 XEXP (&all.sdiv, 1) = &all.reg;
238 PUT_CODE (&all.udiv, UDIV);
239 XEXP (&all.udiv, 0) = &all.reg;
240 XEXP (&all.udiv, 1) = &all.reg;
242 PUT_CODE (&all.sdiv_32, DIV);
243 XEXP (&all.sdiv_32, 0) = &all.reg;
244 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
246 PUT_CODE (&all.smod_32, MOD);
247 XEXP (&all.smod_32, 0) = &all.reg;
248 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
250 PUT_CODE (&all.zext, ZERO_EXTEND);
251 XEXP (&all.zext, 0) = &all.reg;
253 PUT_CODE (&all.wide_mult, MULT);
254 XEXP (&all.wide_mult, 0) = &all.zext;
255 XEXP (&all.wide_mult, 1) = &all.zext;
257 PUT_CODE (&all.wide_lshr, LSHIFTRT);
258 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
260 PUT_CODE (&all.wide_trunc, TRUNCATE);
261 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
263 PUT_CODE (&all.shift, ASHIFT);
264 XEXP (&all.shift, 0) = &all.reg;
266 PUT_CODE (&all.shift_mult, MULT);
267 XEXP (&all.shift_mult, 0) = &all.reg;
269 PUT_CODE (&all.shift_add, PLUS);
270 XEXP (&all.shift_add, 0) = &all.shift_mult;
271 XEXP (&all.shift_add, 1) = &all.reg;
273 PUT_CODE (&all.shift_sub0, MINUS);
274 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
275 XEXP (&all.shift_sub0, 1) = &all.reg;
277 PUT_CODE (&all.shift_sub1, MINUS);
278 XEXP (&all.shift_sub1, 0) = &all.reg;
279 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
281 PUT_CODE (&all.trunc, TRUNCATE);
282 XEXP (&all.trunc, 0) = &all.reg;
284 for (speed = 0; speed < 2; speed++)
286 crtl->maybe_hot_insn_p = speed;
287 set_zero_cost (speed, set_src_cost (const0_rtx, speed));
289 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
290 mode = (enum machine_mode)(mode + 1))
291 init_expmed_one_mode (&all, mode, speed);
293 if (MIN_MODE_PARTIAL_INT != VOIDmode)
294 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
295 mode = (enum machine_mode)(mode + 1))
296 init_expmed_one_mode (&all, mode, speed);
298 if (MIN_MODE_VECTOR_INT != VOIDmode)
299 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
300 mode = (enum machine_mode)(mode + 1))
301 init_expmed_one_mode (&all, mode, speed);
304 if (alg_hash_used_p ())
306 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
307 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
309 else
310 set_alg_hash_used_p (true);
311 default_rtl_profile ();
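   /* The speed loop above fills both cost tables: iteration 0 records the
      size-oriented costs and iteration 1 the speed-oriented costs, with
      crtl->maybe_hot_insn_p temporarily forced so that set_src_cost asks
      the backend the right question; expanders later select a table via
      optimize_insn_for_speed_p ().  */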
314 /* Return an rtx representing minus the value of X.
315 MODE is the intended mode of the result,
316 useful if X is a CONST_INT. */
 318 rtx
 319 negate_rtx (enum machine_mode mode, rtx x)
321 rtx result = simplify_unary_operation (NEG, mode, x, mode);
323 if (result == 0)
324 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
326 return result;
329 /* Adjust bitfield memory MEM so that it points to the first unit of mode
330 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
331 If MODE is BLKmode, return a reference to every byte in the bitfield.
332 Set *NEW_BITNUM to the bit position of the field within the new memory. */
334 static rtx
335 narrow_bit_field_mem (rtx mem, enum machine_mode mode,
336 unsigned HOST_WIDE_INT bitsize,
337 unsigned HOST_WIDE_INT bitnum,
338 unsigned HOST_WIDE_INT *new_bitnum)
340 if (mode == BLKmode)
342 *new_bitnum = bitnum % BITS_PER_UNIT;
343 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
344 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
345 / BITS_PER_UNIT);
346 return adjust_bitfield_address_size (mem, mode, offset, size);
348 else
350 unsigned int unit = GET_MODE_BITSIZE (mode);
351 *new_bitnum = bitnum % unit;
352 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
353 return adjust_bitfield_address (mem, mode, offset);
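   /* Worked example of the non-BLKmode case: with MODE == QImode (8 bits)
      and BITNUM == 19, *NEW_BITNUM becomes 3 and the returned MEM is the
      original address plus 2 bytes, so the field now starts 3 bits into
      the first byte of the new reference.  */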
357 /* The caller wants to perform insertion or extraction PATTERN on a
358 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
359 BITREGION_START and BITREGION_END are as for store_bit_field
360 and FIELDMODE is the natural mode of the field.
362 Search for a mode that is compatible with the memory access
363 restrictions and (where applicable) with a register insertion or
364 extraction. Return the new memory on success, storing the adjusted
365 bit position in *NEW_BITNUM. Return null otherwise. */
367 static rtx
368 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
369 rtx op0, HOST_WIDE_INT bitsize,
370 HOST_WIDE_INT bitnum,
371 unsigned HOST_WIDE_INT bitregion_start,
372 unsigned HOST_WIDE_INT bitregion_end,
373 enum machine_mode fieldmode,
374 unsigned HOST_WIDE_INT *new_bitnum)
376 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
377 bitregion_end, MEM_ALIGN (op0),
378 MEM_VOLATILE_P (op0));
379 enum machine_mode best_mode;
380 if (iter.next_mode (&best_mode))
382 /* We can use a memory in BEST_MODE. See whether this is true for
383 any wider modes. All other things being equal, we prefer to
384 use the widest mode possible because it tends to expose more
385 CSE opportunities. */
386 if (!iter.prefer_smaller_modes ())
388 /* Limit the search to the mode required by the corresponding
389 register insertion or extraction instruction, if any. */
390 enum machine_mode limit_mode = word_mode;
391 extraction_insn insn;
392 if (get_best_reg_extraction_insn (&insn, pattern,
393 GET_MODE_BITSIZE (best_mode),
394 fieldmode))
395 limit_mode = insn.field_mode;
397 enum machine_mode wider_mode;
398 while (iter.next_mode (&wider_mode)
399 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
400 best_mode = wider_mode;
402 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
403 new_bitnum);
405 return NULL_RTX;
408 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
409 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
410 offset is then BITNUM / BITS_PER_UNIT. */
412 static bool
413 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
414 unsigned HOST_WIDE_INT bitsize,
415 enum machine_mode struct_mode)
417 if (BYTES_BIG_ENDIAN)
418 return (bitnum % BITS_PER_UNIT == 0
419 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
420 || (bitnum + bitsize) % BITS_PER_WORD == 0));
421 else
422 return bitnum % BITS_PER_WORD == 0;
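   /* Example: for a DImode structure with BITS_PER_WORD == 32 and
      BITSIZE == 16, a little-endian target accepts BITNUM 0 or 32 (the
      field starts at a word's lsb), while a big-endian target accepts
      e.g. BITNUM 16 or 48, where the field ends on a word or structure
      boundary.  */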
425 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
426 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
427 Return false if the access would touch memory outside the range
428 BITREGION_START to BITREGION_END for conformance to the C++ memory
429 model. */
431 static bool
432 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
433 unsigned HOST_WIDE_INT bitnum,
434 enum machine_mode fieldmode,
435 unsigned HOST_WIDE_INT bitregion_start,
436 unsigned HOST_WIDE_INT bitregion_end)
438 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
440 /* -fstrict-volatile-bitfields must be enabled and we must have a
441 volatile MEM. */
442 if (!MEM_P (op0)
443 || !MEM_VOLATILE_P (op0)
444 || flag_strict_volatile_bitfields <= 0)
445 return false;
447 /* Non-integral modes likely only happen with packed structures.
448 Punt. */
449 if (!SCALAR_INT_MODE_P (fieldmode))
450 return false;
452 /* The bit size must not be larger than the field mode, and
453 the field mode must not be larger than a word. */
454 if (bitsize > modesize || modesize > BITS_PER_WORD)
455 return false;
457 /* Check for cases of unaligned fields that must be split. */
458 if (bitnum % BITS_PER_UNIT + bitsize > modesize
459 || (STRICT_ALIGNMENT
460 && bitnum % GET_MODE_ALIGNMENT (fieldmode) + bitsize > modesize))
461 return false;
463 /* Check for cases where the C++ memory model applies. */
464 if (bitregion_end != 0
465 && (bitnum - bitnum % modesize < bitregion_start
466 || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
467 return false;
469 return true;
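   /* When this returns true, the callers perform the access in FIELDMODE
      itself: e.g. for "volatile unsigned int x : 8;" inside an aligned
      int-sized region, the field is read or written with a full SImode
      access rather than a narrower one, which is what AAPCS-style
      volatile bit-field semantics require.  */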
 472 /* Return true if OP0 is a memory and if a bitfield of size BITSIZE at
473 bit number BITNUM can be treated as a simple value of mode MODE. */
475 static bool
476 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
477 unsigned HOST_WIDE_INT bitnum, enum machine_mode mode)
479 return (MEM_P (op0)
480 && bitnum % BITS_PER_UNIT == 0
481 && bitsize == GET_MODE_BITSIZE (mode)
482 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
483 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
484 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
487 /* Try to use instruction INSV to store VALUE into a field of OP0.
488 BITSIZE and BITNUM are as for store_bit_field. */
490 static bool
491 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
492 unsigned HOST_WIDE_INT bitsize,
493 unsigned HOST_WIDE_INT bitnum,
494 rtx value)
496 struct expand_operand ops[4];
497 rtx value1;
498 rtx xop0 = op0;
499 rtx last = get_last_insn ();
500 bool copy_back = false;
502 enum machine_mode op_mode = insv->field_mode;
503 unsigned int unit = GET_MODE_BITSIZE (op_mode);
504 if (bitsize == 0 || bitsize > unit)
505 return false;
507 if (MEM_P (xop0))
508 /* Get a reference to the first byte of the field. */
509 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
510 &bitnum);
511 else
513 /* Convert from counting within OP0 to counting in OP_MODE. */
514 if (BYTES_BIG_ENDIAN)
515 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
517 /* If xop0 is a register, we need it in OP_MODE
518 to make it acceptable to the format of insv. */
519 if (GET_CODE (xop0) == SUBREG)
520 /* We can't just change the mode, because this might clobber op0,
521 and we will need the original value of op0 if insv fails. */
522 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
523 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
524 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
527 /* If the destination is a paradoxical subreg such that we need a
528 truncate to the inner mode, perform the insertion on a temporary and
529 truncate the result to the original destination. Note that we can't
530 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
531 X) 0)) is (reg:N X). */
532 if (GET_CODE (xop0) == SUBREG
533 && REG_P (SUBREG_REG (xop0))
534 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
535 op_mode))
537 rtx tem = gen_reg_rtx (op_mode);
538 emit_move_insn (tem, xop0);
539 xop0 = tem;
540 copy_back = true;
 543 /* There is a similar overflow check at the start of store_bit_field_1,
 544 but it only handles the case where the field lies completely
 545 outside the register. There are also cases where the field lies
 546 partially in the register, and we need to adjust BITSIZE for this
 547 partial overflow. Without this fix, pr48335-2.c would be broken on
 548 big-endian targets that have a bit-insert instruction, such as arm,
 549 aarch64 etc. */
550 if (bitsize + bitnum > unit && bitnum < unit)
552 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
553 "destination object, data truncated into %wu-bit",
554 bitsize, unit - bitnum);
555 bitsize = unit - bitnum;
558 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
559 "backwards" from the size of the unit we are inserting into.
560 Otherwise, we count bits from the most significant on a
561 BYTES/BITS_BIG_ENDIAN machine. */
563 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
564 bitnum = unit - bitsize - bitnum;
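   /* E.g. with UNIT == 32, BITSIZE == 8 and BITNUM == 4, the field covers
      the same bits whether numbered 4 from one end or 32 - 8 - 4 == 20
      from the other; this flip keeps BITNUM in the convention that the
      insv pattern expects.  */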
566 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
567 value1 = value;
568 if (GET_MODE (value) != op_mode)
570 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
572 /* Optimization: Don't bother really extending VALUE
573 if it has all the bits we will actually use. However,
574 if we must narrow it, be sure we do it correctly. */
576 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
578 rtx tmp;
580 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
581 if (! tmp)
582 tmp = simplify_gen_subreg (op_mode,
583 force_reg (GET_MODE (value),
584 value1),
585 GET_MODE (value), 0);
586 value1 = tmp;
588 else
589 value1 = gen_lowpart (op_mode, value1);
591 else if (CONST_INT_P (value))
592 value1 = gen_int_mode (INTVAL (value), op_mode);
593 else
594 /* Parse phase is supposed to make VALUE's data type
595 match that of the component reference, which is a type
596 at least as wide as the field; so VALUE should have
597 a mode that corresponds to that type. */
598 gcc_assert (CONSTANT_P (value));
601 create_fixed_operand (&ops[0], xop0);
602 create_integer_operand (&ops[1], bitsize);
603 create_integer_operand (&ops[2], bitnum);
604 create_input_operand (&ops[3], value1, op_mode);
605 if (maybe_expand_insn (insv->icode, 4, ops))
607 if (copy_back)
608 convert_move (op0, xop0, true);
609 return true;
611 delete_insns_since (last);
612 return false;
615 /* A subroutine of store_bit_field, with the same arguments. Return true
616 if the operation could be implemented.
618 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
619 no other way of implementing the operation. If FALLBACK_P is false,
620 return false instead. */
622 static bool
623 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
624 unsigned HOST_WIDE_INT bitnum,
625 unsigned HOST_WIDE_INT bitregion_start,
626 unsigned HOST_WIDE_INT bitregion_end,
627 enum machine_mode fieldmode,
628 rtx value, bool fallback_p)
630 rtx op0 = str_rtx;
631 rtx orig_value;
633 while (GET_CODE (op0) == SUBREG)
635 /* The following line once was done only if WORDS_BIG_ENDIAN,
636 but I think that is a mistake. WORDS_BIG_ENDIAN is
637 meaningful at a much higher level; when structures are copied
638 between memory and regs, the higher-numbered regs
639 always get higher addresses. */
640 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
641 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
642 int byte_offset = 0;
644 /* Paradoxical subregs need special handling on big endian machines. */
645 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
647 int difference = inner_mode_size - outer_mode_size;
649 if (WORDS_BIG_ENDIAN)
650 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
651 if (BYTES_BIG_ENDIAN)
652 byte_offset += difference % UNITS_PER_WORD;
654 else
655 byte_offset = SUBREG_BYTE (op0);
657 bitnum += byte_offset * BITS_PER_UNIT;
658 op0 = SUBREG_REG (op0);
661 /* No action is needed if the target is a register and if the field
662 lies completely outside that register. This can occur if the source
663 code contains an out-of-bounds access to a small array. */
664 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
665 return true;
667 /* Use vec_set patterns for inserting parts of vectors whenever
668 available. */
669 if (VECTOR_MODE_P (GET_MODE (op0))
670 && !MEM_P (op0)
671 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
672 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
673 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
674 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
676 struct expand_operand ops[3];
677 enum machine_mode outermode = GET_MODE (op0);
678 enum machine_mode innermode = GET_MODE_INNER (outermode);
679 enum insn_code icode = optab_handler (vec_set_optab, outermode);
680 int pos = bitnum / GET_MODE_BITSIZE (innermode);
682 create_fixed_operand (&ops[0], op0);
683 create_input_operand (&ops[1], value, innermode);
684 create_integer_operand (&ops[2], pos);
685 if (maybe_expand_insn (icode, 3, ops))
686 return true;
689 /* If the target is a register, overwriting the entire object, or storing
690 a full-word or multi-word field can be done with just a SUBREG. */
691 if (!MEM_P (op0)
692 && bitsize == GET_MODE_BITSIZE (fieldmode)
693 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
694 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
696 /* Use the subreg machinery either to narrow OP0 to the required
697 words or to cope with mode punning between equal-sized modes.
 698 In the latter case, use the subreg on the rhs, not the lhs. */
699 rtx sub;
701 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
703 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
704 if (sub)
706 emit_move_insn (op0, sub);
707 return true;
710 else
712 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
713 bitnum / BITS_PER_UNIT);
714 if (sub)
716 emit_move_insn (sub, value);
717 return true;
722 /* If the target is memory, storing any naturally aligned field can be
723 done with a simple store. For targets that support fast unaligned
724 memory, any naturally sized, unit aligned field can be done directly. */
725 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
727 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
728 emit_move_insn (op0, value);
729 return true;
732 /* Make sure we are playing with integral modes. Pun with subregs
733 if we aren't. This must come after the entire register case above,
734 since that case is valid for any mode. The following cases are only
735 valid for integral modes. */
737 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
738 if (imode != GET_MODE (op0))
740 if (MEM_P (op0))
741 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
742 else
744 gcc_assert (imode != BLKmode);
745 op0 = gen_lowpart (imode, op0);
750 /* Storing an lsb-aligned field in a register
751 can be done with a movstrict instruction. */
753 if (!MEM_P (op0)
754 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
755 && bitsize == GET_MODE_BITSIZE (fieldmode)
756 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
758 struct expand_operand ops[2];
759 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
760 rtx arg0 = op0;
761 unsigned HOST_WIDE_INT subreg_off;
763 if (GET_CODE (arg0) == SUBREG)
765 /* Else we've got some float mode source being extracted into
766 a different float mode destination -- this combination of
767 subregs results in Severe Tire Damage. */
768 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
769 || GET_MODE_CLASS (fieldmode) == MODE_INT
770 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
771 arg0 = SUBREG_REG (arg0);
774 subreg_off = bitnum / BITS_PER_UNIT;
775 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
777 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
779 create_fixed_operand (&ops[0], arg0);
780 /* Shrink the source operand to FIELDMODE. */
781 create_convert_operand_to (&ops[1], value, fieldmode, false);
782 if (maybe_expand_insn (icode, 2, ops))
783 return true;
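   /* movstrict_optab maps to patterns such as movstrictqi (present on
      m68k, for example), whose (strict_low_part ...) destination writes
      only the low FIELDMODE-sized part of the register and leaves the
      remaining bits untouched, so no explicit masking is needed here.  */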
787 /* Handle fields bigger than a word. */
789 if (bitsize > BITS_PER_WORD)
791 /* Here we transfer the words of the field
792 in the order least significant first.
793 This is because the most significant word is the one which may
794 be less than full.
795 However, only do that if the value is not BLKmode. */
797 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
798 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
799 unsigned int i;
800 rtx last;
802 /* This is the mode we must force value to, so that there will be enough
803 subwords to extract. Note that fieldmode will often (always?) be
804 VOIDmode, because that is what store_field uses to indicate that this
805 is a bit field, but passing VOIDmode to operand_subword_force
806 is not allowed. */
807 fieldmode = GET_MODE (value);
808 if (fieldmode == VOIDmode)
809 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
811 last = get_last_insn ();
812 for (i = 0; i < nwords; i++)
814 /* If I is 0, use the low-order word in both field and target;
815 if I is 1, use the next to lowest word; and so on. */
816 unsigned int wordnum = (backwards
817 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
818 - i - 1
819 : i);
820 unsigned int bit_offset = (backwards
821 ? MAX ((int) bitsize - ((int) i + 1)
822 * BITS_PER_WORD,
 823 0)
 824 : (int) i * BITS_PER_WORD);
825 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
826 unsigned HOST_WIDE_INT new_bitsize =
827 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
829 /* If the remaining chunk doesn't have full wordsize we have
830 to make sure that for big endian machines the higher order
831 bits are used. */
832 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
833 value_word = simplify_expand_binop (word_mode, lshr_optab,
834 value_word,
835 GEN_INT (BITS_PER_WORD
836 - new_bitsize),
837 NULL_RTX, true,
838 OPTAB_LIB_WIDEN);
840 if (!store_bit_field_1 (op0, new_bitsize,
841 bitnum + bit_offset,
842 bitregion_start, bitregion_end,
843 word_mode,
844 value_word, fallback_p))
846 delete_insns_since (last);
847 return false;
850 return true;
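   /* Example: storing a 96-bit field on a 32-bit-word target gives
      nwords == 3; with !WORDS_BIG_ENDIAN the loop stores bits [0,32) of
      VALUE first, then [32,64), then [64,96), so the possibly partial
      most significant word is handled last.  */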
853 /* If VALUE has a floating-point or complex mode, access it as an
854 integer of the corresponding size. This can occur on a machine
855 with 64 bit registers that uses SFmode for float. It can also
856 occur for unaligned float or complex fields. */
857 orig_value = value;
858 if (GET_MODE (value) != VOIDmode
859 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
860 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
862 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
863 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
866 /* If OP0 is a multi-word register, narrow it to the affected word.
867 If the region spans two words, defer to store_split_bit_field. */
868 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
870 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
871 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
872 gcc_assert (op0);
873 bitnum %= BITS_PER_WORD;
874 if (bitnum + bitsize > BITS_PER_WORD)
876 if (!fallback_p)
877 return false;
879 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
880 bitregion_end, value);
881 return true;
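   /* Example: for a DImode pseudo on a 32-bit-word target with
      BITNUM == 40 and BITSIZE == 8, OP0 is narrowed to the word that
      contains bit 40 (subreg byte offset 4) and BITNUM becomes 8; had
      the field been, say, 30 bits wide it would straddle the word
      boundary and be handled by store_split_bit_field instead.  */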
885 /* From here on we can assume that the field to be stored in fits
886 within a word. If the destination is a register, it too fits
887 in a word. */
889 extraction_insn insv;
890 if (!MEM_P (op0)
891 && get_best_reg_extraction_insn (&insv, EP_insv,
892 GET_MODE_BITSIZE (GET_MODE (op0)),
893 fieldmode)
894 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
895 return true;
897 /* If OP0 is a memory, try copying it to a register and seeing if a
898 cheap register alternative is available. */
899 if (MEM_P (op0))
901 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
902 fieldmode)
903 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
904 return true;
906 rtx last = get_last_insn ();
908 /* Try loading part of OP0 into a register, inserting the bitfield
909 into that, and then copying the result back to OP0. */
910 unsigned HOST_WIDE_INT bitpos;
911 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
912 bitregion_start, bitregion_end,
913 fieldmode, &bitpos);
914 if (xop0)
916 rtx tempreg = copy_to_reg (xop0);
917 if (store_bit_field_1 (tempreg, bitsize, bitpos,
918 bitregion_start, bitregion_end,
919 fieldmode, orig_value, false))
921 emit_move_insn (xop0, tempreg);
922 return true;
924 delete_insns_since (last);
928 if (!fallback_p)
929 return false;
931 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
932 bitregion_end, value);
933 return true;
936 /* Generate code to store value from rtx VALUE
937 into a bit-field within structure STR_RTX
938 containing BITSIZE bits starting at bit BITNUM.
940 BITREGION_START is bitpos of the first bitfield in this region.
941 BITREGION_END is the bitpos of the ending bitfield in this region.
 942 These two fields are 0 if the C++ memory model does not apply,
 943 or if we are not interested in keeping track of bitfield regions.
945 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
947 void
948 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
949 unsigned HOST_WIDE_INT bitnum,
950 unsigned HOST_WIDE_INT bitregion_start,
951 unsigned HOST_WIDE_INT bitregion_end,
952 enum machine_mode fieldmode,
953 rtx value)
955 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
956 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
957 bitregion_start, bitregion_end))
959 /* Storing any naturally aligned field can be done with a simple
960 store. For targets that support fast unaligned memory, any
961 naturally sized, unit aligned field can be done directly. */
962 if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, fieldmode))
964 str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
965 bitnum / BITS_PER_UNIT);
966 emit_move_insn (str_rtx, value);
968 else
970 str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
971 &bitnum);
972 /* Explicitly override the C/C++ memory model; ignore the
973 bit range so that we can do the access in the mode mandated
974 by -fstrict-volatile-bitfields instead. */
975 store_fixed_bit_field_1 (str_rtx, bitsize, bitnum, value);
978 return;
981 /* Under the C++0x memory model, we must not touch bits outside the
982 bit region. Adjust the address to start at the beginning of the
983 bit region. */
984 if (MEM_P (str_rtx) && bitregion_start > 0)
986 enum machine_mode bestmode;
987 HOST_WIDE_INT offset, size;
989 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
991 offset = bitregion_start / BITS_PER_UNIT;
992 bitnum -= bitregion_start;
993 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
994 bitregion_end -= bitregion_start;
995 bitregion_start = 0;
996 bestmode = get_best_mode (bitsize, bitnum,
997 bitregion_start, bitregion_end,
998 MEM_ALIGN (str_rtx), VOIDmode,
999 MEM_VOLATILE_P (str_rtx));
1000 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
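   /* Example: with BITREGION_START == 32, BITREGION_END == 63,
      BITNUM == 40 and BITSIZE == 8, the MEM is advanced by 4 bytes,
      BITNUM becomes 8 and the region becomes [0, 31], so no access
      below the original bit 32 or above bit 63 can be generated.  */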
1003 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1004 bitregion_start, bitregion_end,
1005 fieldmode, value, true))
1006 gcc_unreachable ();
1009 /* Use shifts and boolean operations to store VALUE into a bit field of
1010 width BITSIZE in OP0, starting at bit BITNUM. */
1012 static void
1013 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1014 unsigned HOST_WIDE_INT bitnum,
1015 unsigned HOST_WIDE_INT bitregion_start,
1016 unsigned HOST_WIDE_INT bitregion_end,
1017 rtx value)
1019 /* There is a case not handled here:
1020 a structure with a known alignment of just a halfword
1021 and a field split across two aligned halfwords within the structure.
1022 Or likewise a structure with a known alignment of just a byte
1023 and a field split across two bytes.
1024 Such cases are not supposed to be able to occur. */
1026 if (MEM_P (op0))
1028 enum machine_mode mode = GET_MODE (op0);
1029 if (GET_MODE_BITSIZE (mode) == 0
1030 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1031 mode = word_mode;
1032 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1033 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1035 if (mode == VOIDmode)
1037 /* The only way this should occur is if the field spans word
1038 boundaries. */
1039 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1040 bitregion_end, value);
1041 return;
1044 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1047 store_fixed_bit_field_1 (op0, bitsize, bitnum, value);
 1050 /* Helper function for store_fixed_bit_field; it stores
1051 the bit field always using the MODE of OP0. */
1053 static void
1054 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1055 unsigned HOST_WIDE_INT bitnum,
1056 rtx value)
1058 enum machine_mode mode;
1059 rtx temp;
1060 int all_zero = 0;
1061 int all_one = 0;
1063 mode = GET_MODE (op0);
1064 gcc_assert (SCALAR_INT_MODE_P (mode));
1066 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1067 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1069 if (BYTES_BIG_ENDIAN)
1070 /* BITNUM is the distance between our msb
1071 and that of the containing datum.
1072 Convert it to the distance from the lsb. */
1073 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1075 /* Now BITNUM is always the distance between our lsb
1076 and that of OP0. */
1078 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1079 we must first convert its mode to MODE. */
1081 if (CONST_INT_P (value))
1083 HOST_WIDE_INT v = INTVAL (value);
1085 if (bitsize < HOST_BITS_PER_WIDE_INT)
1086 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
1088 if (v == 0)
1089 all_zero = 1;
1090 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1091 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
1092 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
1093 all_one = 1;
1095 value = lshift_value (mode, v, bitnum);
1097 else
1099 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1100 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1102 if (GET_MODE (value) != mode)
1103 value = convert_to_mode (mode, value, 1);
1105 if (must_and)
1106 value = expand_binop (mode, and_optab, value,
1107 mask_rtx (mode, 0, bitsize, 0),
1108 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1109 if (bitnum > 0)
1110 value = expand_shift (LSHIFT_EXPR, mode, value,
1111 bitnum, NULL_RTX, 1);
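   /* At this point VALUE holds the new field already shifted into place;
      the overall effect of the code below is
        op0 = (op0 & ~(MASK << BITNUM)) | VALUE
      where MASK == (1 << BITSIZE) - 1, with the AND skipped when the
      field is all ones and the IOR skipped when it is all zeros.  */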
1114 /* Now clear the chosen bits in OP0,
1115 except that if VALUE is -1 we need not bother. */
1116 /* We keep the intermediates in registers to allow CSE to combine
1117 consecutive bitfield assignments. */
1119 temp = force_reg (mode, op0);
1121 if (! all_one)
1123 temp = expand_binop (mode, and_optab, temp,
1124 mask_rtx (mode, bitnum, bitsize, 1),
1125 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1126 temp = force_reg (mode, temp);
1129 /* Now logical-or VALUE into OP0, unless it is zero. */
1131 if (! all_zero)
1133 temp = expand_binop (mode, ior_optab, temp, value,
1134 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1135 temp = force_reg (mode, temp);
1138 if (op0 != temp)
1140 op0 = copy_rtx (op0);
1141 emit_move_insn (op0, temp);
1145 /* Store a bit field that is split across multiple accessible memory objects.
1147 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1148 BITSIZE is the field width; BITPOS the position of its first bit
1149 (within the word).
1150 VALUE is the value to store.
1152 This does not yet handle fields wider than BITS_PER_WORD. */
1154 static void
1155 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1156 unsigned HOST_WIDE_INT bitpos,
1157 unsigned HOST_WIDE_INT bitregion_start,
1158 unsigned HOST_WIDE_INT bitregion_end,
1159 rtx value)
1161 unsigned int unit;
1162 unsigned int bitsdone = 0;
 1164 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1165 much at a time. */
1166 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1167 unit = BITS_PER_WORD;
1168 else
1169 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1171 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1172 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1173 again, and we will mutually recurse forever. */
1174 if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1175 unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1177 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1178 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1179 that VALUE might be a floating-point constant. */
1180 if (CONSTANT_P (value) && !CONST_INT_P (value))
1182 rtx word = gen_lowpart_common (word_mode, value);
1184 if (word && (value != word))
1185 value = word;
1186 else
1187 value = gen_lowpart_common (word_mode,
1188 force_reg (GET_MODE (value) != VOIDmode
1189 ? GET_MODE (value)
1190 : word_mode, value));
1193 while (bitsdone < bitsize)
1195 unsigned HOST_WIDE_INT thissize;
1196 rtx part, word;
1197 unsigned HOST_WIDE_INT thispos;
1198 unsigned HOST_WIDE_INT offset;
1200 offset = (bitpos + bitsdone) / unit;
1201 thispos = (bitpos + bitsdone) % unit;
 1203 /* When the region of bytes we can touch is restricted, decrease
1204 UNIT close to the end of the region as needed. If op0 is a REG
1205 or SUBREG of REG, don't do this, as there can't be data races
1206 on a register and we can expand shorter code in some cases. */
1207 if (bitregion_end
1208 && unit > BITS_PER_UNIT
1209 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1210 && !REG_P (op0)
1211 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1213 unit = unit / 2;
1214 continue;
1217 /* THISSIZE must not overrun a word boundary. Otherwise,
1218 store_fixed_bit_field will call us again, and we will mutually
1219 recurse forever. */
1220 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1221 thissize = MIN (thissize, unit - thispos);
1223 if (BYTES_BIG_ENDIAN)
1225 /* Fetch successively less significant portions. */
1226 if (CONST_INT_P (value))
1227 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1228 >> (bitsize - bitsdone - thissize))
1229 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1230 else
1232 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1233 /* The args are chosen so that the last part includes the
1234 lsb. Give extract_bit_field the value it needs (with
1235 endianness compensation) to fetch the piece we want. */
1236 part = extract_fixed_bit_field (word_mode, value, thissize,
1237 total_bits - bitsize + bitsdone,
1238 NULL_RTX, 1);
1241 else
1243 /* Fetch successively more significant portions. */
1244 if (CONST_INT_P (value))
1245 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1246 >> bitsdone)
1247 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1248 else
1249 part = extract_fixed_bit_field (word_mode, value, thissize,
1250 bitsdone, NULL_RTX, 1);
1253 /* If OP0 is a register, then handle OFFSET here.
1255 When handling multiword bitfields, extract_bit_field may pass
1256 down a word_mode SUBREG of a larger REG for a bitfield that actually
1257 crosses a word boundary. Thus, for a SUBREG, we must find
1258 the current word starting from the base register. */
1259 if (GET_CODE (op0) == SUBREG)
1261 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1262 + (offset * unit / BITS_PER_WORD);
1263 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1264 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1265 word = word_offset ? const0_rtx : op0;
1266 else
1267 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1268 GET_MODE (SUBREG_REG (op0)));
1269 offset &= BITS_PER_WORD / unit - 1;
1271 else if (REG_P (op0))
1273 enum machine_mode op0_mode = GET_MODE (op0);
1274 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1275 word = offset ? const0_rtx : op0;
1276 else
1277 word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1278 GET_MODE (op0));
1279 offset &= BITS_PER_WORD / unit - 1;
1281 else
1282 word = op0;
1284 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1285 it is just an out-of-bounds access. Ignore it. */
1286 if (word != const0_rtx)
1287 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1288 bitregion_start, bitregion_end, part);
1289 bitsdone += thissize;
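   /* Worked example (little-endian register destination, UNIT == 32):
      storing a 16-bit field at BITPOS == 24 takes two iterations; the
      first stores the low 8 bits of VALUE at bit 24 of word 0, the
      second stores the next 8 bits at bit 0 of word 1.  */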
1293 /* A subroutine of extract_bit_field_1 that converts return value X
1294 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1295 to extract_bit_field. */
1297 static rtx
1298 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1299 enum machine_mode tmode, bool unsignedp)
1301 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1302 return x;
1304 /* If the x mode is not a scalar integral, first convert to the
1305 integer mode of that size and then access it as a floating-point
1306 value via a SUBREG. */
1307 if (!SCALAR_INT_MODE_P (tmode))
1309 enum machine_mode smode;
1311 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1312 x = convert_to_mode (smode, x, unsignedp);
1313 x = force_reg (smode, x);
1314 return gen_lowpart (tmode, x);
1317 return convert_to_mode (tmode, x, unsignedp);
1320 /* Try to use an ext(z)v pattern to extract a field from OP0.
1321 Return the extracted value on success, otherwise return null.
1322 EXT_MODE is the mode of the extraction and the other arguments
1323 are as for extract_bit_field. */
1325 static rtx
1326 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1327 unsigned HOST_WIDE_INT bitsize,
1328 unsigned HOST_WIDE_INT bitnum,
1329 int unsignedp, rtx target,
1330 enum machine_mode mode, enum machine_mode tmode)
1332 struct expand_operand ops[4];
1333 rtx spec_target = target;
1334 rtx spec_target_subreg = 0;
1335 enum machine_mode ext_mode = extv->field_mode;
1336 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1338 if (bitsize == 0 || unit < bitsize)
1339 return NULL_RTX;
1341 if (MEM_P (op0))
1342 /* Get a reference to the first byte of the field. */
1343 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1344 &bitnum);
1345 else
1347 /* Convert from counting within OP0 to counting in EXT_MODE. */
1348 if (BYTES_BIG_ENDIAN)
1349 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1351 /* If op0 is a register, we need it in EXT_MODE to make it
1352 acceptable to the format of ext(z)v. */
1353 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1354 return NULL_RTX;
1355 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1356 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1359 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1360 "backwards" from the size of the unit we are extracting from.
1361 Otherwise, we count bits from the most significant on a
1362 BYTES/BITS_BIG_ENDIAN machine. */
1364 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1365 bitnum = unit - bitsize - bitnum;
1367 if (target == 0)
1368 target = spec_target = gen_reg_rtx (tmode);
1370 if (GET_MODE (target) != ext_mode)
1372 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1373 between the mode of the extraction (word_mode) and the target
1374 mode. Instead, create a temporary and use convert_move to set
1375 the target. */
1376 if (REG_P (target)
1377 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1379 target = gen_lowpart (ext_mode, target);
1380 if (GET_MODE_PRECISION (ext_mode)
1381 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1382 spec_target_subreg = target;
1384 else
1385 target = gen_reg_rtx (ext_mode);
1388 create_output_operand (&ops[0], target, ext_mode);
1389 create_fixed_operand (&ops[1], op0);
1390 create_integer_operand (&ops[2], bitsize);
1391 create_integer_operand (&ops[3], bitnum);
1392 if (maybe_expand_insn (extv->icode, 4, ops))
1394 target = ops[0].value;
1395 if (target == spec_target)
1396 return target;
1397 if (target == spec_target_subreg)
1398 return spec_target;
1399 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1401 return NULL_RTX;
1404 /* A subroutine of extract_bit_field, with the same arguments.
1405 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1406 if we can find no other means of implementing the operation.
 1407 If FALLBACK_P is false, return NULL instead. */
1409 static rtx
1410 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1411 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1412 enum machine_mode mode, enum machine_mode tmode,
1413 bool fallback_p)
1415 rtx op0 = str_rtx;
1416 enum machine_mode int_mode;
1417 enum machine_mode mode1;
1419 if (tmode == VOIDmode)
1420 tmode = mode;
1422 while (GET_CODE (op0) == SUBREG)
1424 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1425 op0 = SUBREG_REG (op0);
1428 /* If we have an out-of-bounds access to a register, just return an
1429 uninitialized register of the required mode. This can occur if the
1430 source code contains an out-of-bounds access to a small array. */
1431 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1432 return gen_reg_rtx (tmode);
1434 if (REG_P (op0)
1435 && mode == GET_MODE (op0)
1436 && bitnum == 0
1437 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1439 /* We're trying to extract a full register from itself. */
1440 return op0;
1443 /* See if we can get a better vector mode before extracting. */
1444 if (VECTOR_MODE_P (GET_MODE (op0))
1445 && !MEM_P (op0)
1446 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1448 enum machine_mode new_mode;
1450 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1451 new_mode = MIN_MODE_VECTOR_FLOAT;
1452 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1453 new_mode = MIN_MODE_VECTOR_FRACT;
1454 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1455 new_mode = MIN_MODE_VECTOR_UFRACT;
1456 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1457 new_mode = MIN_MODE_VECTOR_ACCUM;
1458 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1459 new_mode = MIN_MODE_VECTOR_UACCUM;
1460 else
1461 new_mode = MIN_MODE_VECTOR_INT;
1463 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1464 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1465 && targetm.vector_mode_supported_p (new_mode))
1466 break;
1467 if (new_mode != VOIDmode)
1468 op0 = gen_lowpart (new_mode, op0);
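   /* Example: extracting an SFmode element from a value held in a
      16-byte integer vector mode such as V4SImode starts the search at
      MIN_MODE_VECTOR_FLOAT and, on a target that supports it (x86 with
      SSE, say), settles on V4SFmode, so the vec_extract pattern below
      can be used directly.  */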
1471 /* Use vec_extract patterns for extracting parts of vectors whenever
1472 available. */
1473 if (VECTOR_MODE_P (GET_MODE (op0))
1474 && !MEM_P (op0)
1475 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1476 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1477 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1479 struct expand_operand ops[3];
1480 enum machine_mode outermode = GET_MODE (op0);
1481 enum machine_mode innermode = GET_MODE_INNER (outermode);
1482 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1483 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1485 create_output_operand (&ops[0], target, innermode);
1486 create_input_operand (&ops[1], op0, outermode);
1487 create_integer_operand (&ops[2], pos);
1488 if (maybe_expand_insn (icode, 3, ops))
1490 target = ops[0].value;
1491 if (GET_MODE (target) != mode)
1492 return gen_lowpart (tmode, target);
1493 return target;
1497 /* Make sure we are playing with integral modes. Pun with subregs
1498 if we aren't. */
1500 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1501 if (imode != GET_MODE (op0))
1503 if (MEM_P (op0))
1504 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1505 else if (imode != BLKmode)
1507 op0 = gen_lowpart (imode, op0);
1509 /* If we got a SUBREG, force it into a register since we
1510 aren't going to be able to do another SUBREG on it. */
1511 if (GET_CODE (op0) == SUBREG)
1512 op0 = force_reg (imode, op0);
1514 else if (REG_P (op0))
1516 rtx reg, subreg;
1517 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1518 MODE_INT);
1519 reg = gen_reg_rtx (imode);
1520 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1521 emit_move_insn (subreg, op0);
1522 op0 = reg;
1523 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1525 else
1527 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1528 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1529 emit_move_insn (mem, op0);
1530 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1535 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1536 If that's wrong, the solution is to test for it and set TARGET to 0
1537 if needed. */
1539 /* Get the mode of the field to use for atomic access or subreg
1540 conversion. */
1541 mode1 = mode;
1542 if (SCALAR_INT_MODE_P (tmode))
1544 enum machine_mode try_mode = mode_for_size (bitsize,
1545 GET_MODE_CLASS (tmode), 0);
1546 if (try_mode != BLKmode)
1547 mode1 = try_mode;
1549 gcc_assert (mode1 != BLKmode);
1551 /* Extraction of a full MODE1 value can be done with a subreg as long
1552 as the least significant bit of the value is the least significant
1553 bit of either OP0 or a word of OP0. */
1554 if (!MEM_P (op0)
1555 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1556 && bitsize == GET_MODE_BITSIZE (mode1)
1557 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1559 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1560 bitnum / BITS_PER_UNIT);
1561 if (sub)
1562 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
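   /* Example: on a typical 32-bit little-endian target, extracting bits
      [32, 64) of a DImode pseudo as SImode satisfies the conditions
      above and simply yields (subreg:SI (reg:DI ...) 4), i.e. the high
      word, with no shifting or masking.  */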
1565 /* Extraction of a full MODE1 value can be done with a load as long as
1566 the field is on a byte boundary and is sufficiently aligned. */
1567 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1569 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1570 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1573 /* Handle fields bigger than a word. */
1575 if (bitsize > BITS_PER_WORD)
1577 /* Here we transfer the words of the field
1578 in the order least significant first.
1579 This is because the most significant word is the one which may
1580 be less than full. */
1582 unsigned int backwards = WORDS_BIG_ENDIAN;
1583 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1584 unsigned int i;
1585 rtx last;
1587 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1588 target = gen_reg_rtx (mode);
1590 /* Indicate for flow that the entire target reg is being set. */
1591 emit_clobber (target);
1593 last = get_last_insn ();
1594 for (i = 0; i < nwords; i++)
1596 /* If I is 0, use the low-order word in both field and target;
1597 if I is 1, use the next to lowest word; and so on. */
1598 /* Word number in TARGET to use. */
1599 unsigned int wordnum
1600 = (backwards
1601 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1602 : i);
1603 /* Offset from start of field in OP0. */
1604 unsigned int bit_offset = (backwards
1605 ? MAX ((int) bitsize - ((int) i + 1)
1606 * BITS_PER_WORD,
 1607 0)
 1608 : (int) i * BITS_PER_WORD);
1609 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1610 rtx result_part
1611 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1612 bitsize - i * BITS_PER_WORD),
1613 bitnum + bit_offset, 1, target_part,
1614 mode, word_mode, fallback_p);
1616 gcc_assert (target_part);
1617 if (!result_part)
1619 delete_insns_since (last);
1620 return NULL;
1623 if (result_part != target_part)
1624 emit_move_insn (target_part, result_part);
1627 if (unsignedp)
1629 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1630 need to be zero'd out. */
1631 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1633 unsigned int i, total_words;
1635 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1636 for (i = nwords; i < total_words; i++)
1637 emit_move_insn
1638 (operand_subword (target,
1639 backwards ? total_words - i - 1 : i,
1640 1, VOIDmode),
1641 const0_rtx);
1643 return target;
1646 /* Signed bit field: sign-extend with two arithmetic shifts. */
1647 target = expand_shift (LSHIFT_EXPR, mode, target,
1648 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1649 return expand_shift (RSHIFT_EXPR, mode, target,
1650 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
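   /* E.g. a signed 40-bit field assembled into a DImode TARGET is
      sign-extended by shifting left 24 bits and then arithmetically
      right 24 bits.  */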
1653 /* If OP0 is a multi-word register, narrow it to the affected word.
1654 If the region spans two words, defer to extract_split_bit_field. */
1655 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1657 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1658 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1659 bitnum %= BITS_PER_WORD;
1660 if (bitnum + bitsize > BITS_PER_WORD)
1662 if (!fallback_p)
1663 return NULL_RTX;
1664 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1665 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1669 /* From here on we know the desired field is smaller than a word.
1670 If OP0 is a register, it too fits within a word. */
1671 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1672 extraction_insn extv;
1673 if (!MEM_P (op0)
1674 /* ??? We could limit the structure size to the part of OP0 that
1675 contains the field, with appropriate checks for endianness
1676 and TRULY_NOOP_TRUNCATION. */
1677 && get_best_reg_extraction_insn (&extv, pattern,
1678 GET_MODE_BITSIZE (GET_MODE (op0)),
1679 tmode))
1681 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1682 unsignedp, target, mode,
1683 tmode);
1684 if (result)
1685 return result;
1688 /* If OP0 is a memory, try copying it to a register and seeing if a
1689 cheap register alternative is available. */
1690 if (MEM_P (op0))
1692 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1693 tmode))
1695 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1696 bitnum, unsignedp,
1697 target, mode,
1698 tmode);
1699 if (result)
1700 return result;
1703 rtx last = get_last_insn ();
1705 /* Try loading part of OP0 into a register and extracting the
1706 bitfield from that. */
1707 unsigned HOST_WIDE_INT bitpos;
1708 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1709 0, 0, tmode, &bitpos);
1710 if (xop0)
1712 xop0 = copy_to_reg (xop0);
1713 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1714 unsignedp, target,
1715 mode, tmode, false);
1716 if (result)
1717 return result;
1718 delete_insns_since (last);
1722 if (!fallback_p)
1723 return NULL;
1725 /* Find a correspondingly-sized integer field, so we can apply
1726 shifts and masks to it. */
1727 int_mode = int_mode_for_mode (tmode);
1728 if (int_mode == BLKmode)
1729 int_mode = int_mode_for_mode (mode);
1730 /* Should probably push op0 out to memory and then do a load. */
1731 gcc_assert (int_mode != BLKmode);
1733 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1734 target, unsignedp);
1735 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1738 /* Generate code to extract a bit-field from STR_RTX
1739 containing BITSIZE bits, starting at BITNUM,
1740 and put it in TARGET if possible (if TARGET is nonzero).
1741 Regardless of TARGET, we return the rtx for where the value is placed.
1743 STR_RTX is the structure containing the byte (a REG or MEM).
1744 UNSIGNEDP is nonzero if this is an unsigned bit field.
1745 MODE is the natural mode of the field value once extracted.
1746 TMODE is the mode the caller would like the value to have;
1747 but the value may be returned with type MODE instead.
1749 If a TARGET is specified and we can store in it at no extra cost,
1750 we do so, and return TARGET.
1751 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1752 if they are equally easy. */
1755 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1756 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1757 enum machine_mode mode, enum machine_mode tmode)
1759 enum machine_mode mode1;
1761 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1762 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1763 mode1 = GET_MODE (str_rtx);
1764 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1765 mode1 = GET_MODE (target);
1766 else
1767 mode1 = tmode;
1769 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1771 rtx result;
1773 /* Extraction of a full MODE1 value can be done with a load as long as
1774 the field is on a byte boundary and is sufficiently aligned. */
1775 if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, mode1))
1776 result = adjust_bitfield_address (str_rtx, mode1,
1777 bitnum / BITS_PER_UNIT);
1778 else
1780 str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1781 &bitnum);
1782 result = extract_fixed_bit_field_1 (mode, str_rtx, bitsize, bitnum,
1783 target, unsignedp);
1786 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1789 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1790 target, mode, tmode, true);
1793 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1794 from bit BITNUM of OP0.
1796 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1797 If TARGET is nonzero, attempts to store the value there
1798 and return TARGET, but this is not guaranteed.
1799 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1801 static rtx
1802 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1803 unsigned HOST_WIDE_INT bitsize,
1804 unsigned HOST_WIDE_INT bitnum, rtx target,
1805 int unsignedp)
1807 if (MEM_P (op0))
1809 enum machine_mode mode
1810 = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1811 MEM_VOLATILE_P (op0));
1813 if (mode == VOIDmode)
1814 /* The only way this should occur is if the field spans word
1815 boundaries. */
1816 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1818 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1821 return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1822 target, unsignedp);
1825 /* Helper function for extract_fixed_bit_field, extracts
1826 the bit field always using the MODE of OP0. */
1828 static rtx
1829 extract_fixed_bit_field_1 (enum machine_mode tmode, rtx op0,
1830 unsigned HOST_WIDE_INT bitsize,
1831 unsigned HOST_WIDE_INT bitnum, rtx target,
1832 int unsignedp)
1834 enum machine_mode mode = GET_MODE (op0);
1835 gcc_assert (SCALAR_INT_MODE_P (mode));
1837 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1838 for invalid input, such as extracting the equivalent of f5 from
1839 gcc.dg/pr48335-2.c. */
1841 if (BYTES_BIG_ENDIAN)
1842 /* BITNUM is the distance between our msb and that of OP0.
1843 Convert it to the distance from the lsb. */
1844 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1846 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1847 We have reduced the big-endian case to the little-endian case. */
1849 if (unsignedp)
1851 if (bitnum)
1853 /* If the field does not already start at the lsb,
1854 shift it so it does. */
1855 /* Maybe propagate the target for the shift. */
1856 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1857 if (tmode != mode)
1858 subtarget = 0;
1859 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1861 /* Convert the value to the desired mode. */
1862 if (mode != tmode)
1863 op0 = convert_to_mode (tmode, op0, 1);
1865 /* Unless the msb of the field used to be the msb when we shifted,
1866 mask out the upper bits. */
1868 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1869 return expand_binop (GET_MODE (op0), and_optab, op0,
1870 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1871 target, 1, OPTAB_LIB_WIDEN);
1872 return op0;
1875 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1876 then arithmetic-shift its lsb to the lsb of the word. */
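/* Concretely, once the narrowest integer mode M covering BITSIZE + BITNUM
   bits has been chosen below, the extracted value is
   ((signed) OP0 << (M_BITS - BITSIZE - BITNUM)) >> (M_BITS - BITSIZE)
   with an arithmetic right shift; e.g. a 13-bit field at BITNUM 6 needs
   19 bits, so SImode is used and the shifts are by 13 and 19.  */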
1877 op0 = force_reg (mode, op0);
1879 /* Find the narrowest integer mode that contains the field. */
1881 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1882 mode = GET_MODE_WIDER_MODE (mode))
1883 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1885 op0 = convert_to_mode (mode, op0, 0);
1886 break;
1889 if (mode != tmode)
1890 target = 0;
1892 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1894 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1895 /* Maybe propagate the target for the shift. */
1896 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1897 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1900 return expand_shift (RSHIFT_EXPR, mode, op0,
1901 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1904 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1905 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1906 complement of that if COMPLEMENT. The mask is truncated if
1907 necessary to the width of mode MODE. The mask is zero-extended if
1908 BITSIZE+BITPOS is too small for MODE. */
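/* For instance, mask_rtx (SImode, 4, 8, 0) is the constant 0xff0
   (eight ones starting at bit 4), and mask_rtx (SImode, 4, 8, 1) is its
   SImode complement 0xfffff00f.  */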
1910 static rtx
1911 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1913 double_int mask;
1915 mask = double_int::mask (bitsize);
1916 mask = mask.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1918 if (complement)
1919 mask = ~mask;
1921 return immed_double_int_const (mask, mode);
1924 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1925 VALUE << BITPOS. */
1927 static rtx
1928 lshift_value (enum machine_mode mode, unsigned HOST_WIDE_INT value,
1929 int bitpos)
1931 double_int val;
1933 val = double_int::from_uhwi (value);
1934 val = val.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1936 return immed_double_int_const (val, mode);
1939 /* Extract a bit field that is split across two words
1940 and return an RTX for the result.
1942 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1943 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1944 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
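/* As an illustration, on a 32-bit-word little-endian target a 20-bit field
   at BITPOS 24 is assembled from two pieces: the top 8 bits of the first
   word become bits 0..7 of the result, and the low 12 bits of the second
   word, shifted left by 8, become bits 8..19; the pieces are IORed.  */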
1946 static rtx
1947 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1948 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1950 unsigned int unit;
1951 unsigned int bitsdone = 0;
1952 rtx result = NULL_RTX;
1953 int first = 1;
1955 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1956 much at a time. */
1957 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1958 unit = BITS_PER_WORD;
1959 else
1960 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1962 while (bitsdone < bitsize)
1964 unsigned HOST_WIDE_INT thissize;
1965 rtx part, word;
1966 unsigned HOST_WIDE_INT thispos;
1967 unsigned HOST_WIDE_INT offset;
1969 offset = (bitpos + bitsdone) / unit;
1970 thispos = (bitpos + bitsdone) % unit;
1972 /* THISSIZE must not overrun a word boundary. Otherwise,
1973 extract_fixed_bit_field will call us again, and we will mutually
1974 recurse forever. */
1975 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1976 thissize = MIN (thissize, unit - thispos);
1978 /* If OP0 is a register, then handle OFFSET here.
1980 When handling multiword bitfields, extract_bit_field may pass
1981 down a word_mode SUBREG of a larger REG for a bitfield that actually
1982 crosses a word boundary. Thus, for a SUBREG, we must find
1983 the current word starting from the base register. */
1984 if (GET_CODE (op0) == SUBREG)
1986 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1987 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1988 GET_MODE (SUBREG_REG (op0)));
1989 offset = 0;
1991 else if (REG_P (op0))
1993 word = operand_subword_force (op0, offset, GET_MODE (op0));
1994 offset = 0;
1996 else
1997 word = op0;
1999 /* Extract the parts in bit-counting order,
2000 whose meaning is determined by BYTES_PER_UNIT.
2001 OFFSET is in UNITs, and UNIT is in bits. */
2002 part = extract_fixed_bit_field (word_mode, word, thissize,
2003 offset * unit + thispos, 0, 1);
2004 bitsdone += thissize;
2006 /* Shift this part into place for the result. */
2007 if (BYTES_BIG_ENDIAN)
2009 if (bitsize != bitsdone)
2010 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2011 bitsize - bitsdone, 0, 1);
2013 else
2015 if (bitsdone != thissize)
2016 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2017 bitsdone - thissize, 0, 1);
2020 if (first)
2021 result = part;
2022 else
2023 /* Combine the parts with bitwise or. This works
2024 because we extracted each part as an unsigned bit field. */
2025 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2026 OPTAB_LIB_WIDEN);
2028 first = 0;
2031 /* Unsigned bit field: we are done. */
2032 if (unsignedp)
2033 return result;
2034 /* Signed bit field: sign-extend with two arithmetic shifts. */
2035 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2036 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2037 return expand_shift (RSHIFT_EXPR, word_mode, result,
2038 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2041 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2042 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2043 MODE, fill the upper bits with zeros. Fail if the layout of either
2044 mode is unknown (as for CC modes) or if the extraction would involve
2045 unprofitable mode punning. Return the value on success, otherwise
2046 return null.
2048 This is different from gen_lowpart* in these respects:
2050 - the returned value must always be considered an rvalue
2052 - when MODE is wider than SRC_MODE, the extraction involves
2053 a zero extension
2055 - when MODE is smaller than SRC_MODE, the extraction involves
2056 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2058 In other words, this routine performs a computation, whereas the
2059 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2060 operations. */
2063 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2065 enum machine_mode int_mode, src_int_mode;
2067 if (mode == src_mode)
2068 return src;
2070 if (CONSTANT_P (src))
2072 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2073 fails, it will happily create (subreg (symbol_ref)) or similar
2074 invalid SUBREGs. */
2075 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2076 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2077 if (ret)
2078 return ret;
2080 if (GET_MODE (src) == VOIDmode
2081 || !validate_subreg (mode, src_mode, src, byte))
2082 return NULL_RTX;
2084 src = force_reg (GET_MODE (src), src);
2085 return gen_rtx_SUBREG (mode, src, byte);
2088 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2089 return NULL_RTX;
2091 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2092 && MODES_TIEABLE_P (mode, src_mode))
2094 rtx x = gen_lowpart_common (mode, src);
2095 if (x)
2096 return x;
2099 src_int_mode = int_mode_for_mode (src_mode);
2100 int_mode = int_mode_for_mode (mode);
2101 if (src_int_mode == BLKmode || int_mode == BLKmode)
2102 return NULL_RTX;
2104 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2105 return NULL_RTX;
2106 if (!MODES_TIEABLE_P (int_mode, mode))
2107 return NULL_RTX;
2109 src = gen_lowpart (src_int_mode, src);
2110 src = convert_modes (int_mode, src_int_mode, src, true);
2111 src = gen_lowpart (mode, src);
2112 return src;
2115 /* Add INC into TARGET. */
2117 void
2118 expand_inc (rtx target, rtx inc)
2120 rtx value = expand_binop (GET_MODE (target), add_optab,
2121 target, inc,
2122 target, 0, OPTAB_LIB_WIDEN);
2123 if (value != target)
2124 emit_move_insn (target, value);
2127 /* Subtract DEC from TARGET. */
2129 void
2130 expand_dec (rtx target, rtx dec)
2132 rtx value = expand_binop (GET_MODE (target), sub_optab,
2133 target, dec,
2134 target, 0, OPTAB_LIB_WIDEN);
2135 if (value != target)
2136 emit_move_insn (target, value);
2139 /* Output a shift instruction for expression code CODE,
2140 with SHIFTED being the rtx for the value to shift,
2141 and AMOUNT the rtx for the amount to shift by.
2142 Store the result in the rtx TARGET, if that is convenient.
2143 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2144 Return the rtx for where the value is. */
2146 static rtx
2147 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2148 rtx amount, rtx target, int unsignedp)
2150 rtx op1, temp = 0;
2151 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2152 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2153 optab lshift_optab = ashl_optab;
2154 optab rshift_arith_optab = ashr_optab;
2155 optab rshift_uns_optab = lshr_optab;
2156 optab lrotate_optab = rotl_optab;
2157 optab rrotate_optab = rotr_optab;
2158 enum machine_mode op1_mode;
2159 enum machine_mode scalar_mode = mode;
2160 int attempt;
2161 bool speed = optimize_insn_for_speed_p ();
2163 if (VECTOR_MODE_P (mode))
2164 scalar_mode = GET_MODE_INNER (mode);
2165 op1 = amount;
2166 op1_mode = GET_MODE (op1);
2168 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2169 shift amount is a vector, use the vector/vector shift patterns. */
2170 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2172 lshift_optab = vashl_optab;
2173 rshift_arith_optab = vashr_optab;
2174 rshift_uns_optab = vlshr_optab;
2175 lrotate_optab = vrotl_optab;
2176 rrotate_optab = vrotr_optab;
2179 /* Previously detected shift-counts computed by NEGATE_EXPR
2180 and shifted in the other direction; but that does not work
2181 on all machines. */
2183 if (SHIFT_COUNT_TRUNCATED)
2185 if (CONST_INT_P (op1)
2186 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2187 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2188 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2189 % GET_MODE_BITSIZE (scalar_mode));
2190 else if (GET_CODE (op1) == SUBREG
2191 && subreg_lowpart_p (op1)
2192 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2193 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2194 op1 = SUBREG_REG (op1);
2197 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2198 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2199 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2200 amount instead. */
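/* For example, in SImode a rotate left by 31 is rewritten as a rotate
   right by 1, and a rotate right by 20 as a rotate left by 12.  */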
2201 if (rotate
2202 && CONST_INT_P (op1)
2203 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2204 GET_MODE_BITSIZE (scalar_mode) - 1))
2206 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2207 left = !left;
2208 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2211 if (op1 == const0_rtx)
2212 return shifted;
2214 /* Check whether it's cheaper to implement a left shift by a constant
2215 bit count by a sequence of additions. */
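/* Each addition in the loop below doubles the value, so a shift by K
   becomes K doublings; e.g. X << 3 is computed as T1 = X + X;
   T2 = T1 + T1; T3 = T2 + T2.  This only pays off when K * add_cost
   is below the cost of the shift itself.  */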
2216 if (code == LSHIFT_EXPR
2217 && CONST_INT_P (op1)
2218 && INTVAL (op1) > 0
2219 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2220 && INTVAL (op1) < MAX_BITS_PER_WORD
2221 && (shift_cost (speed, mode, INTVAL (op1))
2222 > INTVAL (op1) * add_cost (speed, mode))
2223 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2225 int i;
2226 for (i = 0; i < INTVAL (op1); i++)
2228 temp = force_reg (mode, shifted);
2229 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2230 unsignedp, OPTAB_LIB_WIDEN);
2232 return shifted;
2235 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2237 enum optab_methods methods;
2239 if (attempt == 0)
2240 methods = OPTAB_DIRECT;
2241 else if (attempt == 1)
2242 methods = OPTAB_WIDEN;
2243 else
2244 methods = OPTAB_LIB_WIDEN;
2246 if (rotate)
2248 /* Widening does not work for rotation. */
2249 if (methods == OPTAB_WIDEN)
2250 continue;
2251 else if (methods == OPTAB_LIB_WIDEN)
2253 /* If we have been unable to open-code this by a rotation,
2254 do it as the IOR of two shifts. I.e., to rotate A
2255 by N bits, compute
2256 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2257 where C is the bitsize of A.
2259 It is theoretically possible that the target machine might
2260 not be able to perform either shift and hence we would
2261 be making two libcalls rather than just the one for the
2262 shift (similarly if IOR could not be done). We will allow
2263 this extremely unlikely lossage to avoid complicating the
2264 code below. */
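/* For instance, with C == 32 and N == 3 the expression becomes
   (A << 3) | ((unsigned) A >> 29), i.e. a 32-bit left rotation by 3;
   the (-N) & (C - 1) form keeps the right-shift count in the range
   0 .. C - 1 even when N is not a compile-time constant.  */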
2266 rtx subtarget = target == shifted ? 0 : target;
2267 rtx new_amount, other_amount;
2268 rtx temp1;
2270 new_amount = op1;
2271 if (op1 == const0_rtx)
2272 return shifted;
2273 else if (CONST_INT_P (op1))
2274 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2275 - INTVAL (op1));
2276 else
2278 other_amount
2279 = simplify_gen_unary (NEG, GET_MODE (op1),
2280 op1, GET_MODE (op1));
2281 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2282 other_amount
2283 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2284 gen_int_mode (mask, GET_MODE (op1)));
2287 shifted = force_reg (mode, shifted);
2289 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2290 mode, shifted, new_amount, 0, 1);
2291 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2292 mode, shifted, other_amount,
2293 subtarget, 1);
2294 return expand_binop (mode, ior_optab, temp, temp1, target,
2295 unsignedp, methods);
2298 temp = expand_binop (mode,
2299 left ? lrotate_optab : rrotate_optab,
2300 shifted, op1, target, unsignedp, methods);
2302 else if (unsignedp)
2303 temp = expand_binop (mode,
2304 left ? lshift_optab : rshift_uns_optab,
2305 shifted, op1, target, unsignedp, methods);
2307 /* Do arithmetic shifts.
2308 Also, if we are going to widen the operand, we can just as well
2309 use an arithmetic right-shift instead of a logical one. */
2310 if (temp == 0 && ! rotate
2311 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2313 enum optab_methods methods1 = methods;
2315 /* If trying to widen a log shift to an arithmetic shift,
2316 don't accept an arithmetic shift of the same size. */
2317 if (unsignedp)
2318 methods1 = OPTAB_MUST_WIDEN;
2320 /* Arithmetic shift */
2322 temp = expand_binop (mode,
2323 left ? lshift_optab : rshift_arith_optab,
2324 shifted, op1, target, unsignedp, methods1);
2327 /* We used to try extzv here for logical right shifts, but that was
2328 only useful for one machine, the VAX, and caused poor code
2329 generation there for lshrdi3, so the code was deleted and a
2330 define_expand for lshrsi3 was added to vax.md. */
2333 gcc_assert (temp);
2334 return temp;
2337 /* Output a shift instruction for expression code CODE,
2338 with SHIFTED being the rtx for the value to shift,
2339 and AMOUNT the amount to shift by.
2340 Store the result in the rtx TARGET, if that is convenient.
2341 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2342 Return the rtx for where the value is. */
2345 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2346 int amount, rtx target, int unsignedp)
2348 return expand_shift_1 (code, mode,
2349 shifted, GEN_INT (amount), target, unsignedp);
2352 /* Output a shift instruction for expression code CODE,
2353 with SHIFTED being the rtx for the value to shift,
2354 and AMOUNT the tree for the amount to shift by.
2355 Store the result in the rtx TARGET, if that is convenient.
2356 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2357 Return the rtx for where the value is. */
2360 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2361 tree amount, rtx target, int unsignedp)
2363 return expand_shift_1 (code, mode,
2364 shifted, expand_normal (amount), target, unsignedp);
2368 /* Indicates the type of fixup needed after a constant multiplication.
2369 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2370 the result should be negated, and ADD_VARIANT means that the
2371 multiplicand should be added to the result. */
2372 enum mult_variant {basic_variant, negate_variant, add_variant};
2374 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2375 const struct mult_cost *, enum machine_mode mode);
2376 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2377 struct algorithm *, enum mult_variant *, int);
2378 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2379 const struct algorithm *, enum mult_variant);
2380 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2381 static rtx extract_high_half (enum machine_mode, rtx);
2382 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2383 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2384 int, int);
2385 /* Compute and return the best algorithm for multiplying by T.
2386 The algorithm must cost less than COST_LIMIT.
2387 If retval.cost >= COST_LIMIT, no algorithm was found and all
2388 other fields of the returned struct are undefined.
2389 MODE is the machine mode of the multiplication. */
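/* For example, for T == 10 one sequence this search can return is
   "multiply by 5, then shift": accum = (x << 2) + x; accum <<= 1;
   i.e. one shift-and-add and one shift instead of a multiply.  Whether
   it is actually chosen depends on the target's add, shift and
   shift-and-add costs.  */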
2391 static void
2392 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2393 const struct mult_cost *cost_limit, enum machine_mode mode)
2395 int m;
2396 struct algorithm *alg_in, *best_alg;
2397 struct mult_cost best_cost;
2398 struct mult_cost new_limit;
2399 int op_cost, op_latency;
2400 unsigned HOST_WIDE_INT orig_t = t;
2401 unsigned HOST_WIDE_INT q;
2402 int maxm, hash_index;
2403 bool cache_hit = false;
2404 enum alg_code cache_alg = alg_zero;
2405 bool speed = optimize_insn_for_speed_p ();
2406 enum machine_mode imode;
2407 struct alg_hash_entry *entry_ptr;
2409 /* Indicate that no algorithm is yet found. If no algorithm
2410 is found, this value will be returned and indicate failure. */
2411 alg_out->cost.cost = cost_limit->cost + 1;
2412 alg_out->cost.latency = cost_limit->latency + 1;
2414 if (cost_limit->cost < 0
2415 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2416 return;
2418 /* Be prepared for vector modes. */
2419 imode = GET_MODE_INNER (mode);
2420 if (imode == VOIDmode)
2421 imode = mode;
2423 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2425 /* Restrict the bits of "t" to the multiplication's mode. */
2426 t &= GET_MODE_MASK (imode);
2428 /* t == 1 can be done in zero cost. */
2429 if (t == 1)
2431 alg_out->ops = 1;
2432 alg_out->cost.cost = 0;
2433 alg_out->cost.latency = 0;
2434 alg_out->op[0] = alg_m;
2435 return;
2438 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2439 fail now. */
2440 if (t == 0)
2442 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2443 return;
2444 else
2446 alg_out->ops = 1;
2447 alg_out->cost.cost = zero_cost (speed);
2448 alg_out->cost.latency = zero_cost (speed);
2449 alg_out->op[0] = alg_zero;
2450 return;
2454 /* We'll be needing a couple extra algorithm structures now. */
2456 alg_in = XALLOCA (struct algorithm);
2457 best_alg = XALLOCA (struct algorithm);
2458 best_cost = *cost_limit;
2460 /* Compute the hash index. */
2461 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2463 /* See if we already know what to do for T. */
2464 entry_ptr = alg_hash_entry_ptr (hash_index);
2465 if (entry_ptr->t == t
2466 && entry_ptr->mode == mode
2468 && entry_ptr->speed == speed
2469 && entry_ptr->alg != alg_unknown)
2471 cache_alg = entry_ptr->alg;
2473 if (cache_alg == alg_impossible)
2475 /* The cache tells us that it's impossible to synthesize
2476 multiplication by T within entry_ptr->cost. */
2477 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2478 /* COST_LIMIT is at least as restrictive as the one
2479 recorded in the hash table, in which case we have no
2480 hope of synthesizing a multiplication. Just
2481 return. */
2482 return;
2484 /* If we get here, COST_LIMIT is less restrictive than the
2485 one recorded in the hash table, so we may be able to
2486 synthesize a multiplication. Proceed as if we didn't
2487 have the cache entry. */
2489 else
2491 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2492 /* The cached algorithm shows that this multiplication
2493 requires more cost than COST_LIMIT. Just return. This
2494 way, we don't clobber this cache entry with
2495 alg_impossible but retain useful information. */
2496 return;
2498 cache_hit = true;
2500 switch (cache_alg)
2502 case alg_shift:
2503 goto do_alg_shift;
2505 case alg_add_t_m2:
2506 case alg_sub_t_m2:
2507 goto do_alg_addsub_t_m2;
2509 case alg_add_factor:
2510 case alg_sub_factor:
2511 goto do_alg_addsub_factor;
2513 case alg_add_t2_m:
2514 goto do_alg_add_t2_m;
2516 case alg_sub_t2_m:
2517 goto do_alg_sub_t2_m;
2519 default:
2520 gcc_unreachable ();
2525 /* If we have a group of zero bits at the low-order part of T, try
2526 multiplying by the remaining bits and then doing a shift. */
2528 if ((t & 1) == 0)
2530 do_alg_shift:
2531 m = floor_log2 (t & -t); /* m = number of low zero bits */
2532 if (m < maxm)
2534 q = t >> m;
2535 /* The function expand_shift will choose between a shift and
2536 a sequence of additions, so the observed cost is given as
2537 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2538 op_cost = m * add_cost (speed, mode);
2539 if (shift_cost (speed, mode, m) < op_cost)
2540 op_cost = shift_cost (speed, mode, m);
2541 new_limit.cost = best_cost.cost - op_cost;
2542 new_limit.latency = best_cost.latency - op_cost;
2543 synth_mult (alg_in, q, &new_limit, mode);
2545 alg_in->cost.cost += op_cost;
2546 alg_in->cost.latency += op_cost;
2547 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2549 struct algorithm *x;
2550 best_cost = alg_in->cost;
2551 x = alg_in, alg_in = best_alg, best_alg = x;
2552 best_alg->log[best_alg->ops] = m;
2553 best_alg->op[best_alg->ops] = alg_shift;
2556 /* See if treating ORIG_T as a signed number yields a better
2557 sequence. Try this sequence only for a negative ORIG_T
2558 as it would be useless for a non-negative ORIG_T. */
2559 if ((HOST_WIDE_INT) orig_t < 0)
2561 /* Shift ORIG_T as follows because a right shift of a
2562 negative-valued signed type is implementation
2563 defined. */
2564 q = ~(~orig_t >> m);
2565 /* The function expand_shift will choose between a shift
2566 and a sequence of additions, so the observed cost is
2567 given as MIN (m * add_cost(speed, mode),
2568 shift_cost(speed, mode, m)). */
2569 op_cost = m * add_cost (speed, mode);
2570 if (shift_cost (speed, mode, m) < op_cost)
2571 op_cost = shift_cost (speed, mode, m);
2572 new_limit.cost = best_cost.cost - op_cost;
2573 new_limit.latency = best_cost.latency - op_cost;
2574 synth_mult (alg_in, q, &new_limit, mode);
2576 alg_in->cost.cost += op_cost;
2577 alg_in->cost.latency += op_cost;
2578 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2580 struct algorithm *x;
2581 best_cost = alg_in->cost;
2582 x = alg_in, alg_in = best_alg, best_alg = x;
2583 best_alg->log[best_alg->ops] = m;
2584 best_alg->op[best_alg->ops] = alg_shift;
2588 if (cache_hit)
2589 goto done;
2592 /* If we have an odd number, add or subtract one. */
2593 if ((t & 1) != 0)
2595 unsigned HOST_WIDE_INT w;
2597 do_alg_addsub_t_m2:
2598 for (w = 1; (w & t) != 0; w <<= 1)
2600 /* If T was -1, then W will be zero after the loop. This is another
2601 case where T ends with ...111. Handling it as (T + 1) minus 1
2602 produces slightly better code and makes algorithm selection
2603 much faster than treating it like the ...0111 case
2604 below. */
2605 if (w == 0
2606 || (w > 2
2607 /* Reject the case where t is 3.
2608 Thus we prefer addition in that case. */
2609 && t != 3))
2611 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2613 op_cost = add_cost (speed, mode);
2614 new_limit.cost = best_cost.cost - op_cost;
2615 new_limit.latency = best_cost.latency - op_cost;
2616 synth_mult (alg_in, t + 1, &new_limit, mode);
2618 alg_in->cost.cost += op_cost;
2619 alg_in->cost.latency += op_cost;
2620 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2622 struct algorithm *x;
2623 best_cost = alg_in->cost;
2624 x = alg_in, alg_in = best_alg, best_alg = x;
2625 best_alg->log[best_alg->ops] = 0;
2626 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2629 else
2631 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2633 op_cost = add_cost (speed, mode);
2634 new_limit.cost = best_cost.cost - op_cost;
2635 new_limit.latency = best_cost.latency - op_cost;
2636 synth_mult (alg_in, t - 1, &new_limit, mode);
2638 alg_in->cost.cost += op_cost;
2639 alg_in->cost.latency += op_cost;
2640 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2642 struct algorithm *x;
2643 best_cost = alg_in->cost;
2644 x = alg_in, alg_in = best_alg, best_alg = x;
2645 best_alg->log[best_alg->ops] = 0;
2646 best_alg->op[best_alg->ops] = alg_add_t_m2;
2650 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2651 quickly with a - a * n for some appropriate constant n. */
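/* E.g. for ORIG_T == -7 we get m == 3 and the product is formed as
   a - (a << 3).  */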
2652 m = exact_log2 (-orig_t + 1);
2653 if (m >= 0 && m < maxm)
2655 op_cost = shiftsub1_cost (speed, mode, m);
2656 new_limit.cost = best_cost.cost - op_cost;
2657 new_limit.latency = best_cost.latency - op_cost;
2658 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2659 &new_limit, mode);
2661 alg_in->cost.cost += op_cost;
2662 alg_in->cost.latency += op_cost;
2663 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2665 struct algorithm *x;
2666 best_cost = alg_in->cost;
2667 x = alg_in, alg_in = best_alg, best_alg = x;
2668 best_alg->log[best_alg->ops] = m;
2669 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2673 if (cache_hit)
2674 goto done;
2677 /* Look for factors of t of the form
2678 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2679 If we find such a factor, we can multiply by t using an algorithm that
2680 multiplies by q, shift the result by m and add/subtract it to itself.
2682 We search for large factors first and loop down, even if large factors
2683 are less probable than small; if we find a large factor we will find a
2684 good sequence quickly, and therefore be able to prune (by decreasing
2685 COST_LIMIT) the search. */
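/* For example, T == 45 factors as 3 * (2**4 - 1), so one candidate is
   t = (x << 1) + x;  result = (t << 4) - t;  i.e. multiply by 3 first,
   then apply the (2**4 - 1) factor with a single shift-and-subtract.  */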
2687 do_alg_addsub_factor:
2688 for (m = floor_log2 (t - 1); m >= 2; m--)
2690 unsigned HOST_WIDE_INT d;
2692 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2693 if (t % d == 0 && t > d && m < maxm
2694 && (!cache_hit || cache_alg == alg_add_factor))
2696 /* If the target has a cheap shift-and-add instruction use
2697 that in preference to a shift insn followed by an add insn.
2698 Assume that the shift-and-add is "atomic" with a latency
2699 equal to its cost, otherwise assume that on superscalar
2700 hardware the shift may be executed concurrently with the
2701 earlier steps in the algorithm. */
2702 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2703 if (shiftadd_cost (speed, mode, m) < op_cost)
2705 op_cost = shiftadd_cost (speed, mode, m);
2706 op_latency = op_cost;
2708 else
2709 op_latency = add_cost (speed, mode);
2711 new_limit.cost = best_cost.cost - op_cost;
2712 new_limit.latency = best_cost.latency - op_latency;
2713 synth_mult (alg_in, t / d, &new_limit, mode);
2715 alg_in->cost.cost += op_cost;
2716 alg_in->cost.latency += op_latency;
2717 if (alg_in->cost.latency < op_cost)
2718 alg_in->cost.latency = op_cost;
2719 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2721 struct algorithm *x;
2722 best_cost = alg_in->cost;
2723 x = alg_in, alg_in = best_alg, best_alg = x;
2724 best_alg->log[best_alg->ops] = m;
2725 best_alg->op[best_alg->ops] = alg_add_factor;
2727 /* Other factors will have been taken care of in the recursion. */
2728 break;
2731 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2732 if (t % d == 0 && t > d && m < maxm
2733 && (!cache_hit || cache_alg == alg_sub_factor))
2735 /* If the target has a cheap shift-and-subtract insn use
2736 that in preference to a shift insn followed by a sub insn.
2737 Assume that the shift-and-sub is "atomic" with a latency
2738 equal to its cost, otherwise assume that on superscalar
2739 hardware the shift may be executed concurrently with the
2740 earlier steps in the algorithm. */
2741 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2742 if (shiftsub0_cost (speed, mode, m) < op_cost)
2744 op_cost = shiftsub0_cost (speed, mode, m);
2745 op_latency = op_cost;
2747 else
2748 op_latency = add_cost (speed, mode);
2750 new_limit.cost = best_cost.cost - op_cost;
2751 new_limit.latency = best_cost.latency - op_latency;
2752 synth_mult (alg_in, t / d, &new_limit, mode);
2754 alg_in->cost.cost += op_cost;
2755 alg_in->cost.latency += op_latency;
2756 if (alg_in->cost.latency < op_cost)
2757 alg_in->cost.latency = op_cost;
2758 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2760 struct algorithm *x;
2761 best_cost = alg_in->cost;
2762 x = alg_in, alg_in = best_alg, best_alg = x;
2763 best_alg->log[best_alg->ops] = m;
2764 best_alg->op[best_alg->ops] = alg_sub_factor;
2766 break;
2769 if (cache_hit)
2770 goto done;
2772 /* Try shift-and-add (load effective address) instructions,
2773 i.e. do a*3, a*5, a*9. */
2774 if ((t & 1) != 0)
2776 do_alg_add_t2_m:
2777 q = t - 1;
2778 q = q & -q;
2779 m = exact_log2 (q);
2780 if (m >= 0 && m < maxm)
2782 op_cost = shiftadd_cost (speed, mode, m);
2783 new_limit.cost = best_cost.cost - op_cost;
2784 new_limit.latency = best_cost.latency - op_cost;
2785 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2787 alg_in->cost.cost += op_cost;
2788 alg_in->cost.latency += op_cost;
2789 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2791 struct algorithm *x;
2792 best_cost = alg_in->cost;
2793 x = alg_in, alg_in = best_alg, best_alg = x;
2794 best_alg->log[best_alg->ops] = m;
2795 best_alg->op[best_alg->ops] = alg_add_t2_m;
2798 if (cache_hit)
2799 goto done;
2801 do_alg_sub_t2_m:
2802 q = t + 1;
2803 q = q & -q;
2804 m = exact_log2 (q);
2805 if (m >= 0 && m < maxm)
2807 op_cost = shiftsub0_cost (speed, mode, m);
2808 new_limit.cost = best_cost.cost - op_cost;
2809 new_limit.latency = best_cost.latency - op_cost;
2810 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2812 alg_in->cost.cost += op_cost;
2813 alg_in->cost.latency += op_cost;
2814 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2816 struct algorithm *x;
2817 best_cost = alg_in->cost;
2818 x = alg_in, alg_in = best_alg, best_alg = x;
2819 best_alg->log[best_alg->ops] = m;
2820 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2823 if (cache_hit)
2824 goto done;
2827 done:
2828 /* If best_cost has not decreased, we have not found any algorithm. */
2829 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2831 /* We failed to find an algorithm. Record alg_impossible for
2832 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2833 we are asked to find an algorithm for T within the same or
2834 lower COST_LIMIT, we can immediately return to the
2835 caller. */
2836 entry_ptr->t = t;
2837 entry_ptr->mode = mode;
2838 entry_ptr->speed = speed;
2839 entry_ptr->alg = alg_impossible;
2840 entry_ptr->cost = *cost_limit;
2841 return;
2844 /* Cache the result. */
2845 if (!cache_hit)
2847 entry_ptr->t = t;
2848 entry_ptr->mode = mode;
2849 entry_ptr->speed = speed;
2850 entry_ptr->alg = best_alg->op[best_alg->ops];
2851 entry_ptr->cost.cost = best_cost.cost;
2852 entry_ptr->cost.latency = best_cost.latency;
2855 /* If the sequence is getting too long for `struct algorithm'
2856 to record, make this search fail. */
2857 if (best_alg->ops == MAX_BITS_PER_WORD)
2858 return;
2860 /* Copy the algorithm from temporary space to the space at alg_out.
2861 We avoid using structure assignment because the majority of
2862 best_alg is normally undefined, and this is a critical function. */
2863 alg_out->ops = best_alg->ops + 1;
2864 alg_out->cost = best_cost;
2865 memcpy (alg_out->op, best_alg->op,
2866 alg_out->ops * sizeof *alg_out->op);
2867 memcpy (alg_out->log, best_alg->log,
2868 alg_out->ops * sizeof *alg_out->log);
2871 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2872 Try three variations:
2874 - a shift/add sequence based on VAL itself
2875 - a shift/add sequence based on -VAL, followed by a negation
2876 - a shift/add sequence based on VAL - 1, followed by an addition.
2878 Return true if the cheapest of these cost less than MULT_COST,
2879 describing the algorithm in *ALG and final fixup in *VARIANT. */
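/* For instance, for VAL == 7 the three candidates are roughly
   (x << 3) - x (basic_variant), negating the sequence for x * -7
   (negate_variant), and the sequence for x * 6 plus x (add_variant);
   the cheapest one under MULT_COST wins.  */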
2881 static bool
2882 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2883 struct algorithm *alg, enum mult_variant *variant,
2884 int mult_cost)
2886 struct algorithm alg2;
2887 struct mult_cost limit;
2888 int op_cost;
2889 bool speed = optimize_insn_for_speed_p ();
2891 /* Fail quickly for impossible bounds. */
2892 if (mult_cost < 0)
2893 return false;
2895 /* Ensure that mult_cost provides a reasonable upper bound.
2896 Any constant multiplication can be performed with less
2897 than 2 * bits additions. */
2898 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2899 if (mult_cost > op_cost)
2900 mult_cost = op_cost;
2902 *variant = basic_variant;
2903 limit.cost = mult_cost;
2904 limit.latency = mult_cost;
2905 synth_mult (alg, val, &limit, mode);
2907 /* This works only if the inverted value actually fits in an
2908 `unsigned int' */
2909 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2911 op_cost = neg_cost (speed, mode);
2912 if (MULT_COST_LESS (&alg->cost, mult_cost))
2914 limit.cost = alg->cost.cost - op_cost;
2915 limit.latency = alg->cost.latency - op_cost;
2917 else
2919 limit.cost = mult_cost - op_cost;
2920 limit.latency = mult_cost - op_cost;
2923 synth_mult (&alg2, -val, &limit, mode);
2924 alg2.cost.cost += op_cost;
2925 alg2.cost.latency += op_cost;
2926 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2927 *alg = alg2, *variant = negate_variant;
2930 /* This proves very useful for division-by-constant. */
2931 op_cost = add_cost (speed, mode);
2932 if (MULT_COST_LESS (&alg->cost, mult_cost))
2934 limit.cost = alg->cost.cost - op_cost;
2935 limit.latency = alg->cost.latency - op_cost;
2937 else
2939 limit.cost = mult_cost - op_cost;
2940 limit.latency = mult_cost - op_cost;
2943 synth_mult (&alg2, val - 1, &limit, mode);
2944 alg2.cost.cost += op_cost;
2945 alg2.cost.latency += op_cost;
2946 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2947 *alg = alg2, *variant = add_variant;
2949 return MULT_COST_LESS (&alg->cost, mult_cost);
2952 /* A subroutine of expand_mult, used for constant multiplications.
2953 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2954 convenient. Use the shift/add sequence described by ALG and apply
2955 the final fixup specified by VARIANT. */
2957 static rtx
2958 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2959 rtx target, const struct algorithm *alg,
2960 enum mult_variant variant)
2962 HOST_WIDE_INT val_so_far;
2963 rtx insn, accum, tem;
2964 int opno;
2965 enum machine_mode nmode;
2967 /* Avoid referencing memory over and over and invalid sharing
2968 on SUBREGs. */
2969 op0 = force_reg (mode, op0);
2971 /* ACCUM starts out either as OP0 or as a zero, depending on
2972 the first operation. */
2974 if (alg->op[0] == alg_zero)
2976 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2977 val_so_far = 0;
2979 else if (alg->op[0] == alg_m)
2981 accum = copy_to_mode_reg (mode, op0);
2982 val_so_far = 1;
2984 else
2985 gcc_unreachable ();
2987 for (opno = 1; opno < alg->ops; opno++)
2989 int log = alg->log[opno];
2990 rtx shift_subtarget = optimize ? 0 : accum;
2991 rtx add_target
2992 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2993 && !optimize)
2994 ? target : 0;
2995 rtx accum_target = optimize ? 0 : accum;
2996 rtx accum_inner;
2998 switch (alg->op[opno])
3000 case alg_shift:
3001 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3002 /* REG_EQUAL note will be attached to the following insn. */
3003 emit_move_insn (accum, tem);
3004 val_so_far <<= log;
3005 break;
3007 case alg_add_t_m2:
3008 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3009 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3010 add_target ? add_target : accum_target);
3011 val_so_far += (HOST_WIDE_INT) 1 << log;
3012 break;
3014 case alg_sub_t_m2:
3015 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3016 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3017 add_target ? add_target : accum_target);
3018 val_so_far -= (HOST_WIDE_INT) 1 << log;
3019 break;
3021 case alg_add_t2_m:
3022 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3023 log, shift_subtarget, 0);
3024 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3025 add_target ? add_target : accum_target);
3026 val_so_far = (val_so_far << log) + 1;
3027 break;
3029 case alg_sub_t2_m:
3030 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3031 log, shift_subtarget, 0);
3032 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3033 add_target ? add_target : accum_target);
3034 val_so_far = (val_so_far << log) - 1;
3035 break;
3037 case alg_add_factor:
3038 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3039 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3040 add_target ? add_target : accum_target);
3041 val_so_far += val_so_far << log;
3042 break;
3044 case alg_sub_factor:
3045 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3046 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3047 (add_target
3048 ? add_target : (optimize ? 0 : tem)));
3049 val_so_far = (val_so_far << log) - val_so_far;
3050 break;
3052 default:
3053 gcc_unreachable ();
3056 if (SCALAR_INT_MODE_P (mode))
3058 /* Write a REG_EQUAL note on the last insn so that we can cse
3059 multiplication sequences. Note that if ACCUM is a SUBREG,
3060 we've set the inner register and must properly indicate that. */
3061 tem = op0, nmode = mode;
3062 accum_inner = accum;
3063 if (GET_CODE (accum) == SUBREG)
3065 accum_inner = SUBREG_REG (accum);
3066 nmode = GET_MODE (accum_inner);
3067 tem = gen_lowpart (nmode, op0);
3070 insn = get_last_insn ();
3071 set_dst_reg_note (insn, REG_EQUAL,
3072 gen_rtx_MULT (nmode, tem,
3073 gen_int_mode (val_so_far, nmode)),
3074 accum_inner);
3078 if (variant == negate_variant)
3080 val_so_far = -val_so_far;
3081 accum = expand_unop (mode, neg_optab, accum, target, 0);
3083 else if (variant == add_variant)
3085 val_so_far = val_so_far + 1;
3086 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3089 /* Compare only the bits of val and val_so_far that are significant
3090 in the result mode, to avoid sign-/zero-extension confusion. */
3091 nmode = GET_MODE_INNER (mode);
3092 if (nmode == VOIDmode)
3093 nmode = mode;
3094 val &= GET_MODE_MASK (nmode);
3095 val_so_far &= GET_MODE_MASK (nmode);
3096 gcc_assert (val == val_so_far);
3098 return accum;
3101 /* Perform a multiplication and return an rtx for the result.
3102 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3103 TARGET is a suggestion for where to store the result (an rtx).
3105 We check specially for a constant integer as OP1.
3106 If you want this check for OP0 as well, then before calling
3107 you should swap the two operands if OP0 would be constant. */
3110 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3111 int unsignedp)
3113 enum mult_variant variant;
3114 struct algorithm algorithm;
3115 rtx scalar_op1;
3116 int max_cost;
3117 bool speed = optimize_insn_for_speed_p ();
3118 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3120 if (CONSTANT_P (op0))
3122 rtx temp = op0;
3123 op0 = op1;
3124 op1 = temp;
3127 /* For vectors, there are several simplifications that can be made if
3128 all elements of the vector constant are identical. */
3129 scalar_op1 = op1;
3130 if (GET_CODE (op1) == CONST_VECTOR)
3132 int i, n = CONST_VECTOR_NUNITS (op1);
3133 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3134 for (i = 1; i < n; ++i)
3135 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3136 goto skip_scalar;
3139 if (INTEGRAL_MODE_P (mode))
3141 rtx fake_reg;
3142 HOST_WIDE_INT coeff;
3143 bool is_neg;
3144 int mode_bitsize;
3146 if (op1 == CONST0_RTX (mode))
3147 return op1;
3148 if (op1 == CONST1_RTX (mode))
3149 return op0;
3150 if (op1 == CONSTM1_RTX (mode))
3151 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3152 op0, target, 0);
3154 if (do_trapv)
3155 goto skip_synth;
3157 /* If mode is integer vector mode, check if the backend supports
3158 vector lshift (by scalar or vector) at all. If not, we can't use
3159 synthesized multiply. */
3160 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3161 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3162 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3163 goto skip_synth;
3165 /* These are the operations that are potentially turned into
3166 a sequence of shifts and additions. */
3167 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3169 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3170 less than or equal in size to `unsigned int' this doesn't matter.
3171 If the mode is larger than `unsigned int', then synth_mult works
3172 only if the constant value exactly fits in an `unsigned int' without
3173 any truncation. This means that multiplying by negative values does
3174 not work; results are off by 2^32 on a 32 bit machine. */
3176 if (CONST_INT_P (scalar_op1))
3178 coeff = INTVAL (scalar_op1);
3179 is_neg = coeff < 0;
3181 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3183 /* If we are multiplying in DImode, it may still be a win
3184 to try to work with shifts and adds. */
3185 if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3186 && (CONST_DOUBLE_LOW (scalar_op1) > 0
3187 || (CONST_DOUBLE_LOW (scalar_op1) < 0
3188 && EXACT_POWER_OF_2_OR_ZERO_P
3189 (CONST_DOUBLE_LOW (scalar_op1)))))
3191 coeff = CONST_DOUBLE_LOW (scalar_op1);
3192 is_neg = false;
3194 else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3196 coeff = CONST_DOUBLE_HIGH (scalar_op1);
3197 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3199 int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3200 if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3201 || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3202 return expand_shift (LSHIFT_EXPR, mode, op0,
3203 shift, target, unsignedp);
3205 goto skip_synth;
3207 else
3208 goto skip_synth;
3210 else
3211 goto skip_synth;
3213 /* We used to test optimize here, on the grounds that it's better to
3214 produce a smaller program when -O is not used. But this causes
3215 such a terrible slowdown sometimes that it seems better to always
3216 use synth_mult. */
3218 /* Special case powers of two. */
3219 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3220 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3221 return expand_shift (LSHIFT_EXPR, mode, op0,
3222 floor_log2 (coeff), target, unsignedp);
3224 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3226 /* Attempt to handle multiplication of DImode values by negative
3227 coefficients, by performing the multiplication by a positive
3228 multiplier and then inverting the result. */
3229 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3231 /* It's safe to use -coeff even for INT_MIN, as the
3232 result is interpreted as an unsigned coefficient.
3233 Exclude cost of op0 from max_cost to match the cost
3234 calculation of the synth_mult. */
3235 coeff = -(unsigned HOST_WIDE_INT) coeff;
3236 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3237 - neg_cost (speed, mode));
3238 if (max_cost <= 0)
3239 goto skip_synth;
3241 /* Special case powers of two. */
3242 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3244 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3245 floor_log2 (coeff), target, unsignedp);
3246 return expand_unop (mode, neg_optab, temp, target, 0);
3249 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3250 max_cost))
3252 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3253 &algorithm, variant);
3254 return expand_unop (mode, neg_optab, temp, target, 0);
3256 goto skip_synth;
3259 /* Exclude cost of op0 from max_cost to match the cost
3260 calculation of the synth_mult. */
3261 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3262 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3263 return expand_mult_const (mode, op0, coeff, target,
3264 &algorithm, variant);
3266 skip_synth:
3268 /* Expand x*2.0 as x+x. */
3269 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3271 REAL_VALUE_TYPE d;
3272 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3274 if (REAL_VALUES_EQUAL (d, dconst2))
3276 op0 = force_reg (GET_MODE (op0), op0);
3277 return expand_binop (mode, add_optab, op0, op0,
3278 target, unsignedp, OPTAB_LIB_WIDEN);
3281 skip_scalar:
3283 /* This used to use umul_optab if unsigned, but for non-widening multiply
3284 there is no difference between signed and unsigned. */
3285 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3286 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3287 gcc_assert (op0);
3288 return op0;
3291 /* Return a cost estimate for multiplying a register by the given
3292 COEFFicient in the given MODE and SPEED. */
3295 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3297 int max_cost;
3298 struct algorithm algorithm;
3299 enum mult_variant variant;
3301 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3302 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3303 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3304 return algorithm.cost.cost;
3305 else
3306 return max_cost;
3309 /* Perform a widening multiplication and return an rtx for the result.
3310 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3311 TARGET is a suggestion for where to store the result (an rtx).
3312 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3313 or smul_widen_optab.
3315 We check specially for a constant integer as OP1, comparing the
3316 cost of a widening multiply against the cost of a sequence of shifts
3317 and adds. */
3320 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3321 int unsignedp, optab this_optab)
3323 bool speed = optimize_insn_for_speed_p ();
3324 rtx cop1;
3326 if (CONST_INT_P (op1)
3327 && GET_MODE (op0) != VOIDmode
3328 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3329 this_optab == umul_widen_optab))
3330 && CONST_INT_P (cop1)
3331 && (INTVAL (cop1) >= 0
3332 || HWI_COMPUTABLE_MODE_P (mode)))
3334 HOST_WIDE_INT coeff = INTVAL (cop1);
3335 int max_cost;
3336 enum mult_variant variant;
3337 struct algorithm algorithm;
3339 if (coeff == 0)
3340 return CONST0_RTX (mode);
3342 /* Special case powers of two. */
3343 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3345 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3346 return expand_shift (LSHIFT_EXPR, mode, op0,
3347 floor_log2 (coeff), target, unsignedp);
3350 /* Exclude cost of op0 from max_cost to match the cost
3351 calculation of the synth_mult. */
3352 max_cost = mul_widen_cost (speed, mode);
3353 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3354 max_cost))
3356 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3357 return expand_mult_const (mode, op0, coeff, target,
3358 &algorithm, variant);
3361 return expand_binop (mode, this_optab, op0, op1, target,
3362 unsignedp, OPTAB_LIB_WIDEN);
3365 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3366 replace division by D, and put the least significant N bits of the result
3367 in *MULTIPLIER_PTR and return the most significant bit.
3369 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3370 needed precision is in PRECISION (should be <= N).
3372 PRECISION should be as small as possible so this function can choose
3373 multiplier more freely.
3375 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3376 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3378 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3379 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
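/* For example, with D == 5 and N == PRECISION == 32 this returns
   *MULTIPLIER_PTR == 0xcccccccd, *POST_SHIFT_PTR == 2 and *LGUP_PTR == 3,
   so an unsigned 32-bit x / 5 becomes the high half of x * 0xcccccccd
   shifted right by 2.  */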
3381 unsigned HOST_WIDE_INT
3382 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3383 unsigned HOST_WIDE_INT *multiplier_ptr,
3384 int *post_shift_ptr, int *lgup_ptr)
3386 double_int mhigh, mlow;
3387 int lgup, post_shift;
3388 int pow, pow2;
3390 /* lgup = ceil(log2(divisor)); */
3391 lgup = ceil_log2 (d);
3393 gcc_assert (lgup <= n);
3395 pow = n + lgup;
3396 pow2 = n + lgup - precision;
3398 /* We could handle this with some effort, but this case is much
3399 better handled directly with a scc insn, so rely on caller using
3400 that. */
3401 gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3403 /* mlow = 2^(N + lgup)/d */
3404 double_int val = double_int_zero.set_bit (pow);
3405 mlow = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3407 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3408 val |= double_int_zero.set_bit (pow2);
3409 mhigh = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3411 gcc_assert (!mhigh.high || val.high - d < d);
3412 gcc_assert (mhigh.high <= 1 && mlow.high <= 1);
3413 /* Assert that mlow < mhigh. */
3414 gcc_assert (mlow.ult (mhigh));
3416 /* If precision == N, then mlow, mhigh exceed 2^N
3417 (but they do not exceed 2^(N+1)). */
3419 /* Reduce to lowest terms. */
3420 for (post_shift = lgup; post_shift > 0; post_shift--)
3422 int shft = HOST_BITS_PER_WIDE_INT - 1;
3423 unsigned HOST_WIDE_INT ml_lo = (mlow.high << shft) | (mlow.low >> 1);
3424 unsigned HOST_WIDE_INT mh_lo = (mhigh.high << shft) | (mhigh.low >> 1);
3425 if (ml_lo >= mh_lo)
3426 break;
3428 mlow = double_int::from_uhwi (ml_lo);
3429 mhigh = double_int::from_uhwi (mh_lo);
3432 *post_shift_ptr = post_shift;
3433 *lgup_ptr = lgup;
3434 if (n < HOST_BITS_PER_WIDE_INT)
3436 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3437 *multiplier_ptr = mhigh.low & mask;
3438 return mhigh.low >= mask;
3440 else
3442 *multiplier_ptr = mhigh.low;
3443 return mhigh.high;
3447 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3448 congruent to 1 (mod 2**N). */
3450 static unsigned HOST_WIDE_INT
3451 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3453 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3455 /* The algorithm notes that the choice y = x satisfies
3456 x*y == 1 mod 2^3, since x is assumed odd.
3457 Each iteration doubles the number of bits of significance in y. */
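/* For example, with x == 3 and n == 8 the loop below runs twice and
   yields y == 171; indeed 3 * 171 == 513 == 2 * 256 + 1.  */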
3459 unsigned HOST_WIDE_INT mask;
3460 unsigned HOST_WIDE_INT y = x;
3461 int nbit = 3;
3463 mask = (n == HOST_BITS_PER_WIDE_INT
3464 ? ~(unsigned HOST_WIDE_INT) 0
3465 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3467 while (nbit < n)
3469 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3470 nbit *= 2;
3472 return y;
3475 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3476 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3477 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3478 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3479 become signed.
3481 The result is put in TARGET if that is convenient.
3483 MODE is the mode of operation. */
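/* The adjustment relies on the identity, for N-bit operands a and b
   interpreted as signed values:
   high_signed (a, b) = high_unsigned (a, b)
                        - (a < 0 ? b : 0) - (b < 0 ? a : 0),
   computed in N-bit arithmetic.  The arithmetic shifts below produce an
   all-ones mask exactly when an operand is negative, and the AND then
   selects the other operand.  */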
3486 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3487 rtx op1, rtx target, int unsignedp)
3489 rtx tem;
3490 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3492 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3493 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3494 tem = expand_and (mode, tem, op1, NULL_RTX);
3495 adj_operand
3496 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3497 adj_operand);
3499 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3500 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3501 tem = expand_and (mode, tem, op0, NULL_RTX);
3502 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3503 target);
3505 return target;
3508 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3510 static rtx
3511 extract_high_half (enum machine_mode mode, rtx op)
3513 enum machine_mode wider_mode;
3515 if (mode == word_mode)
3516 return gen_highpart (mode, op);
3518 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3520 wider_mode = GET_MODE_WIDER_MODE (mode);
3521 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3522 GET_MODE_BITSIZE (mode), 0, 1);
3523 return convert_modes (mode, wider_mode, op, 0);
3526 /* Like expmed_mult_highpart, but only consider using a multiplication
3527 optab. OP1 is an rtx for the constant operand. */
3529 static rtx
3530 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3531 rtx target, int unsignedp, int max_cost)
3533 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3534 enum machine_mode wider_mode;
3535 optab moptab;
3536 rtx tem;
3537 int size;
3538 bool speed = optimize_insn_for_speed_p ();
3540 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3542 wider_mode = GET_MODE_WIDER_MODE (mode);
3543 size = GET_MODE_BITSIZE (mode);
3545 /* Firstly, try using a multiplication insn that only generates the needed
3546 high part of the product, and in the sign flavor of unsignedp. */
3547 if (mul_highpart_cost (speed, mode) < max_cost)
3549 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3550 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3551 unsignedp, OPTAB_DIRECT);
3552 if (tem)
3553 return tem;
3556 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3557 Need to adjust the result after the multiplication. */
3558 if (size - 1 < BITS_PER_WORD
3559 && (mul_highpart_cost (speed, mode)
3560 + 2 * shift_cost (speed, mode, size-1)
3561 + 4 * add_cost (speed, mode) < max_cost))
3563 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3564 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3565 unsignedp, OPTAB_DIRECT);
3566 if (tem)
3567 /* We used the wrong signedness. Adjust the result. */
3568 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3569 tem, unsignedp);
3572 /* Try widening multiplication. */
3573 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3574 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3575 && mul_widen_cost (speed, wider_mode) < max_cost)
3577 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3578 unsignedp, OPTAB_WIDEN);
3579 if (tem)
3580 return extract_high_half (mode, tem);
3583 /* Try widening the mode and perform a non-widening multiplication. */
3584 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3585 && size - 1 < BITS_PER_WORD
3586 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3587 < max_cost))
3589 rtx insns, wop0, wop1;
3591 /* We need to widen the operands, for example to ensure the
3592 constant multiplier is correctly sign or zero extended.
3593 Use a sequence to clean up any instructions emitted by
3594 the conversions if things don't work out. */
3595 start_sequence ();
3596 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3597 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3598 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3599 unsignedp, OPTAB_WIDEN);
3600 insns = get_insns ();
3601 end_sequence ();
3603 if (tem)
3605 emit_insn (insns);
3606 return extract_high_half (mode, tem);
3610 /* Try widening multiplication of opposite signedness, and adjust. */
3611 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3612 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3613 && size - 1 < BITS_PER_WORD
3614 && (mul_widen_cost (speed, wider_mode)
3615 + 2 * shift_cost (speed, mode, size-1)
3616 + 4 * add_cost (speed, mode) < max_cost))
3618 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3619 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3620 if (tem != 0)
3622 tem = extract_high_half (mode, tem);
3623 /* We used the wrong signedness. Adjust the result. */
3624 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3625 target, unsignedp);
3629 return 0;
3632 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3633 putting the high half of the result in TARGET if that is convenient,
3634 and return where the result is. If the operation can not be performed,
3635 0 is returned.
3637 MODE is the mode of operation and result.
3639 UNSIGNEDP nonzero means unsigned multiply.
3641 MAX_COST is the total allowed cost for the expanded RTL. */
3643 static rtx
3644 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3645 rtx target, int unsignedp, int max_cost)
3647 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3648 unsigned HOST_WIDE_INT cnst1;
3649 int extra_cost;
3650 bool sign_adjust = false;
3651 enum mult_variant variant;
3652 struct algorithm alg;
3653 rtx tem;
3654 bool speed = optimize_insn_for_speed_p ();
3656 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3657 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3658 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3660 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3662 /* We can't optimize modes wider than BITS_PER_WORD.
3663 ??? We might be able to perform double-word arithmetic if
3664 mode == word_mode, however all the cost calculations in
3665 synth_mult etc. assume single-word operations. */
3666 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3667 return expmed_mult_highpart_optab (mode, op0, op1, target,
3668 unsignedp, max_cost);
3670 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3672 /* Check whether we try to multiply by a negative constant. */
3673 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3675 sign_adjust = true;
3676 extra_cost += add_cost (speed, mode);
3679 /* See whether shift/add multiplication is cheap enough. */
3680 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3681 max_cost - extra_cost))
3683 /* See whether the specialized multiplication optabs are
3684 cheaper than the shift/add version. */
3685 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3686 alg.cost.cost + extra_cost);
3687 if (tem)
3688 return tem;
3690 tem = convert_to_mode (wider_mode, op0, unsignedp);
3691 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3692 tem = extract_high_half (mode, tem);
3694 /* Adjust result for signedness. */
3695 if (sign_adjust)
3696 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3698 return tem;
3700 return expmed_mult_highpart_optab (mode, op0, op1, target,
3701 unsignedp, max_cost);
3705 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3707 static rtx
3708 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3710 unsigned HOST_WIDE_INT masklow, maskhigh;
3711 rtx result, temp, shift, label;
3712 int logd;
3714 logd = floor_log2 (d);
3715 result = gen_reg_rtx (mode);
3717 /* Avoid conditional branches when they're expensive. */
3718 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3719 && optimize_insn_for_speed_p ())
3721 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3722 mode, 0, -1);
3723 if (signmask)
3725 signmask = force_reg (mode, signmask);
3726 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3727 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3729 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3730 which instruction sequence to use. If logical right shifts
3731 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3732 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3734 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3735 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3736 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3737 > COSTS_N_INSNS (2)))
3739 temp = expand_binop (mode, xor_optab, op0, signmask,
3740 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3741 temp = expand_binop (mode, sub_optab, temp, signmask,
3742 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3743 temp = expand_binop (mode, and_optab, temp,
3744 gen_int_mode (masklow, mode),
3745 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3746 temp = expand_binop (mode, xor_optab, temp, signmask,
3747 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3748 temp = expand_binop (mode, sub_optab, temp, signmask,
3749 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3751 else
3753 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3754 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3755 signmask = force_reg (mode, signmask);
3757 temp = expand_binop (mode, add_optab, op0, signmask,
3758 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3759 temp = expand_binop (mode, and_optab, temp,
3760 gen_int_mode (masklow, mode),
3761 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3762 temp = expand_binop (mode, sub_optab, temp, signmask,
3763 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3765 return temp;
3769 /* Mask contains the mode's signbit and the significant bits of the
3770 modulus. By including the signbit in the operation, many targets
3771 can avoid an explicit compare operation in the following comparison
3772 against zero. */
3774 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3775 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3777 masklow |= HOST_WIDE_INT_M1U << (GET_MODE_BITSIZE (mode) - 1);
3778 maskhigh = -1;
3780 else
3781 maskhigh = HOST_WIDE_INT_M1U
3782 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3784 temp = expand_binop (mode, and_optab, op0,
3785 immed_double_const (masklow, maskhigh, mode),
3786 result, 1, OPTAB_LIB_WIDEN);
3787 if (temp != result)
3788 emit_move_insn (result, temp);
3790 label = gen_label_rtx ();
3791 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3793 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3794 0, OPTAB_LIB_WIDEN);
3795 masklow = HOST_WIDE_INT_M1U << logd;
3796 maskhigh = -1;
3797 temp = expand_binop (mode, ior_optab, temp,
3798 immed_double_const (masklow, maskhigh, mode),
3799 result, 1, OPTAB_LIB_WIDEN);
3800 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3801 0, OPTAB_LIB_WIDEN);
3802 if (temp != result)
3803 emit_move_insn (result, temp);
3804 emit_label (label);
3805 return result;
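/* Editor's sketch (not part of the original file): the branch-free
   "2 XORs, 2 SUBs and an AND" sequence chosen above when logical
   right shifts are expensive, written for a 32-bit int and D = 2^LOGD.
   It assumes an arithmetic right shift for negative ints and ignores
   the INT_MIN corner case; SIGNMASK plays the role of the
   emit_store_flag result (-1 if OP0 is negative, else 0).  */
static int
smod_pow2_sketch (int op0, int logd)
{
  int signmask = op0 >> 31;                        /* -1 or 0 */
  int masklow = (1 << logd) - 1;
  int t = ((op0 ^ signmask) - signmask) & masklow; /* |op0| mod 2^logd */
  return (t ^ signmask) - signmask;                /* restore the sign */
}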
3808 /* Expand signed division of OP0 by a power of two D in mode MODE.
3809 This routine is only called for positive values of D. */
3811 static rtx
3812 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3814 rtx temp, label;
3815 int logd;
3817 logd = floor_log2 (d);
3819 if (d == 2
3820 && BRANCH_COST (optimize_insn_for_speed_p (),
3821 false) >= 1)
3823 temp = gen_reg_rtx (mode);
3824 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3825 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3826 0, OPTAB_LIB_WIDEN);
3827 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3830 #ifdef HAVE_conditional_move
3831 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3832 >= 2)
3834 rtx temp2;
3836 start_sequence ();
3837 temp2 = copy_to_mode_reg (mode, op0);
3838 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3839 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3840 temp = force_reg (mode, temp);
3842 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3843 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3844 mode, temp, temp2, mode, 0);
3845 if (temp2)
3847 rtx seq = get_insns ();
3848 end_sequence ();
3849 emit_insn (seq);
3850 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3852 end_sequence ();
3854 #endif
3856 if (BRANCH_COST (optimize_insn_for_speed_p (),
3857 false) >= 2)
3859 int ushift = GET_MODE_BITSIZE (mode) - logd;
3861 temp = gen_reg_rtx (mode);
3862 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3863 if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3864 > COSTS_N_INSNS (1))
3865 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3866 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3867 else
3868 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3869 ushift, NULL_RTX, 1);
3870 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3871 0, OPTAB_LIB_WIDEN);
3872 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3875 label = gen_label_rtx ();
3876 temp = copy_to_mode_reg (mode, op0);
3877 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3878 expand_inc (temp, gen_int_mode (d - 1, mode));
3879 emit_label (label);
3880 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
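/* Editor's sketch (not part of the original file): the branchless
   shape emitted above when BRANCH_COST >= 2, for a 32-bit int and
   D = 2^LOGD, assuming an arithmetic right shift for negative ints.
   Adding D - 1 only when OP0 is negative turns the floor behaviour
   of the final arithmetic shift into truncation toward zero.  */
static int
sdiv_pow2_sketch (int op0, int logd)
{
  int sign = op0 >> 31;                  /* -1 if op0 < 0, else 0 */
  return (op0 + (sign & ((1 << logd) - 1))) >> logd;
}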
3883 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3884 if that is convenient, and returning where the result is.
3885 You may request either the quotient or the remainder as the result;
3886 specify REM_FLAG nonzero to get the remainder.
3888 CODE is the expression code for which kind of division this is;
3889 it controls how rounding is done. MODE is the machine mode to use.
3890 UNSIGNEDP nonzero means do unsigned division. */
3892 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3893 and then correct it by or'ing in missing high bits
3894 if result of ANDI is nonzero.
3895 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3896 This could optimize to a bfexts instruction.
3897 But C doesn't use these operations, so their optimizations are
3898 left for later. */
3899 /* ??? For modulo, we don't actually need the highpart of the first product,
3900 the low part will do nicely. And for small divisors, the second multiply
3901 can also be a low-part only multiply or even be completely left out.
3902 E.g. to calculate the remainder of a division by 3 with a 32 bit
3903 multiply, multiply with 0x55555556 and extract the upper two bits;
3904 the result is exact for inputs up to 0x1fffffff.
3905 The input range can be reduced by using cross-sum rules.
3906 For odd divisors >= 3, the following table gives right shift counts
3907 so that if a number is shifted by an integer multiple of the given
3908 amount, the remainder stays the same:
3909 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3910 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3911 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3912 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3913 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3915 Cross-sum rules for even numbers can be derived by leaving as many bits
3916 to the right alone as the divisor has zeros to the right.
3917 E.g. if x is an unsigned 32 bit number:
3918 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
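/* Editor's sketch (not part of the original file): the "remainder of a
   division by 3 with a 32 bit multiply" trick from the comment above.
   0x55555556 approximates 2^32 / 3, so the top two bits of the low
   32-bit product encode x mod 3; per the comment this is exact for
   x up to 0x1fffffff.  */
static unsigned int
rem3_sketch (unsigned int x)
{
  return (x * 0x55555556u) >> 30;
}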
3922 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3923 rtx op0, rtx op1, rtx target, int unsignedp)
3925 enum machine_mode compute_mode;
3926 rtx tquotient;
3927 rtx quotient = 0, remainder = 0;
3928 rtx last;
3929 int size;
3930 rtx insn;
3931 optab optab1, optab2;
3932 int op1_is_constant, op1_is_pow2 = 0;
3933 int max_cost, extra_cost;
3934 static HOST_WIDE_INT last_div_const = 0;
3935 bool speed = optimize_insn_for_speed_p ();
3937 op1_is_constant = CONST_INT_P (op1);
3938 if (op1_is_constant)
3940 unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3941 if (unsignedp)
3942 ext_op1 &= GET_MODE_MASK (mode);
3943 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3944 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3948 This is the structure of expand_divmod:
3950 First comes code to fix up the operands so we can perform the operations
3951 correctly and efficiently.
3953 Second comes a switch statement with code specific for each rounding mode.
3954 For some special operands this code emits all RTL for the desired
3955 operation, for other cases, it generates only a quotient and stores it in
3956 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3957 to indicate that it has not done anything.
3959 Last comes code that finishes the operation. If QUOTIENT is set and
3960 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3961 QUOTIENT is not set, it is computed using trunc rounding.
3963 We try to generate special code for division and remainder when OP1 is a
3964 constant. If |OP1| = 2**n we can use shifts and some other fast
3965 operations. For other values of OP1, we compute a carefully selected
3966 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3967 by m.
3969 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3970 half of the product. Different strategies for generating the product are
3971 implemented in expmed_mult_highpart.
3973 If what we actually want is the remainder, we generate that by another
3974 by-constant multiplication and a subtraction. */
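/* Editor's sketch (not part of the original file): the shape of the
   signed TRUNC_DIV_EXPR expansion below when the multiplier M chosen
   by choose_multiplier (abs_d, 32, 31, ...) fits in 31 bits, written
   for a 32-bit mode on a 64-bit host and assuming arithmetic right
   shifts.  Subtracting x >> 31 adds 1 for negative x, giving
   truncation toward zero; for negative divisors the quotient is then
   negated, as the code below also does.  */
static int
sdiv_by_const_sketch (int x, int m, int post_shift, int d_is_negative)
{
  int hi = (int) (((long long) x * m) >> 32);  /* signed high part */
  int q = (hi >> post_shift) - (x >> 31);
  return d_is_negative ? -q : q;
}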
3976 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3977 code below will malfunction if we are, so check here and handle
3978 the special case if so. */
3979 if (op1 == const1_rtx)
3980 return rem_flag ? const0_rtx : op0;
3982 /* When dividing by -1, we could get an overflow.
3983 negv_optab can handle overflows. */
3984 if (! unsignedp && op1 == constm1_rtx)
3986 if (rem_flag)
3987 return const0_rtx;
3988 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
3989 ? negv_optab : neg_optab, op0, target, 0);
3992 if (target
3993 /* Don't use the function value register as a target
3994 since we have to read it as well as write it,
3995 and function-inlining gets confused by this. */
3996 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3997 /* Don't clobber an operand while doing a multi-step calculation. */
3998 || ((rem_flag || op1_is_constant)
3999 && (reg_mentioned_p (target, op0)
4000 || (MEM_P (op0) && MEM_P (target))))
4001 || reg_mentioned_p (target, op1)
4002 || (MEM_P (op1) && MEM_P (target))))
4003 target = 0;
4005 /* Get the mode in which to perform this computation. Normally it will
4006 be MODE, but sometimes we can't do the desired operation in MODE.
4007 If so, pick a wider mode in which we can do the operation. Convert
4008 to that mode at the start to avoid repeated conversions.
4010 First see what operations we need. These depend on the expression
4011 we are evaluating. (We assume that divxx3 insns exist under the
4012 same conditions as modxx3 insns and that these insns don't normally
4013 fail. If these assumptions are not correct, we may generate less
4014 efficient code in some cases.)
4016 Then see if we find a mode in which we can open-code that operation
4017 (either a division, modulus, or shift). Finally, check for the smallest
4018 mode for which we can do the operation with a library call. */
4020 /* We might want to refine this now that we have division-by-constant
4021 optimization. Since expmed_mult_highpart tries so many variants, it is
4022 not straightforward to generalize this. Maybe we should make an array
4023 of possible modes in init_expmed? Save this for GCC 2.7. */
4025 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
4026 ? (unsignedp ? lshr_optab : ashr_optab)
4027 : (unsignedp ? udiv_optab : sdiv_optab));
4028 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
4029 ? optab1
4030 : (unsignedp ? udivmod_optab : sdivmod_optab));
4032 for (compute_mode = mode; compute_mode != VOIDmode;
4033 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4034 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4035 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4036 break;
4038 if (compute_mode == VOIDmode)
4039 for (compute_mode = mode; compute_mode != VOIDmode;
4040 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4041 if (optab_libfunc (optab1, compute_mode)
4042 || optab_libfunc (optab2, compute_mode))
4043 break;
4045 /* If we still couldn't find a mode, use MODE, but expand_binop will
4046 probably die. */
4047 if (compute_mode == VOIDmode)
4048 compute_mode = mode;
4050 if (target && GET_MODE (target) == compute_mode)
4051 tquotient = target;
4052 else
4053 tquotient = gen_reg_rtx (compute_mode);
4055 size = GET_MODE_BITSIZE (compute_mode);
4056 #if 0
4057 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4058 (mode), and thereby get better code when OP1 is a constant. Do that
4059 later. It will require going over all usages of SIZE below. */
4060 size = GET_MODE_BITSIZE (mode);
4061 #endif
4063 /* Only deduct something for a REM if the last divide done was
4064 for a different constant. Then set the constant of the last
4065 divide. */
4066 max_cost = (unsignedp
4067 ? udiv_cost (speed, compute_mode)
4068 : sdiv_cost (speed, compute_mode));
4069 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4070 && INTVAL (op1) == last_div_const))
4071 max_cost -= (mul_cost (speed, compute_mode)
4072 + add_cost (speed, compute_mode));
4074 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4076 /* Now convert to the best mode to use. */
4077 if (compute_mode != mode)
4079 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4080 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4082 /* convert_modes may have placed op1 into a register, so we
4083 must recompute the following. */
4084 op1_is_constant = CONST_INT_P (op1);
4085 op1_is_pow2 = (op1_is_constant
4086 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4087 || (! unsignedp
4088 && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4091 /* If one of the operands is a volatile MEM, copy it into a register. */
4093 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4094 op0 = force_reg (compute_mode, op0);
4095 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4096 op1 = force_reg (compute_mode, op1);
4098 /* If we need the remainder or if OP1 is constant, we need to
4099 put OP0 in a register in case it has any queued subexpressions. */
4100 if (rem_flag || op1_is_constant)
4101 op0 = force_reg (compute_mode, op0);
4103 last = get_last_insn ();
4105 /* Promote floor rounding to trunc rounding for unsigned operations. */
4106 if (unsignedp)
4108 if (code == FLOOR_DIV_EXPR)
4109 code = TRUNC_DIV_EXPR;
4110 if (code == FLOOR_MOD_EXPR)
4111 code = TRUNC_MOD_EXPR;
4112 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4113 code = TRUNC_DIV_EXPR;
4116 if (op1 != const0_rtx)
4117 switch (code)
4119 case TRUNC_MOD_EXPR:
4120 case TRUNC_DIV_EXPR:
4121 if (op1_is_constant)
4123 if (unsignedp)
4125 unsigned HOST_WIDE_INT mh, ml;
4126 int pre_shift, post_shift;
4127 int dummy;
4128 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4129 & GET_MODE_MASK (compute_mode));
4131 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4133 pre_shift = floor_log2 (d);
4134 if (rem_flag)
4136 unsigned HOST_WIDE_INT mask
4137 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4138 remainder
4139 = expand_binop (compute_mode, and_optab, op0,
4140 gen_int_mode (mask, compute_mode),
4141 remainder, 1,
4142 OPTAB_LIB_WIDEN);
4143 if (remainder)
4144 return gen_lowpart (mode, remainder);
4146 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4147 pre_shift, tquotient, 1);
4149 else if (size <= HOST_BITS_PER_WIDE_INT)
4151 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4153 /* Most significant bit of divisor is set; emit an scc
4154 insn. */
4155 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4156 compute_mode, 1, 1);
4158 else
4160 /* Find a suitable multiplier and right shift count
4161 instead of multiplying with D. */
4163 mh = choose_multiplier (d, size, size,
4164 &ml, &post_shift, &dummy);
4166 /* If the suggested multiplier is more than SIZE bits,
4167 we can do better for even divisors, using an
4168 initial right shift. */
4169 if (mh != 0 && (d & 1) == 0)
4171 pre_shift = floor_log2 (d & -d);
4172 mh = choose_multiplier (d >> pre_shift, size,
4173 size - pre_shift,
4174 &ml, &post_shift, &dummy);
4175 gcc_assert (!mh);
4177 else
4178 pre_shift = 0;
4180 if (mh != 0)
4182 rtx t1, t2, t3, t4;
4184 if (post_shift - 1 >= BITS_PER_WORD)
4185 goto fail1;
4187 extra_cost
4188 = (shift_cost (speed, compute_mode, post_shift - 1)
4189 + shift_cost (speed, compute_mode, 1)
4190 + 2 * add_cost (speed, compute_mode));
4191 t1 = expmed_mult_highpart
4192 (compute_mode, op0,
4193 gen_int_mode (ml, compute_mode),
4194 NULL_RTX, 1, max_cost - extra_cost);
4195 if (t1 == 0)
4196 goto fail1;
4197 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4198 op0, t1),
4199 NULL_RTX);
4200 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4201 t2, 1, NULL_RTX, 1);
4202 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4203 t1, t3),
4204 NULL_RTX);
4205 quotient = expand_shift
4206 (RSHIFT_EXPR, compute_mode, t4,
4207 post_shift - 1, tquotient, 1);
4209 else
4211 rtx t1, t2;
4213 if (pre_shift >= BITS_PER_WORD
4214 || post_shift >= BITS_PER_WORD)
4215 goto fail1;
4217 t1 = expand_shift
4218 (RSHIFT_EXPR, compute_mode, op0,
4219 pre_shift, NULL_RTX, 1);
4220 extra_cost
4221 = (shift_cost (speed, compute_mode, pre_shift)
4222 + shift_cost (speed, compute_mode, post_shift));
4223 t2 = expmed_mult_highpart
4224 (compute_mode, t1,
4225 gen_int_mode (ml, compute_mode),
4226 NULL_RTX, 1, max_cost - extra_cost);
4227 if (t2 == 0)
4228 goto fail1;
4229 quotient = expand_shift
4230 (RSHIFT_EXPR, compute_mode, t2,
4231 post_shift, tquotient, 1);
4235 else /* Mode too wide to use tricky code */
4236 break;
4238 insn = get_last_insn ();
4239 if (insn != last)
4240 set_dst_reg_note (insn, REG_EQUAL,
4241 gen_rtx_UDIV (compute_mode, op0, op1),
4242 quotient);
4244 else /* TRUNC_DIV, signed */
4246 unsigned HOST_WIDE_INT ml;
4247 int lgup, post_shift;
4248 rtx mlr;
4249 HOST_WIDE_INT d = INTVAL (op1);
4250 unsigned HOST_WIDE_INT abs_d;
4252 /* Since d might be INT_MIN, we have to cast to
4253 unsigned HOST_WIDE_INT before negating to avoid
4254 undefined signed overflow. */
4255 abs_d = (d >= 0
4256 ? (unsigned HOST_WIDE_INT) d
4257 : - (unsigned HOST_WIDE_INT) d);
4259 /* n rem d = n rem -d */
4260 if (rem_flag && d < 0)
4262 d = abs_d;
4263 op1 = gen_int_mode (abs_d, compute_mode);
4266 if (d == 1)
4267 quotient = op0;
4268 else if (d == -1)
4269 quotient = expand_unop (compute_mode, neg_optab, op0,
4270 tquotient, 0);
4271 else if (HOST_BITS_PER_WIDE_INT >= size
4272 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4274 /* This case is not handled correctly below. */
4275 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4276 compute_mode, 1, 1);
4277 if (quotient == 0)
4278 goto fail1;
4280 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4281 && (rem_flag
4282 ? smod_pow2_cheap (speed, compute_mode)
4283 : sdiv_pow2_cheap (speed, compute_mode))
4284 /* We assume that the cheap metric is true if the
4285 optab has an expander for this mode. */
4286 && ((optab_handler ((rem_flag ? smod_optab
4287 : sdiv_optab),
4288 compute_mode)
4289 != CODE_FOR_nothing)
4290 || (optab_handler (sdivmod_optab,
4291 compute_mode)
4292 != CODE_FOR_nothing)))
4294 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4296 if (rem_flag)
4298 remainder = expand_smod_pow2 (compute_mode, op0, d);
4299 if (remainder)
4300 return gen_lowpart (mode, remainder);
4303 if (sdiv_pow2_cheap (speed, compute_mode)
4304 && ((optab_handler (sdiv_optab, compute_mode)
4305 != CODE_FOR_nothing)
4306 || (optab_handler (sdivmod_optab, compute_mode)
4307 != CODE_FOR_nothing)))
4308 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4309 compute_mode, op0,
4310 gen_int_mode (abs_d,
4311 compute_mode),
4312 NULL_RTX, 0);
4313 else
4314 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4316 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4317 negate the quotient. */
4318 if (d < 0)
4320 insn = get_last_insn ();
4321 if (insn != last
4322 && abs_d < ((unsigned HOST_WIDE_INT) 1
4323 << (HOST_BITS_PER_WIDE_INT - 1)))
4324 set_dst_reg_note (insn, REG_EQUAL,
4325 gen_rtx_DIV (compute_mode, op0,
4326 gen_int_mode
4327 (abs_d,
4328 compute_mode)),
4329 quotient);
4331 quotient = expand_unop (compute_mode, neg_optab,
4332 quotient, quotient, 0);
4335 else if (size <= HOST_BITS_PER_WIDE_INT)
4337 choose_multiplier (abs_d, size, size - 1,
4338 &ml, &post_shift, &lgup);
4339 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4341 rtx t1, t2, t3;
4343 if (post_shift >= BITS_PER_WORD
4344 || size - 1 >= BITS_PER_WORD)
4345 goto fail1;
4347 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4348 + shift_cost (speed, compute_mode, size - 1)
4349 + add_cost (speed, compute_mode));
4350 t1 = expmed_mult_highpart
4351 (compute_mode, op0, gen_int_mode (ml, compute_mode),
4352 NULL_RTX, 0, max_cost - extra_cost);
4353 if (t1 == 0)
4354 goto fail1;
4355 t2 = expand_shift
4356 (RSHIFT_EXPR, compute_mode, t1,
4357 post_shift, NULL_RTX, 0);
4358 t3 = expand_shift
4359 (RSHIFT_EXPR, compute_mode, op0,
4360 size - 1, NULL_RTX, 0);
4361 if (d < 0)
4362 quotient
4363 = force_operand (gen_rtx_MINUS (compute_mode,
4364 t3, t2),
4365 tquotient);
4366 else
4367 quotient
4368 = force_operand (gen_rtx_MINUS (compute_mode,
4369 t2, t3),
4370 tquotient);
4372 else
4374 rtx t1, t2, t3, t4;
4376 if (post_shift >= BITS_PER_WORD
4377 || size - 1 >= BITS_PER_WORD)
4378 goto fail1;
4380 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4381 mlr = gen_int_mode (ml, compute_mode);
4382 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4383 + shift_cost (speed, compute_mode, size - 1)
4384 + 2 * add_cost (speed, compute_mode));
4385 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4386 NULL_RTX, 0,
4387 max_cost - extra_cost);
4388 if (t1 == 0)
4389 goto fail1;
4390 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4391 t1, op0),
4392 NULL_RTX);
4393 t3 = expand_shift
4394 (RSHIFT_EXPR, compute_mode, t2,
4395 post_shift, NULL_RTX, 0);
4396 t4 = expand_shift
4397 (RSHIFT_EXPR, compute_mode, op0,
4398 size - 1, NULL_RTX, 0);
4399 if (d < 0)
4400 quotient
4401 = force_operand (gen_rtx_MINUS (compute_mode,
4402 t4, t3),
4403 tquotient);
4404 else
4405 quotient
4406 = force_operand (gen_rtx_MINUS (compute_mode,
4407 t3, t4),
4408 tquotient);
4411 else /* Mode too wide to use tricky code */
4412 break;
4414 insn = get_last_insn ();
4415 if (insn != last)
4416 set_dst_reg_note (insn, REG_EQUAL,
4417 gen_rtx_DIV (compute_mode, op0, op1),
4418 quotient);
4420 break;
4422 fail1:
4423 delete_insns_since (last);
4424 break;
4426 case FLOOR_DIV_EXPR:
4427 case FLOOR_MOD_EXPR:
4428 /* We will come here only for signed operations. */
4429 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4431 unsigned HOST_WIDE_INT mh, ml;
4432 int pre_shift, lgup, post_shift;
4433 HOST_WIDE_INT d = INTVAL (op1);
4435 if (d > 0)
4437 /* We could just as easily deal with negative constants here,
4438 but it does not seem worth the trouble for GCC 2.6. */
4439 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4441 pre_shift = floor_log2 (d);
4442 if (rem_flag)
4444 unsigned HOST_WIDE_INT mask
4445 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4446 remainder = expand_binop
4447 (compute_mode, and_optab, op0,
4448 gen_int_mode (mask, compute_mode),
4449 remainder, 0, OPTAB_LIB_WIDEN);
4450 if (remainder)
4451 return gen_lowpart (mode, remainder);
4453 quotient = expand_shift
4454 (RSHIFT_EXPR, compute_mode, op0,
4455 pre_shift, tquotient, 0);
4457 else
4459 rtx t1, t2, t3, t4;
4461 mh = choose_multiplier (d, size, size - 1,
4462 &ml, &post_shift, &lgup);
4463 gcc_assert (!mh);
4465 if (post_shift < BITS_PER_WORD
4466 && size - 1 < BITS_PER_WORD)
4468 t1 = expand_shift
4469 (RSHIFT_EXPR, compute_mode, op0,
4470 size - 1, NULL_RTX, 0);
4471 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4472 NULL_RTX, 0, OPTAB_WIDEN);
4473 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4474 + shift_cost (speed, compute_mode, size - 1)
4475 + 2 * add_cost (speed, compute_mode));
4476 t3 = expmed_mult_highpart
4477 (compute_mode, t2, gen_int_mode (ml, compute_mode),
4478 NULL_RTX, 1, max_cost - extra_cost);
4479 if (t3 != 0)
4481 t4 = expand_shift
4482 (RSHIFT_EXPR, compute_mode, t3,
4483 post_shift, NULL_RTX, 1);
4484 quotient = expand_binop (compute_mode, xor_optab,
4485 t4, t1, tquotient, 0,
4486 OPTAB_WIDEN);
4491 else
4493 rtx nsign, t1, t2, t3, t4;
4494 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4495 op0, constm1_rtx), NULL_RTX);
4496 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4497 0, OPTAB_WIDEN);
4498 nsign = expand_shift
4499 (RSHIFT_EXPR, compute_mode, t2,
4500 size - 1, NULL_RTX, 0);
4501 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4502 NULL_RTX);
4503 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4504 NULL_RTX, 0);
4505 if (t4)
4507 rtx t5;
4508 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4509 NULL_RTX, 0);
4510 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4511 t4, t5),
4512 tquotient);
4517 if (quotient != 0)
4518 break;
4519 delete_insns_since (last);
4521 /* Try using an instruction that produces both the quotient and
4522 remainder, using truncation. We can easily compensate the quotient
4523 or remainder to get floor rounding, once we have the remainder.
4524 Notice that we compute also the final remainder value here,
4525 and return the result right away. */
4526 if (target == 0 || GET_MODE (target) != compute_mode)
4527 target = gen_reg_rtx (compute_mode);
4529 if (rem_flag)
4531 remainder
4532 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4533 quotient = gen_reg_rtx (compute_mode);
4535 else
4537 quotient
4538 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4539 remainder = gen_reg_rtx (compute_mode);
4542 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4543 quotient, remainder, 0))
4545 /* This could be computed with a branch-less sequence.
4546 Save that for later. */
4547 rtx tem;
4548 rtx label = gen_label_rtx ();
4549 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4550 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4551 NULL_RTX, 0, OPTAB_WIDEN);
4552 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4553 expand_dec (quotient, const1_rtx);
4554 expand_inc (remainder, op1);
4555 emit_label (label);
4556 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4559 /* No luck with division elimination or divmod. Have to do it
4560 by conditionally adjusting op0 *and* the result. */
4562 rtx label1, label2, label3, label4, label5;
4563 rtx adjusted_op0;
4564 rtx tem;
4566 quotient = gen_reg_rtx (compute_mode);
4567 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4568 label1 = gen_label_rtx ();
4569 label2 = gen_label_rtx ();
4570 label3 = gen_label_rtx ();
4571 label4 = gen_label_rtx ();
4572 label5 = gen_label_rtx ();
4573 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4574 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4575 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4576 quotient, 0, OPTAB_LIB_WIDEN);
4577 if (tem != quotient)
4578 emit_move_insn (quotient, tem);
4579 emit_jump_insn (gen_jump (label5));
4580 emit_barrier ();
4581 emit_label (label1);
4582 expand_inc (adjusted_op0, const1_rtx);
4583 emit_jump_insn (gen_jump (label4));
4584 emit_barrier ();
4585 emit_label (label2);
4586 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4587 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4588 quotient, 0, OPTAB_LIB_WIDEN);
4589 if (tem != quotient)
4590 emit_move_insn (quotient, tem);
4591 emit_jump_insn (gen_jump (label5));
4592 emit_barrier ();
4593 emit_label (label3);
4594 expand_dec (adjusted_op0, const1_rtx);
4595 emit_label (label4);
4596 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4597 quotient, 0, OPTAB_LIB_WIDEN);
4598 if (tem != quotient)
4599 emit_move_insn (quotient, tem);
4600 expand_dec (quotient, const1_rtx);
4601 emit_label (label5);
4603 break;
4605 case CEIL_DIV_EXPR:
4606 case CEIL_MOD_EXPR:
4607 if (unsignedp)
4609 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4611 rtx t1, t2, t3;
4612 unsigned HOST_WIDE_INT d = INTVAL (op1);
4613 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4614 floor_log2 (d), tquotient, 1);
4615 t2 = expand_binop (compute_mode, and_optab, op0,
4616 gen_int_mode (d - 1, compute_mode),
4617 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4618 t3 = gen_reg_rtx (compute_mode);
4619 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4620 compute_mode, 1, 1);
4621 if (t3 == 0)
4623 rtx lab;
4624 lab = gen_label_rtx ();
4625 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4626 expand_inc (t1, const1_rtx);
4627 emit_label (lab);
4628 quotient = t1;
4630 else
4631 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4632 t1, t3),
4633 tquotient);
4634 break;
4637 /* Try using an instruction that produces both the quotient and
4638 remainder, using truncation. We can easily compensate the
4639 quotient or remainder to get ceiling rounding, once we have the
4640 remainder. Notice that we compute also the final remainder
4641 value here, and return the result right away. */
4642 if (target == 0 || GET_MODE (target) != compute_mode)
4643 target = gen_reg_rtx (compute_mode);
4645 if (rem_flag)
4647 remainder = (REG_P (target)
4648 ? target : gen_reg_rtx (compute_mode));
4649 quotient = gen_reg_rtx (compute_mode);
4651 else
4653 quotient = (REG_P (target)
4654 ? target : gen_reg_rtx (compute_mode));
4655 remainder = gen_reg_rtx (compute_mode);
4658 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4659 remainder, 1))
4661 /* This could be computed with a branch-less sequence.
4662 Save that for later. */
4663 rtx label = gen_label_rtx ();
4664 do_cmp_and_jump (remainder, const0_rtx, EQ,
4665 compute_mode, label);
4666 expand_inc (quotient, const1_rtx);
4667 expand_dec (remainder, op1);
4668 emit_label (label);
4669 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4672 /* No luck with division elimination or divmod. Have to do it
4673 by conditionally adjusting op0 *and* the result. */
4675 rtx label1, label2;
4676 rtx adjusted_op0, tem;
4678 quotient = gen_reg_rtx (compute_mode);
4679 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4680 label1 = gen_label_rtx ();
4681 label2 = gen_label_rtx ();
4682 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4683 compute_mode, label1);
4684 emit_move_insn (quotient, const0_rtx);
4685 emit_jump_insn (gen_jump (label2));
4686 emit_barrier ();
4687 emit_label (label1);
4688 expand_dec (adjusted_op0, const1_rtx);
4689 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4690 quotient, 1, OPTAB_LIB_WIDEN);
4691 if (tem != quotient)
4692 emit_move_insn (quotient, tem);
4693 expand_inc (quotient, const1_rtx);
4694 emit_label (label2);
4697 else /* signed */
4699 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4700 && INTVAL (op1) >= 0)
4702 /* This is extremely similar to the code for the unsigned case
4703 above. For 2.7 we should merge these variants, but for
4704 2.6.1 I don't want to touch the code for unsigned since that
4705 gets used in C. The signed case will only be used by other
4706 languages (Ada). */
4708 rtx t1, t2, t3;
4709 unsigned HOST_WIDE_INT d = INTVAL (op1);
4710 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4711 floor_log2 (d), tquotient, 0);
4712 t2 = expand_binop (compute_mode, and_optab, op0,
4713 gen_int_mode (d - 1, compute_mode),
4714 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4715 t3 = gen_reg_rtx (compute_mode);
4716 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4717 compute_mode, 1, 1);
4718 if (t3 == 0)
4720 rtx lab;
4721 lab = gen_label_rtx ();
4722 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4723 expand_inc (t1, const1_rtx);
4724 emit_label (lab);
4725 quotient = t1;
4727 else
4728 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4729 t1, t3),
4730 tquotient);
4731 break;
4734 /* Try using an instruction that produces both the quotient and
4735 remainder, using truncation. We can easily compensate the
4736 quotient or remainder to get ceiling rounding, once we have the
4737 remainder. Notice that we compute also the final remainder
4738 value here, and return the result right away. */
4739 if (target == 0 || GET_MODE (target) != compute_mode)
4740 target = gen_reg_rtx (compute_mode);
4741 if (rem_flag)
4743 remainder = (REG_P (target)
4744 ? target : gen_reg_rtx (compute_mode));
4745 quotient = gen_reg_rtx (compute_mode);
4747 else
4749 quotient = (REG_P (target)
4750 ? target : gen_reg_rtx (compute_mode));
4751 remainder = gen_reg_rtx (compute_mode);
4754 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4755 remainder, 0))
4757 /* This could be computed with a branch-less sequence.
4758 Save that for later. */
4759 rtx tem;
4760 rtx label = gen_label_rtx ();
4761 do_cmp_and_jump (remainder, const0_rtx, EQ,
4762 compute_mode, label);
4763 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4764 NULL_RTX, 0, OPTAB_WIDEN);
4765 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4766 expand_inc (quotient, const1_rtx);
4767 expand_dec (remainder, op1);
4768 emit_label (label);
4769 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4772 /* No luck with division elimination or divmod. Have to do it
4773 by conditionally adjusting op0 *and* the result. */
4775 rtx label1, label2, label3, label4, label5;
4776 rtx adjusted_op0;
4777 rtx tem;
4779 quotient = gen_reg_rtx (compute_mode);
4780 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4781 label1 = gen_label_rtx ();
4782 label2 = gen_label_rtx ();
4783 label3 = gen_label_rtx ();
4784 label4 = gen_label_rtx ();
4785 label5 = gen_label_rtx ();
4786 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4787 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4788 compute_mode, label1);
4789 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4790 quotient, 0, OPTAB_LIB_WIDEN);
4791 if (tem != quotient)
4792 emit_move_insn (quotient, tem);
4793 emit_jump_insn (gen_jump (label5));
4794 emit_barrier ();
4795 emit_label (label1);
4796 expand_dec (adjusted_op0, const1_rtx);
4797 emit_jump_insn (gen_jump (label4));
4798 emit_barrier ();
4799 emit_label (label2);
4800 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4801 compute_mode, label3);
4802 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4803 quotient, 0, OPTAB_LIB_WIDEN);
4804 if (tem != quotient)
4805 emit_move_insn (quotient, tem);
4806 emit_jump_insn (gen_jump (label5));
4807 emit_barrier ();
4808 emit_label (label3);
4809 expand_inc (adjusted_op0, const1_rtx);
4810 emit_label (label4);
4811 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4812 quotient, 0, OPTAB_LIB_WIDEN);
4813 if (tem != quotient)
4814 emit_move_insn (quotient, tem);
4815 expand_inc (quotient, const1_rtx);
4816 emit_label (label5);
4819 break;
4821 case EXACT_DIV_EXPR:
4822 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4824 HOST_WIDE_INT d = INTVAL (op1);
4825 unsigned HOST_WIDE_INT ml;
4826 int pre_shift;
4827 rtx t1;
4829 pre_shift = floor_log2 (d & -d);
4830 ml = invert_mod2n (d >> pre_shift, size);
4831 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4832 pre_shift, NULL_RTX, unsignedp);
4833 quotient = expand_mult (compute_mode, t1,
4834 gen_int_mode (ml, compute_mode),
4835 NULL_RTX, 1);
4837 insn = get_last_insn ();
4838 set_dst_reg_note (insn, REG_EQUAL,
4839 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4840 compute_mode, op0, op1),
4841 quotient);
4843 break;
4845 case ROUND_DIV_EXPR:
4846 case ROUND_MOD_EXPR:
4847 if (unsignedp)
4849 rtx tem;
4850 rtx label;
4851 label = gen_label_rtx ();
4852 quotient = gen_reg_rtx (compute_mode);
4853 remainder = gen_reg_rtx (compute_mode);
4854 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4856 rtx tem;
4857 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4858 quotient, 1, OPTAB_LIB_WIDEN);
4859 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4860 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4861 remainder, 1, OPTAB_LIB_WIDEN);
4863 tem = plus_constant (compute_mode, op1, -1);
4864 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4865 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4866 expand_inc (quotient, const1_rtx);
4867 expand_dec (remainder, op1);
4868 emit_label (label);
4870 else
4872 rtx abs_rem, abs_op1, tem, mask;
4873 rtx label;
4874 label = gen_label_rtx ();
4875 quotient = gen_reg_rtx (compute_mode);
4876 remainder = gen_reg_rtx (compute_mode);
4877 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4879 rtx tem;
4880 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4881 quotient, 0, OPTAB_LIB_WIDEN);
4882 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4883 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4884 remainder, 0, OPTAB_LIB_WIDEN);
4886 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4887 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4888 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4889 1, NULL_RTX, 1);
4890 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4891 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4892 NULL_RTX, 0, OPTAB_WIDEN);
4893 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4894 size - 1, NULL_RTX, 0);
4895 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4896 NULL_RTX, 0, OPTAB_WIDEN);
4897 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4898 NULL_RTX, 0, OPTAB_WIDEN);
4899 expand_inc (quotient, tem);
4900 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4901 NULL_RTX, 0, OPTAB_WIDEN);
4902 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4903 NULL_RTX, 0, OPTAB_WIDEN);
4904 expand_dec (remainder, tem);
4905 emit_label (label);
4907 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4909 default:
4910 gcc_unreachable ();
4913 if (quotient == 0)
4915 if (target && GET_MODE (target) != compute_mode)
4916 target = 0;
4918 if (rem_flag)
4920 /* Try to produce the remainder without producing the quotient.
4921 If we seem to have a divmod pattern that does not require widening,
4922 don't try widening here. We should really have a WIDEN argument
4923 to expand_twoval_binop, since what we'd really like to do here is
4924 1) try a mod insn in compute_mode
4925 2) try a divmod insn in compute_mode
4926 3) try a div insn in compute_mode and multiply-subtract to get
4927 remainder
4928 4) try the same things with widening allowed. */
4929 remainder
4930 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4931 op0, op1, target,
4932 unsignedp,
4933 ((optab_handler (optab2, compute_mode)
4934 != CODE_FOR_nothing)
4935 ? OPTAB_DIRECT : OPTAB_WIDEN));
4936 if (remainder == 0)
4938 /* No luck there. Can we do remainder and divide at once
4939 without a library call? */
4940 remainder = gen_reg_rtx (compute_mode);
4941 if (! expand_twoval_binop ((unsignedp
4942 ? udivmod_optab
4943 : sdivmod_optab),
4944 op0, op1,
4945 NULL_RTX, remainder, unsignedp))
4946 remainder = 0;
4949 if (remainder)
4950 return gen_lowpart (mode, remainder);
4953 /* Produce the quotient. Try a quotient insn, but not a library call.
4954 If we have a divmod in this mode, use it in preference to widening
4955 the div (for this test we assume it will not fail). Note that optab2
4956 is set to the one of the two optabs that the call below will use. */
4957 quotient
4958 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4959 op0, op1, rem_flag ? NULL_RTX : target,
4960 unsignedp,
4961 ((optab_handler (optab2, compute_mode)
4962 != CODE_FOR_nothing)
4963 ? OPTAB_DIRECT : OPTAB_WIDEN));
4965 if (quotient == 0)
4967 /* No luck there. Try a quotient-and-remainder insn,
4968 keeping the quotient alone. */
4969 quotient = gen_reg_rtx (compute_mode);
4970 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4971 op0, op1,
4972 quotient, NULL_RTX, unsignedp))
4974 quotient = 0;
4975 if (! rem_flag)
4976 /* Still no luck. If we are not computing the remainder,
4977 use a library call for the quotient. */
4978 quotient = sign_expand_binop (compute_mode,
4979 udiv_optab, sdiv_optab,
4980 op0, op1, target,
4981 unsignedp, OPTAB_LIB_WIDEN);
4986 if (rem_flag)
4988 if (target && GET_MODE (target) != compute_mode)
4989 target = 0;
4991 if (quotient == 0)
4993 /* No divide instruction either. Use library for remainder. */
4994 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4995 op0, op1, target,
4996 unsignedp, OPTAB_LIB_WIDEN);
4997 /* No remainder function. Try a quotient-and-remainder
4998 function, keeping the remainder. */
4999 if (!remainder)
5001 remainder = gen_reg_rtx (compute_mode);
5002 if (!expand_twoval_binop_libfunc
5003 (unsignedp ? udivmod_optab : sdivmod_optab,
5004 op0, op1,
5005 NULL_RTX, remainder,
5006 unsignedp ? UMOD : MOD))
5007 remainder = NULL_RTX;
5010 else
5012 /* We divided. Now finish doing X - Y * (X / Y). */
5013 remainder = expand_mult (compute_mode, quotient, op1,
5014 NULL_RTX, unsignedp);
5015 remainder = expand_binop (compute_mode, sub_optab, op0,
5016 remainder, target, unsignedp,
5017 OPTAB_LIB_WIDEN);
5021 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5024 /* Return a tree node with data type TYPE, describing the value of X.
5025 Usually this is a VAR_DECL, if there is no obvious better choice.
5026 X may be an expression, however we only support those expressions
5027 generated by loop.c. */
5029 tree
5030 make_tree (tree type, rtx x)
5032 tree t;
5034 switch (GET_CODE (x))
5036 case CONST_INT:
5038 HOST_WIDE_INT hi = 0;
5040 if (INTVAL (x) < 0
5041 && !(TYPE_UNSIGNED (type)
5042 && (GET_MODE_BITSIZE (TYPE_MODE (type))
5043 < HOST_BITS_PER_WIDE_INT)))
5044 hi = -1;
5046 t = build_int_cst_wide (type, INTVAL (x), hi);
5048 return t;
5051 case CONST_DOUBLE:
5052 if (GET_MODE (x) == VOIDmode)
5053 t = build_int_cst_wide (type,
5054 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
5055 else
5057 REAL_VALUE_TYPE d;
5059 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5060 t = build_real (type, d);
5063 return t;
5065 case CONST_VECTOR:
5067 int units = CONST_VECTOR_NUNITS (x);
5068 tree itype = TREE_TYPE (type);
5069 tree *elts;
5070 int i;
5072 /* Build a tree with vector elements. */
5073 elts = XALLOCAVEC (tree, units);
5074 for (i = units - 1; i >= 0; --i)
5076 rtx elt = CONST_VECTOR_ELT (x, i);
5077 elts[i] = make_tree (itype, elt);
5080 return build_vector (type, elts);
5083 case PLUS:
5084 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5085 make_tree (type, XEXP (x, 1)));
5087 case MINUS:
5088 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5089 make_tree (type, XEXP (x, 1)));
5091 case NEG:
5092 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5094 case MULT:
5095 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5096 make_tree (type, XEXP (x, 1)));
5098 case ASHIFT:
5099 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5100 make_tree (type, XEXP (x, 1)));
5102 case LSHIFTRT:
5103 t = unsigned_type_for (type);
5104 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5105 make_tree (t, XEXP (x, 0)),
5106 make_tree (type, XEXP (x, 1))));
5108 case ASHIFTRT:
5109 t = signed_type_for (type);
5110 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5111 make_tree (t, XEXP (x, 0)),
5112 make_tree (type, XEXP (x, 1))));
5114 case DIV:
5115 if (TREE_CODE (type) != REAL_TYPE)
5116 t = signed_type_for (type);
5117 else
5118 t = type;
5120 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5121 make_tree (t, XEXP (x, 0)),
5122 make_tree (t, XEXP (x, 1))));
5123 case UDIV:
5124 t = unsigned_type_for (type);
5125 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5126 make_tree (t, XEXP (x, 0)),
5127 make_tree (t, XEXP (x, 1))));
5129 case SIGN_EXTEND:
5130 case ZERO_EXTEND:
5131 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5132 GET_CODE (x) == ZERO_EXTEND);
5133 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5135 case CONST:
5136 return make_tree (type, XEXP (x, 0));
5138 case SYMBOL_REF:
5139 t = SYMBOL_REF_DECL (x);
5140 if (t)
5141 return fold_convert (type, build_fold_addr_expr (t));
5142 /* else fall through. */
5144 default:
5145 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5147 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5148 address mode to pointer mode. */
5149 if (POINTER_TYPE_P (type))
5150 x = convert_memory_address_addr_space
5151 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5153 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5154 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5155 t->decl_with_rtl.rtl = x;
5157 return t;
5161 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5162 and returning TARGET.
5164 If TARGET is 0, a pseudo-register or constant is returned. */
5167 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5169 rtx tem = 0;
5171 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5172 tem = simplify_binary_operation (AND, mode, op0, op1);
5173 if (tem == 0)
5174 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5176 if (target == 0)
5177 target = tem;
5178 else if (tem != target)
5179 emit_move_insn (target, tem);
5180 return target;
5183 /* Helper function for emit_store_flag. */
5184 static rtx
5185 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5186 enum machine_mode mode, enum machine_mode compare_mode,
5187 int unsignedp, rtx x, rtx y, int normalizep,
5188 enum machine_mode target_mode)
5190 struct expand_operand ops[4];
5191 rtx op0, last, comparison, subtarget;
5192 enum machine_mode result_mode = targetm.cstore_mode (icode);
5194 last = get_last_insn ();
5195 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5196 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5197 if (!x || !y)
5199 delete_insns_since (last);
5200 return NULL_RTX;
5203 if (target_mode == VOIDmode)
5204 target_mode = result_mode;
5205 if (!target)
5206 target = gen_reg_rtx (target_mode);
5208 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5210 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5211 create_fixed_operand (&ops[1], comparison);
5212 create_fixed_operand (&ops[2], x);
5213 create_fixed_operand (&ops[3], y);
5214 if (!maybe_expand_insn (icode, 4, ops))
5216 delete_insns_since (last);
5217 return NULL_RTX;
5219 subtarget = ops[0].value;
5221 /* If we are converting to a wider mode, first convert to
5222 TARGET_MODE, then normalize. This produces better combining
5223 opportunities on machines that have a SIGN_EXTRACT when we are
5224 testing a single bit. This mostly benefits the 68k.
5226 If STORE_FLAG_VALUE does not have the sign bit set when
5227 interpreted in MODE, we can do this conversion as unsigned, which
5228 is usually more efficient. */
5229 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5231 convert_move (target, subtarget,
5232 val_signbit_known_clear_p (result_mode,
5233 STORE_FLAG_VALUE));
5234 op0 = target;
5235 result_mode = target_mode;
5237 else
5238 op0 = subtarget;
5240 /* If we want to keep subexpressions around, don't reuse our last
5241 target. */
5242 if (optimize)
5243 subtarget = 0;
5245 /* Now normalize to the proper value in MODE. Sometimes we don't
5246 have to do anything. */
5247 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5249 /* STORE_FLAG_VALUE might be the most negative number, so write
5250 the comparison this way to avoid a compile-time warning. */
5251 else if (- normalizep == STORE_FLAG_VALUE)
5252 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5254 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5255 it hard to use a value of just the sign bit due to ANSI integer
5256 constant typing rules. */
5257 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5258 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5259 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5260 normalizep == 1);
5261 else
5263 gcc_assert (STORE_FLAG_VALUE & 1);
5265 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5266 if (normalizep == -1)
5267 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5270 /* If we were converting to a smaller mode, do the conversion now. */
5271 if (target_mode != result_mode)
5273 convert_move (target, op0, 0);
5274 return target;
5276 else
5277 return op0;
5281 /* A subroutine of emit_store_flag only including "tricks" that do not
5282 need a recursive call. These are kept separate to avoid infinite
5283 loops. */
5285 static rtx
5286 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5287 enum machine_mode mode, int unsignedp, int normalizep,
5288 enum machine_mode target_mode)
5290 rtx subtarget;
5291 enum insn_code icode;
5292 enum machine_mode compare_mode;
5293 enum mode_class mclass;
5294 enum rtx_code scode;
5295 rtx tem;
5297 if (unsignedp)
5298 code = unsigned_condition (code);
5299 scode = swap_condition (code);
5301 /* If one operand is constant, make it the second one. Only do this
5302 if the other operand is not constant as well. */
5304 if (swap_commutative_operands_p (op0, op1))
5305 {
5306 tem = op0;
5307 op0 = op1;
5308 op1 = tem;
5309 code = swap_condition (code);
5310 }
5312 if (mode == VOIDmode)
5313 mode = GET_MODE (op0);
5315 /* For some comparisons with 1 and -1, we can convert this to
5316 comparisons with zero. This will often produce more opportunities for
5317 store-flag insns. */
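/* For example, the signed test "x < 1" becomes "x <= 0" and "x > -1"
   becomes "x >= 0", while the unsigned tests "x >= 1" and "x < 1"
   collapse to "x != 0" and "x == 0", as in the switch below.  */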
5319 switch (code)
5320 {
5321 case LT:
5322 if (op1 == const1_rtx)
5323 op1 = const0_rtx, code = LE;
5324 break;
5325 case LE:
5326 if (op1 == constm1_rtx)
5327 op1 = const0_rtx, code = LT;
5328 break;
5329 case GE:
5330 if (op1 == const1_rtx)
5331 op1 = const0_rtx, code = GT;
5332 break;
5333 case GT:
5334 if (op1 == constm1_rtx)
5335 op1 = const0_rtx, code = GE;
5336 break;
5337 case GEU:
5338 if (op1 == const1_rtx)
5339 op1 = const0_rtx, code = NE;
5340 break;
5341 case LTU:
5342 if (op1 == const1_rtx)
5343 op1 = const0_rtx, code = EQ;
5344 break;
5345 default:
5346 break;
5347 }
5349 /* If we are comparing a double-word integer with zero or -1, we can
5350 convert the comparison into one involving a single word. */
5351 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5352 && GET_MODE_CLASS (mode) == MODE_INT
5353 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5354 {
5355 if ((code == EQ || code == NE)
5356 && (op1 == const0_rtx || op1 == constm1_rtx))
5357 {
5358 rtx op00, op01;
5360 /* Do a logical OR or AND of the two words and compare the
5361 result. */
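/* A double-word value is zero exactly when the inclusive OR of its two
   words is zero, and it is all ones (-1) exactly when the AND of its
   two words is all ones, so a single word-mode store-flag suffices.  */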
5362 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5363 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5364 tem = expand_binop (word_mode,
5365 op1 == const0_rtx ? ior_optab : and_optab,
5366 op00, op01, NULL_RTX, unsignedp,
5367 OPTAB_DIRECT);
5369 if (tem != 0)
5370 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5371 unsignedp, normalizep);
5372 }
5373 else if ((code == LT || code == GE) && op1 == const0_rtx)
5374 {
5375 rtx op0h;
5377 /* If testing the sign bit, can just test on high word. */
5378 op0h = simplify_gen_subreg (word_mode, op0, mode,
5379 subreg_highpart_offset (word_mode,
5380 mode));
5381 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5382 unsignedp, normalizep);
5383 }
5384 else
5385 tem = NULL_RTX;
5387 if (tem)
5388 {
5389 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5390 return tem;
5391 if (!target)
5392 target = gen_reg_rtx (target_mode);
5394 convert_move (target, tem,
5395 !val_signbit_known_set_p (word_mode,
5396 (normalizep ? normalizep
5397 : STORE_FLAG_VALUE)));
5398 return target;
5399 }
5400 }
5402 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5403 complement of A (for GE) and shifting the sign bit to the low bit. */
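/* Illustration for a 32-bit A: with a 0/1 result this computes
   "(unsigned) A >> 31" for A < 0 and "(unsigned) ~A >> 31" for A >= 0;
   with an arithmetic shift the same idea yields a -1/0 result.  */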
5404 if (op1 == const0_rtx && (code == LT || code == GE)
5405 && GET_MODE_CLASS (mode) == MODE_INT
5406 && (normalizep || STORE_FLAG_VALUE == 1
5407 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5408 {
5409 subtarget = target;
5411 if (!target)
5412 target_mode = mode;
5414 /* If the result is to be wider than OP0, it is best to convert it
5415 first. If it is to be narrower, it is *incorrect* to convert it
5416 first. */
5417 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5418 {
5419 op0 = convert_modes (target_mode, mode, op0, 0);
5420 mode = target_mode;
5421 }
5423 if (target_mode != mode)
5424 subtarget = 0;
5426 if (code == GE)
5427 op0 = expand_unop (mode, one_cmpl_optab, op0,
5428 ((STORE_FLAG_VALUE == 1 || normalizep)
5429 ? 0 : subtarget), 0);
5431 if (STORE_FLAG_VALUE == 1 || normalizep)
5432 /* If we are supposed to produce a 0/1 value, we want to do
5433 a logical shift from the sign bit to the low-order bit; for
5434 a -1/0 value, we do an arithmetic shift. */
5435 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5436 GET_MODE_BITSIZE (mode) - 1,
5437 subtarget, normalizep != -1);
5439 if (mode != target_mode)
5440 op0 = convert_modes (target_mode, mode, op0, 0);
5442 return op0;
5443 }
5445 mclass = GET_MODE_CLASS (mode);
5446 for (compare_mode = mode; compare_mode != VOIDmode;
5447 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5448 {
5449 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5450 icode = optab_handler (cstore_optab, optab_mode);
5451 if (icode != CODE_FOR_nothing)
5452 {
5453 do_pending_stack_adjust ();
5454 tem = emit_cstore (target, icode, code, mode, compare_mode,
5455 unsignedp, op0, op1, normalizep, target_mode);
5456 if (tem)
5457 return tem;
5459 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5460 {
5461 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5462 unsignedp, op1, op0, normalizep, target_mode);
5463 if (tem)
5464 return tem;
5465 }
5466 break;
5467 }
5468 }
5470 return 0;
5471 }
5473 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5474 and storing in TARGET. Normally return TARGET.
5475 Return 0 if that cannot be done.
5477 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5478 it is VOIDmode, they cannot both be CONST_INT.
5480 UNSIGNEDP is for the case where we have to widen the operands
5481 to perform the operation. It says to use zero-extension.
5483 NORMALIZEP is 1 if we should convert the result to be either zero
5484 or one. NORMALIZEP is -1 if we should convert the result to be
5485 either zero or -1. If NORMALIZEP is zero, the result will be left
5486 "raw" out of the scc insn. */
5488 rtx
5489 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5490 enum machine_mode mode, int unsignedp, int normalizep)
5491 {
5492 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5493 enum rtx_code rcode;
5494 rtx subtarget;
5495 rtx tem, last, trueval;
5497 /* If we compare constants, we shouldn't use a store-flag operation,
5498 but a constant load. We can get there via the vanilla route that
5499 usually generates a compare-branch sequence, but will in this case
5500 fold the comparison to a constant, and thus elide the branch. */
5501 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5502 return NULL_RTX;
5504 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5505 target_mode);
5506 if (tem)
5507 return tem;
5509 /* If we reached here, we can't do this with a scc insn, however there
5510 are some comparisons that can be done in other ways. Don't do any
5511 of these cases if branches are very cheap. */
5512 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5513 return 0;
5515 /* See what we need to return. We can only return a 1, -1, or the
5516 sign bit. */
5518 if (normalizep == 0)
5519 {
5520 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5521 normalizep = STORE_FLAG_VALUE;
5523 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5524 ;
5525 else
5526 return 0;
5527 }
5529 last = get_last_insn ();
5531 /* If optimizing, use different pseudo registers for each insn, instead
5532 of reusing the same pseudo. This leads to better CSE, but slows
5533 down the compiler, since there are more pseudos. */
5534 subtarget = (!optimize
5535 && (target_mode == mode)) ? target : NULL_RTX;
5536 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5538 /* For floating-point comparisons, try the reverse comparison or try
5539 changing the "orderedness" of the comparison. */
5540 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5541 {
5542 enum rtx_code first_code;
5543 bool and_them;
5545 rcode = reverse_condition_maybe_unordered (code);
5546 if (can_compare_p (rcode, mode, ccp_store_flag)
5547 && (code == ORDERED || code == UNORDERED
5548 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5549 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5550 {
5551 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5552 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5554 /* For the reverse comparison, use either an addition or a XOR. */
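/* E.g. if STORE_FLAG_VALUE is 1 and a -1/0 result is wanted, the
   reversed comparison yields 1 when the original condition is false
   and 0 when it is true; adding NORMALIZEP (-1) then gives 0 and -1
   respectively.  The XOR variant flips a normalized result between
   0 and TRUEVAL in the same way.  */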
5555 if (want_add
5556 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5557 optimize_insn_for_speed_p ()) == 0)
5558 {
5559 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5560 STORE_FLAG_VALUE, target_mode);
5561 if (tem)
5562 return expand_binop (target_mode, add_optab, tem,
5563 gen_int_mode (normalizep, target_mode),
5564 target, 0, OPTAB_WIDEN);
5565 }
5566 else if (!want_add
5567 && rtx_cost (trueval, XOR, 1,
5568 optimize_insn_for_speed_p ()) == 0)
5569 {
5570 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5571 normalizep, target_mode);
5572 if (tem)
5573 return expand_binop (target_mode, xor_optab, tem, trueval,
5574 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5575 }
5576 }
5578 delete_insns_since (last);
5580 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5581 if (code == ORDERED || code == UNORDERED)
5582 return 0;
5584 and_them = split_comparison (code, mode, &first_code, &code);
5586 /* If there are no NaNs, the first comparison should always fall through.
5587 Effectively change the comparison to the other one. */
5588 if (!HONOR_NANS (mode))
5589 {
5590 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5591 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5592 target_mode);
5593 }
5595 #ifdef HAVE_conditional_move
5596 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5597 conditional move. */
5598 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5599 normalizep, target_mode);
5600 if (tem == 0)
5601 return 0;
5603 if (and_them)
5604 tem = emit_conditional_move (target, code, op0, op1, mode,
5605 tem, const0_rtx, GET_MODE (tem), 0);
5606 else
5607 tem = emit_conditional_move (target, code, op0, op1, mode,
5608 trueval, tem, GET_MODE (tem), 0);
5610 if (tem == 0)
5611 delete_insns_since (last);
5612 return tem;
5613 #else
5614 return 0;
5615 #endif
5616 }
5618 /* The remaining tricks only apply to integer comparisons. */
5620 if (GET_MODE_CLASS (mode) != MODE_INT)
5621 return 0;
5623 /* If this is an equality comparison of integers, we can try to exclusive-or
5624 (or subtract) the two operands and use a recursive call to try the
5625 comparison with zero. Don't do any of these cases if branches are
5626 very cheap. */
5628 if ((code == EQ || code == NE) && op1 != const0_rtx)
5629 {
5630 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5631 OPTAB_WIDEN);
5633 if (tem == 0)
5634 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5635 OPTAB_WIDEN);
5636 if (tem != 0)
5637 tem = emit_store_flag (target, code, tem, const0_rtx,
5638 mode, unsignedp, normalizep);
5639 if (tem != 0)
5640 return tem;
5642 delete_insns_since (last);
5643 }
5645 /* For integer comparisons, try the reverse comparison. However, for
5646 small X, if we'd have to extend it anyway, implementing "X != 0"
5647 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5648 rcode = reverse_condition (code);
5649 if (can_compare_p (rcode, mode, ccp_store_flag)
5650 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5651 && code == NE
5652 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5653 && op1 == const0_rtx))
5654 {
5655 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5656 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5658 /* Again, for the reverse comparison, use either an addition or a XOR. */
5659 if (want_add
5660 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5661 optimize_insn_for_speed_p ()) == 0)
5662 {
5663 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5664 STORE_FLAG_VALUE, target_mode);
5665 if (tem != 0)
5666 tem = expand_binop (target_mode, add_optab, tem,
5667 gen_int_mode (normalizep, target_mode),
5668 target, 0, OPTAB_WIDEN);
5669 }
5670 else if (!want_add
5671 && rtx_cost (trueval, XOR, 1,
5672 optimize_insn_for_speed_p ()) == 0)
5673 {
5674 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5675 normalizep, target_mode);
5676 if (tem != 0)
5677 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5678 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5679 }
5681 if (tem != 0)
5682 return tem;
5683 delete_insns_since (last);
5684 }
5686 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5687 the constant zero. Reject all other comparisons at this point. Only
5688 do LE and GT if branches are expensive since they are expensive on
5689 2-operand machines. */
5691 if (op1 != const0_rtx
5692 || (code != EQ && code != NE
5693 && (BRANCH_COST (optimize_insn_for_speed_p (),
5694 false) <= 1 || (code != LE && code != GT))))
5695 return 0;
5697 /* Try to put the result of the comparison in the sign bit. Assume we can't
5698 do the necessary operation below. */
5700 tem = 0;
5702 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5703 the sign bit set. */
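/* Worked example for a 32-bit A: A == 0 gives 0 | -1 == -1 (sign bit
   set); A == 5 gives 5 | 4 == 5 (sign bit clear); for negative A the
   sign bit is already set and the OR keeps it set.  */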
5705 if (code == LE)
5706 {
5707 /* This is destructive, so SUBTARGET can't be OP0. */
5708 if (rtx_equal_p (subtarget, op0))
5709 subtarget = 0;
5711 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5712 OPTAB_WIDEN);
5713 if (tem)
5714 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5715 OPTAB_WIDEN);
5716 }
5718 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5719 number of bits in the mode of OP0, minus one. */
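/* Worked example for a 32-bit A: A == 5 gives (5 >> 31) - 5 == -5
   (sign bit set); A == 0 gives 0; A == -5 gives -1 - (-5) == 4 (sign
   bit clear).  Only A > 0 leaves the sign bit set.  */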
5721 if (code == GT)
5722 {
5723 if (rtx_equal_p (subtarget, op0))
5724 subtarget = 0;
5726 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5727 GET_MODE_BITSIZE (mode) - 1,
5728 subtarget, 0);
5729 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5730 OPTAB_WIDEN);
5731 }
5733 if (code == EQ || code == NE)
5734 {
5735 /* For EQ or NE, one way to do the comparison is to apply an operation
5736 that converts the operand into a positive number if it is nonzero
5737 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5738 for NE we negate. This puts the result in the sign bit. Then we
5739 normalize with a shift, if needed.
5741 Two operations that can do the above actions are ABS and FFS, so try
5742 them. If that doesn't work, and MODE is smaller than a full word,
5743 we can use zero-extension to the wider mode (an unsigned conversion)
5744 as the operation. */
5746 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5747 that is compensated by the subsequent overflow when subtracting
5748 one / negating. */
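/* E.g. using ABS on a 32-bit A: A == 7 gives abs 7, minus one is 6
   (sign clear, so EQ is false) and negated is -7 (sign set, so NE is
   true); A == 0 gives abs 0, minus one is -1 (sign set, EQ true) and
   negated is 0 (sign clear, NE false).  */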
5750 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5751 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5752 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5753 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5754 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5755 {
5756 tem = convert_modes (word_mode, mode, op0, 1);
5757 mode = word_mode;
5758 }
5760 if (tem != 0)
5761 {
5762 if (code == EQ)
5763 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5764 0, OPTAB_WIDEN);
5765 else
5766 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5767 }
5769 /* If we couldn't do it that way, for NE we can "or" the two's complement
5770 of the value with itself. For EQ, we take the one's complement of
5771 that "or", which is an extra insn, so we only handle EQ if branches
5772 are expensive. */
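/* E.g. on 32 bits: A == 5 gives -5 | 5, which has the sign bit set
   (NE true); A == 0 gives 0 | 0 == 0 (NE false).  Taking the one's
   complement of that OR flips the sign bit, giving the EQ result.  */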
5774 if (tem == 0
5775 && (code == NE
5776 || BRANCH_COST (optimize_insn_for_speed_p (),
5777 false) > 1))
5778 {
5779 if (rtx_equal_p (subtarget, op0))
5780 subtarget = 0;
5782 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5783 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5784 OPTAB_WIDEN);
5786 if (tem && code == EQ)
5787 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5788 }
5789 }
5791 if (tem && normalizep)
5792 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5793 GET_MODE_BITSIZE (mode) - 1,
5794 subtarget, normalizep == 1);
5796 if (tem)
5797 {
5798 if (!target)
5799 ;
5800 else if (GET_MODE (tem) != target_mode)
5801 {
5802 convert_move (target, tem, 0);
5803 tem = target;
5804 }
5805 else if (!subtarget)
5806 {
5807 emit_move_insn (target, tem);
5808 tem = target;
5809 }
5810 }
5811 else
5812 delete_insns_since (last);
5814 return tem;
5815 }
5817 /* Like emit_store_flag, but always succeeds. */
5819 rtx
5820 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5821 enum machine_mode mode, int unsignedp, int normalizep)
5822 {
5823 rtx tem, label;
5824 rtx trueval, falseval;
5826 /* First see if emit_store_flag can do the job. */
5827 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5828 if (tem != 0)
5829 return tem;
5831 if (!target)
5832 target = gen_reg_rtx (word_mode);
5834 /* If this failed, we have to do this with set/compare/jump/set code.
5835 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5836 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5837 if (code == NE
5838 && GET_MODE_CLASS (mode) == MODE_INT
5839 && REG_P (target)
5840 && op0 == target
5841 && op1 == const0_rtx)
5842 {
5843 label = gen_label_rtx ();
5844 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5845 mode, NULL_RTX, NULL_RTX, label, -1);
5846 emit_move_insn (target, trueval);
5847 emit_label (label);
5848 return target;
5849 }
5851 if (!REG_P (target)
5852 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5853 target = gen_reg_rtx (GET_MODE (target));
5855 /* Jump in the right direction if the target cannot implement CODE
5856 but can jump on its reverse condition. */
5857 falseval = const0_rtx;
5858 if (! can_compare_p (code, mode, ccp_jump)
5859 && (! FLOAT_MODE_P (mode)
5860 || code == ORDERED || code == UNORDERED
5861 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5862 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5863 {
5864 enum rtx_code rcode;
5865 if (FLOAT_MODE_P (mode))
5866 rcode = reverse_condition_maybe_unordered (code);
5867 else
5868 rcode = reverse_condition (code);
5870 /* Canonicalize to UNORDERED for the libcall. */
5871 if (can_compare_p (rcode, mode, ccp_jump)
5872 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5873 {
5874 falseval = trueval;
5875 trueval = const0_rtx;
5876 code = rcode;
5877 }
5878 }
5880 emit_move_insn (target, trueval);
5881 label = gen_label_rtx ();
5882 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5883 NULL_RTX, label, -1);
5885 emit_move_insn (target, falseval);
5886 emit_label (label);
5888 return target;
5889 }
5891 /* Perform possibly multi-word comparison and conditional jump to LABEL
5892 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5893 now a thin wrapper around do_compare_rtx_and_jump. */
5895 static void
5896 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5897 rtx label)
5898 {
5899 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5900 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5901 NULL_RTX, NULL_RTX, label, -1);
5902 }