gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2014 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "diagnostic-core.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "stor-layout.h"
  30 #include "tm_p.h"
  31 #include "flags.h"
  32 #include "insn-config.h"
  33 #include "expr.h"
  34 #include "optabs.h"
  35 #include "recog.h"
  36 #include "langhooks.h"
  37 #include "df.h"
  38 #include "target.h"
  39 #include "expmed.h"
  40
  41 struct target_expmed default_target_expmed;
  42 #if SWITCHABLE_TARGET
  43 struct target_expmed *this_target_expmed = &default_target_expmed;
  44 #endif
  45
  46 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  47                                    unsigned HOST_WIDE_INT,
  48                                    unsigned HOST_WIDE_INT,
  49                                    unsigned HOST_WIDE_INT,
  50                                    rtx);
  51 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
  52                                      unsigned HOST_WIDE_INT,
  53                                      rtx);
  54 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  55                                    unsigned HOST_WIDE_INT,
  56                                    unsigned HOST_WIDE_INT,
  57                                    unsigned HOST_WIDE_INT,
  58                                    rtx);
  59 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  60                                     unsigned HOST_WIDE_INT,
  61                                     unsigned HOST_WIDE_INT, rtx, int);
  62 static rtx extract_fixed_bit_field_1 (enum machine_mode, rtx,
  63                                       unsigned HOST_WIDE_INT,
  64                                       unsigned HOST_WIDE_INT, rtx, int);
  65 static rtx mask_rtx (enum machine_mode, int, int, int);
  66 static rtx lshift_value (enum machine_mode, unsigned HOST_WIDE_INT, int);
  67 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  68                                     unsigned HOST_WIDE_INT, int);
  69 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  70 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  71 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  72
  73 /* Test whether a value is zero of a power of two.  */
  74 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  75   (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
  76
  77 struct init_expmed_rtl
  78 {
  79   struct rtx_def reg;
  80   struct rtx_def plus;
  81   struct rtx_def neg;
  82   struct rtx_def mult;
  83   struct rtx_def sdiv;
  84   struct rtx_def udiv;
  85   struct rtx_def sdiv_32;
  86   struct rtx_def smod_32;
  87   struct rtx_def wide_mult;
  88   struct rtx_def wide_lshr;
  89   struct rtx_def wide_trunc;
  90   struct rtx_def shift;
  91   struct rtx_def shift_mult;
  92   struct rtx_def shift_add;
  93   struct rtx_def shift_sub0;
  94   struct rtx_def shift_sub1;
  95   struct rtx_def zext;
  96   struct rtx_def trunc;
  97
  98   rtx pow2[MAX_BITS_PER_WORD];
  99   rtx cint[MAX_BITS_PER_WORD];
 100 };
 101
 102 static void
 103 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
 104                       enum machine_mode from_mode, bool speed)
 105 {
 106   int to_size, from_size;
 107   rtx which;
 108
 109   /* We're given no information about the true size of a partial integer,
 110      only the size of the "full" integer it requires for storage.  For
 111      comparison purposes here, reduce the bit size by one in that case.  */
 112   to_size = (GET_MODE_BITSIZE (to_mode)
 113              - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
 114   from_size = (GET_MODE_BITSIZE (from_mode)
 115                - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
 116
 117   /* Assume cost of zero-extend and sign-extend is the same.  */
 118   which = (to_size < from_size ? &all->trunc : &all->zext);
 119
 120   PUT_MODE (&all->reg, from_mode);
 121   set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
 122 }
 123
 124 static void
 125 init_expmed_one_mode (struct init_expmed_rtl *all,
 126                       enum machine_mode mode, int speed)
 127 {
 128   int m, n, mode_bitsize;
 129   enum machine_mode mode_from;
 130
 131   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 132
 133   PUT_MODE (&all->reg, mode);
 134   PUT_MODE (&all->plus, mode);
 135   PUT_MODE (&all->neg, mode);
 136   PUT_MODE (&all->mult, mode);
 137   PUT_MODE (&all->sdiv, mode);
 138   PUT_MODE (&all->udiv, mode);
 139   PUT_MODE (&all->sdiv_32, mode);
 140   PUT_MODE (&all->smod_32, mode);
 141   PUT_MODE (&all->wide_trunc, mode);
 142   PUT_MODE (&all->shift, mode);
 143   PUT_MODE (&all->shift_mult, mode);
 144   PUT_MODE (&all->shift_add, mode);
 145   PUT_MODE (&all->shift_sub0, mode);
 146   PUT_MODE (&all->shift_sub1, mode);
 147   PUT_MODE (&all->zext, mode);
 148   PUT_MODE (&all->trunc, mode);
 149
 150   set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
 151   set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
 152   set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
 153   set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
 154   set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
 155
 156   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
 157                                      <= 2 * add_cost (speed, mode)));
 158   set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
 159                                      <= 4 * add_cost (speed, mode)));
 160
 161   set_shift_cost (speed, mode, 0, 0);
 162   {
 163     int cost = add_cost (speed, mode);
 164     set_shiftadd_cost (speed, mode, 0, cost);
 165     set_shiftsub0_cost (speed, mode, 0, cost);
 166     set_shiftsub1_cost (speed, mode, 0, cost);
 167   }
 168
 169   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 170   for (m = 1; m < n; m++)
 171     {
 172       XEXP (&all->shift, 1) = all->cint[m];
 173       XEXP (&all->shift_mult, 1) = all->pow2[m];
 174
 175       set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
 176       set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
 177       set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
 178       set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
 179     }
 180
 181   if (SCALAR_INT_MODE_P (mode))
 182     {
 183       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 184            mode_from = (enum machine_mode)(mode_from + 1))
 185         init_expmed_one_conv (all, mode, mode_from, speed);
 186     }
 187   if (GET_MODE_CLASS (mode) == MODE_INT)
 188     {
 189       enum machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
 190       if (wider_mode != VOIDmode)
 191         {
 192           PUT_MODE (&all->zext, wider_mode);
 193           PUT_MODE (&all->wide_mult, wider_mode);
 194           PUT_MODE (&all->wide_lshr, wider_mode);
 195           XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
 196
 197           set_mul_widen_cost (speed, wider_mode,
 198                               set_src_cost (&all->wide_mult, speed));
 199           set_mul_highpart_cost (speed, mode,
 200                                  set_src_cost (&all->wide_trunc, speed));
 201         }
 202     }
 203 }
 204
 205 void
 206 init_expmed (void)
 207 {
 208   struct init_expmed_rtl all;
 209   enum machine_mode mode;
 210   int m, speed;
 211
 212   memset (&all, 0, sizeof all);
 213   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 214     {
 215       all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 216       all.cint[m] = GEN_INT (m);
 217     }
 218
 219   PUT_CODE (&all.reg, REG);
 220   /* Avoid using hard regs in ways which may be unsupported.  */
 221   SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
 222
 223   PUT_CODE (&all.plus, PLUS);
 224   XEXP (&all.plus, 0) = &all.reg;
 225   XEXP (&all.plus, 1) = &all.reg;
 226
 227   PUT_CODE (&all.neg, NEG);
 228   XEXP (&all.neg, 0) = &all.reg;
 229
 230   PUT_CODE (&all.mult, MULT);
 231   XEXP (&all.mult, 0) = &all.reg;
 232   XEXP (&all.mult, 1) = &all.reg;
 233
 234   PUT_CODE (&all.sdiv, DIV);
 235   XEXP (&all.sdiv, 0) = &all.reg;
 236   XEXP (&all.sdiv, 1) = &all.reg;
 237
 238   PUT_CODE (&all.udiv, UDIV);
 239   XEXP (&all.udiv, 0) = &all.reg;
 240   XEXP (&all.udiv, 1) = &all.reg;
 241
 242   PUT_CODE (&all.sdiv_32, DIV);
 243   XEXP (&all.sdiv_32, 0) = &all.reg;
 244   XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
 245
 246   PUT_CODE (&all.smod_32, MOD);
 247   XEXP (&all.smod_32, 0) = &all.reg;
 248   XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
 249
 250   PUT_CODE (&all.zext, ZERO_EXTEND);
 251   XEXP (&all.zext, 0) = &all.reg;
 252
 253   PUT_CODE (&all.wide_mult, MULT);
 254   XEXP (&all.wide_mult, 0) = &all.zext;
 255   XEXP (&all.wide_mult, 1) = &all.zext;
 256
 257   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 258   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 259
 260   PUT_CODE (&all.wide_trunc, TRUNCATE);
 261   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 262
 263   PUT_CODE (&all.shift, ASHIFT);
 264   XEXP (&all.shift, 0) = &all.reg;
 265
 266   PUT_CODE (&all.shift_mult, MULT);
 267   XEXP (&all.shift_mult, 0) = &all.reg;
 268
 269   PUT_CODE (&all.shift_add, PLUS);
 270   XEXP (&all.shift_add, 0) = &all.shift_mult;
 271   XEXP (&all.shift_add, 1) = &all.reg;
 272
 273   PUT_CODE (&all.shift_sub0, MINUS);
 274   XEXP (&all.shift_sub0, 0) = &all.shift_mult;
 275   XEXP (&all.shift_sub0, 1) = &all.reg;
 276
 277   PUT_CODE (&all.shift_sub1, MINUS);
 278   XEXP (&all.shift_sub1, 0) = &all.reg;
 279   XEXP (&all.shift_sub1, 1) = &all.shift_mult;
 280
 281   PUT_CODE (&all.trunc, TRUNCATE);
 282   XEXP (&all.trunc, 0) = &all.reg;
 283
 284   for (speed = 0; speed < 2; speed++)
 285     {
 286       crtl->maybe_hot_insn_p = speed;
 287       set_zero_cost (speed, set_src_cost (const0_rtx, speed));
 288
 289       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 290            mode = (enum machine_mode)(mode + 1))
 291         init_expmed_one_mode (&all, mode, speed);
 292
 293       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 294         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 295              mode = (enum machine_mode)(mode + 1))
 296           init_expmed_one_mode (&all, mode, speed);
 297
 298       if (MIN_MODE_VECTOR_INT != VOIDmode)
 299         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 300              mode = (enum machine_mode)(mode + 1))
 301           init_expmed_one_mode (&all, mode, speed);
 302     }
 303
 304   if (alg_hash_used_p ())
 305     {
 306       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 307       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 308     }
 309   else
 310     set_alg_hash_used_p (true);
 311   default_rtl_profile ();
 312 }
 313
 314 /* Return an rtx representing minus the value of X.
 315    MODE is the intended mode of the result,
 316    useful if X is a CONST_INT.  */
 317
 318 rtx
 319 negate_rtx (enum machine_mode mode, rtx x)
 320 {
 321   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 322
 323   if (result == 0)
 324     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 325
 326   return result;
 327 }
 328
 329 /* Adjust bitfield memory MEM so that it points to the first unit of mode
 330    MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
 331    If MODE is BLKmode, return a reference to every byte in the bitfield.
 332    Set *NEW_BITNUM to the bit position of the field within the new memory.  */
 333
 334 static rtx
 335 narrow_bit_field_mem (rtx mem, enum machine_mode mode,
 336                       unsigned HOST_WIDE_INT bitsize,
 337                       unsigned HOST_WIDE_INT bitnum,
 338                       unsigned HOST_WIDE_INT *new_bitnum)
 339 {
 340   if (mode == BLKmode)
 341     {
 342       *new_bitnum = bitnum % BITS_PER_UNIT;
 343       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 344       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 345                             / BITS_PER_UNIT);
 346       return adjust_bitfield_address_size (mem, mode, offset, size);
 347     }
 348   else
 349     {
 350       unsigned int unit = GET_MODE_BITSIZE (mode);
 351       *new_bitnum = bitnum % unit;
 352       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 353       return adjust_bitfield_address (mem, mode, offset);
 354     }
 355 }
 356
 357 /* The caller wants to perform insertion or extraction PATTERN on a
 358    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 359    BITREGION_START and BITREGION_END are as for store_bit_field
 360    and FIELDMODE is the natural mode of the field.
 361
 362    Search for a mode that is compatible with the memory access
 363    restrictions and (where applicable) with a register insertion or
 364    extraction.  Return the new memory on success, storing the adjusted
 365    bit position in *NEW_BITNUM.  Return null otherwise.  */
 366
 367 static rtx
 368 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 369                               rtx op0, HOST_WIDE_INT bitsize,
 370                               HOST_WIDE_INT bitnum,
 371                               unsigned HOST_WIDE_INT bitregion_start,
 372                               unsigned HOST_WIDE_INT bitregion_end,
 373                               enum machine_mode fieldmode,
 374                               unsigned HOST_WIDE_INT *new_bitnum)
 375 {
 376   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 377                                 bitregion_end, MEM_ALIGN (op0),
 378                                 MEM_VOLATILE_P (op0));
 379   enum machine_mode best_mode;
 380   if (iter.next_mode (&best_mode))
 381     {
 382       /* We can use a memory in BEST_MODE.  See whether this is true for
 383          any wider modes.  All other things being equal, we prefer to
 384          use the widest mode possible because it tends to expose more
 385          CSE opportunities.  */
 386       if (!iter.prefer_smaller_modes ())
 387         {
 388           /* Limit the search to the mode required by the corresponding
 389              register insertion or extraction instruction, if any.  */
 390           enum machine_mode limit_mode = word_mode;
 391           extraction_insn insn;
 392           if (get_best_reg_extraction_insn (&insn, pattern,
 393                                             GET_MODE_BITSIZE (best_mode),
 394                                             fieldmode))
 395             limit_mode = insn.field_mode;
 396
 397           enum machine_mode wider_mode;
 398           while (iter.next_mode (&wider_mode)
 399                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 400             best_mode = wider_mode;
 401         }
 402       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 403                                    new_bitnum);
 404     }
 405   return NULL_RTX;
 406 }
 407
 408 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 409    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 410    offset is then BITNUM / BITS_PER_UNIT.  */
 411
 412 static bool
 413 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
 414                      unsigned HOST_WIDE_INT bitsize,
 415                      enum machine_mode struct_mode)
 416 {
 417   if (BYTES_BIG_ENDIAN)
 418     return (bitnum % BITS_PER_UNIT == 0
 419             && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
 420                 || (bitnum + bitsize) % BITS_PER_WORD == 0));
 421   else
 422     return bitnum % BITS_PER_WORD == 0;
 423 }
 424
 425 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 426    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 427    Return false if the access would touch memory outside the range
 428    BITREGION_START to BITREGION_END for conformance to the C++ memory
 429    model.  */
 430
 431 static bool
 432 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 433                             unsigned HOST_WIDE_INT bitnum,
 434                             enum machine_mode fieldmode,
 435                             unsigned HOST_WIDE_INT bitregion_start,
 436                             unsigned HOST_WIDE_INT bitregion_end)
 437 {
 438   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 439
 440   /* -fstrict-volatile-bitfields must be enabled and we must have a
 441      volatile MEM.  */
 442   if (!MEM_P (op0)
 443       || !MEM_VOLATILE_P (op0)
 444       || flag_strict_volatile_bitfields <= 0)
 445     return false;
 446
 447   /* Non-integral modes likely only happen with packed structures.
 448      Punt.  */
 449   if (!SCALAR_INT_MODE_P (fieldmode))
 450     return false;
 451
 452   /* The bit size must not be larger than the field mode, and
 453      the field mode must not be larger than a word.  */
 454   if (bitsize > modesize || modesize > BITS_PER_WORD)
 455     return false;
 456
 457   /* Check for cases of unaligned fields that must be split.  */
 458   if (bitnum % BITS_PER_UNIT + bitsize > modesize
 459       || (STRICT_ALIGNMENT
 460           && bitnum % GET_MODE_ALIGNMENT (fieldmode) + bitsize > modesize))
 461     return false;
 462
 463   /* Check for cases where the C++ memory model applies.  */
 464   if (bitregion_end != 0
 465       && (bitnum - bitnum % modesize < bitregion_start
 466           || bitnum - bitnum % modesize + modesize > bitregion_end))
 467     return false;
 468
 469   return true;
 470 }
 471
 472 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
 473    bit number BITNUM can be treated as a simple value of mode MODE.  */
 474
 475 static bool
 476 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 477                        unsigned HOST_WIDE_INT bitnum, enum machine_mode mode)
 478 {
 479   return (MEM_P (op0)
 480           && bitnum % BITS_PER_UNIT == 0
 481           && bitsize == GET_MODE_BITSIZE (mode)
 482           && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
 483               || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
 484                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 485 }
 486 \f
 487 /* Try to use instruction INSV to store VALUE into a field of OP0.
 488    BITSIZE and BITNUM are as for store_bit_field.  */
 489
 490 static bool
 491 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 492                             unsigned HOST_WIDE_INT bitsize,
 493                             unsigned HOST_WIDE_INT bitnum,
 494                             rtx value)
 495 {
 496   struct expand_operand ops[4];
 497   rtx value1;
 498   rtx xop0 = op0;
 499   rtx last = get_last_insn ();
 500   bool copy_back = false;
 501
 502   enum machine_mode op_mode = insv->field_mode;
 503   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 504   if (bitsize == 0 || bitsize > unit)
 505     return false;
 506
 507   if (MEM_P (xop0))
 508     /* Get a reference to the first byte of the field.  */
 509     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 510                                  &bitnum);
 511   else
 512     {
 513       /* Convert from counting within OP0 to counting in OP_MODE.  */
 514       if (BYTES_BIG_ENDIAN)
 515         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 516
 517       /* If xop0 is a register, we need it in OP_MODE
 518          to make it acceptable to the format of insv.  */
 519       if (GET_CODE (xop0) == SUBREG)
 520         /* We can't just change the mode, because this might clobber op0,
 521            and we will need the original value of op0 if insv fails.  */
 522         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 523       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 524         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 525     }
 526
 527   /* If the destination is a paradoxical subreg such that we need a
 528      truncate to the inner mode, perform the insertion on a temporary and
 529      truncate the result to the original destination.  Note that we can't
 530      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 531      X) 0)) is (reg:N X).  */
 532   if (GET_CODE (xop0) == SUBREG
 533       && REG_P (SUBREG_REG (xop0))
 534       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 535                                          op_mode))
 536     {
 537       rtx tem = gen_reg_rtx (op_mode);
 538       emit_move_insn (tem, xop0);
 539       xop0 = tem;
 540       copy_back = true;
 541     }
 542
 543   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 544      "backwards" from the size of the unit we are inserting into.
 545      Otherwise, we count bits from the most significant on a
 546      BYTES/BITS_BIG_ENDIAN machine.  */
 547
 548   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 549     bitnum = unit - bitsize - bitnum;
 550
 551   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 552   value1 = value;
 553   if (GET_MODE (value) != op_mode)
 554     {
 555       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 556         {
 557           /* Optimization: Don't bother really extending VALUE
 558              if it has all the bits we will actually use.  However,
 559              if we must narrow it, be sure we do it correctly.  */
 560
 561           if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 562             {
 563               rtx tmp;
 564
 565               tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 566               if (! tmp)
 567                 tmp = simplify_gen_subreg (op_mode,
 568                                            force_reg (GET_MODE (value),
 569                                                       value1),
 570                                            GET_MODE (value), 0);
 571               value1 = tmp;
 572             }
 573           else
 574             value1 = gen_lowpart (op_mode, value1);
 575         }
 576       else if (CONST_INT_P (value))
 577         value1 = gen_int_mode (INTVAL (value), op_mode);
 578       else
 579         /* Parse phase is supposed to make VALUE's data type
 580            match that of the component reference, which is a type
 581            at least as wide as the field; so VALUE should have
 582            a mode that corresponds to that type.  */
 583         gcc_assert (CONSTANT_P (value));
 584     }
 585
 586   create_fixed_operand (&ops[0], xop0);
 587   create_integer_operand (&ops[1], bitsize);
 588   create_integer_operand (&ops[2], bitnum);
 589   create_input_operand (&ops[3], value1, op_mode);
 590   if (maybe_expand_insn (insv->icode, 4, ops))
 591     {
 592       if (copy_back)
 593         convert_move (op0, xop0, true);
 594       return true;
 595     }
 596   delete_insns_since (last);
 597   return false;
 598 }
 599
 600 /* A subroutine of store_bit_field, with the same arguments.  Return true
 601    if the operation could be implemented.
 602
 603    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 604    no other way of implementing the operation.  If FALLBACK_P is false,
 605    return false instead.  */
 606
 607 static bool
 608 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 609                    unsigned HOST_WIDE_INT bitnum,
 610                    unsigned HOST_WIDE_INT bitregion_start,
 611                    unsigned HOST_WIDE_INT bitregion_end,
 612                    enum machine_mode fieldmode,
 613                    rtx value, bool fallback_p)
 614 {
 615   rtx op0 = str_rtx;
 616   rtx orig_value;
 617
 618   while (GET_CODE (op0) == SUBREG)
 619     {
 620       /* The following line once was done only if WORDS_BIG_ENDIAN,
 621          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 622          meaningful at a much higher level; when structures are copied
 623          between memory and regs, the higher-numbered regs
 624          always get higher addresses.  */
 625       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 626       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 627       int byte_offset = 0;
 628
 629       /* Paradoxical subregs need special handling on big endian machines.  */
 630       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 631         {
 632           int difference = inner_mode_size - outer_mode_size;
 633
 634           if (WORDS_BIG_ENDIAN)
 635             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 636           if (BYTES_BIG_ENDIAN)
 637             byte_offset += difference % UNITS_PER_WORD;
 638         }
 639       else
 640         byte_offset = SUBREG_BYTE (op0);
 641
 642       bitnum += byte_offset * BITS_PER_UNIT;
 643       op0 = SUBREG_REG (op0);
 644     }
 645
 646   /* No action is needed if the target is a register and if the field
 647      lies completely outside that register.  This can occur if the source
 648      code contains an out-of-bounds access to a small array.  */
 649   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 650     return true;
 651
 652   /* Use vec_set patterns for inserting parts of vectors whenever
 653      available.  */
 654   if (VECTOR_MODE_P (GET_MODE (op0))
 655       && !MEM_P (op0)
 656       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 657       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 658       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 659       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 660     {
 661       struct expand_operand ops[3];
 662       enum machine_mode outermode = GET_MODE (op0);
 663       enum machine_mode innermode = GET_MODE_INNER (outermode);
 664       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 665       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 666
 667       create_fixed_operand (&ops[0], op0);
 668       create_input_operand (&ops[1], value, innermode);
 669       create_integer_operand (&ops[2], pos);
 670       if (maybe_expand_insn (icode, 3, ops))
 671         return true;
 672     }
 673
 674   /* If the target is a register, overwriting the entire object, or storing
 675      a full-word or multi-word field can be done with just a SUBREG.  */
 676   if (!MEM_P (op0)
 677       && bitsize == GET_MODE_BITSIZE (fieldmode)
 678       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 679           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 680     {
 681       /* Use the subreg machinery either to narrow OP0 to the required
 682          words or to cope with mode punning between equal-sized modes.
 683          In the latter case, use subreg on the rhs side, not lhs.  */
 684       rtx sub;
 685
 686       if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
 687         {
 688           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 689           if (sub)
 690             {
 691               emit_move_insn (op0, sub);
 692               return true;
 693             }
 694         }
 695       else
 696         {
 697           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 698                                      bitnum / BITS_PER_UNIT);
 699           if (sub)
 700             {
 701               emit_move_insn (sub, value);
 702               return true;
 703             }
 704         }
 705     }
 706
 707   /* If the target is memory, storing any naturally aligned field can be
 708      done with a simple store.  For targets that support fast unaligned
 709      memory, any naturally sized, unit aligned field can be done directly.  */
 710   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
 711     {
 712       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 713       emit_move_insn (op0, value);
 714       return true;
 715     }
 716
 717   /* Make sure we are playing with integral modes.  Pun with subregs
 718      if we aren't.  This must come after the entire register case above,
 719      since that case is valid for any mode.  The following cases are only
 720      valid for integral modes.  */
 721   {
 722     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 723     if (imode != GET_MODE (op0))
 724       {
 725         if (MEM_P (op0))
 726           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
 727         else
 728           {
 729             gcc_assert (imode != BLKmode);
 730             op0 = gen_lowpart (imode, op0);
 731           }
 732       }
 733   }
 734
 735   /* Storing an lsb-aligned field in a register
 736      can be done with a movstrict instruction.  */
 737
 738   if (!MEM_P (op0)
 739       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 740       && bitsize == GET_MODE_BITSIZE (fieldmode)
 741       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 742     {
 743       struct expand_operand ops[2];
 744       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 745       rtx arg0 = op0;
 746       unsigned HOST_WIDE_INT subreg_off;
 747
 748       if (GET_CODE (arg0) == SUBREG)
 749         {
 750           /* Else we've got some float mode source being extracted into
 751              a different float mode destination -- this combination of
 752              subregs results in Severe Tire Damage.  */
 753           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 754                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 755                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 756           arg0 = SUBREG_REG (arg0);
 757         }
 758
 759       subreg_off = bitnum / BITS_PER_UNIT;
 760       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 761         {
 762           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 763
 764           create_fixed_operand (&ops[0], arg0);
 765           /* Shrink the source operand to FIELDMODE.  */
 766           create_convert_operand_to (&ops[1], value, fieldmode, false);
 767           if (maybe_expand_insn (icode, 2, ops))
 768             return true;
 769         }
 770     }
 771
 772   /* Handle fields bigger than a word.  */
 773
 774   if (bitsize > BITS_PER_WORD)
 775     {
 776       /* Here we transfer the words of the field
 777          in the order least significant first.
 778          This is because the most significant word is the one which may
 779          be less than full.
 780          However, only do that if the value is not BLKmode.  */
 781
 782       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 783       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 784       unsigned int i;
 785       rtx last;
 786
 787       /* This is the mode we must force value to, so that there will be enough
 788          subwords to extract.  Note that fieldmode will often (always?) be
 789          VOIDmode, because that is what store_field uses to indicate that this
 790          is a bit field, but passing VOIDmode to operand_subword_force
 791          is not allowed.  */
 792       fieldmode = GET_MODE (value);
 793       if (fieldmode == VOIDmode)
 794         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 795
 796       last = get_last_insn ();
 797       for (i = 0; i < nwords; i++)
 798         {
 799           /* If I is 0, use the low-order word in both field and target;
 800              if I is 1, use the next to lowest word; and so on.  */
 801           unsigned int wordnum = (backwards
 802                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 803                                   - i - 1
 804                                   : i);
 805           unsigned int bit_offset = (backwards
 806                                      ? MAX ((int) bitsize - ((int) i + 1)
 807                                             * BITS_PER_WORD,
 808                                             0)
 809                                      : (int) i * BITS_PER_WORD);
 810           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 811           unsigned HOST_WIDE_INT new_bitsize =
 812             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 813
 814           /* If the remaining chunk doesn't have full wordsize we have
 815              to make sure that for big endian machines the higher order
 816              bits are used.  */
 817           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 818             value_word = simplify_expand_binop (word_mode, lshr_optab,
 819                                                 value_word,
 820                                                 GEN_INT (BITS_PER_WORD
 821                                                          - new_bitsize),
 822                                                 NULL_RTX, true,
 823                                                 OPTAB_LIB_WIDEN);
 824
 825           if (!store_bit_field_1 (op0, new_bitsize,
 826                                   bitnum + bit_offset,
 827                                   bitregion_start, bitregion_end,
 828                                   word_mode,
 829                                   value_word, fallback_p))
 830             {
 831               delete_insns_since (last);
 832               return false;
 833             }
 834         }
 835       return true;
 836     }
 837
 838   /* If VALUE has a floating-point or complex mode, access it as an
 839      integer of the corresponding size.  This can occur on a machine
 840      with 64 bit registers that uses SFmode for float.  It can also
 841      occur for unaligned float or complex fields.  */
 842   orig_value = value;
 843   if (GET_MODE (value) != VOIDmode
 844       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 845       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 846     {
 847       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 848       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 849     }
 850
 851   /* If OP0 is a multi-word register, narrow it to the affected word.
 852      If the region spans two words, defer to store_split_bit_field.  */
 853   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 854     {
 855       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
 856                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 857       gcc_assert (op0);
 858       bitnum %= BITS_PER_WORD;
 859       if (bitnum + bitsize > BITS_PER_WORD)
 860         {
 861           if (!fallback_p)
 862             return false;
 863
 864           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 865                                  bitregion_end, value);
 866           return true;
 867         }
 868     }
 869
 870   /* From here on we can assume that the field to be stored in fits
 871      within a word.  If the destination is a register, it too fits
 872      in a word.  */
 873
 874   extraction_insn insv;
 875   if (!MEM_P (op0)
 876       && get_best_reg_extraction_insn (&insv, EP_insv,
 877                                        GET_MODE_BITSIZE (GET_MODE (op0)),
 878                                        fieldmode)
 879       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 880     return true;
 881
 882   /* If OP0 is a memory, try copying it to a register and seeing if a
 883      cheap register alternative is available.  */
 884   if (MEM_P (op0))
 885     {
 886       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
 887                                         fieldmode)
 888           && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 889         return true;
 890
 891       rtx last = get_last_insn ();
 892
 893       /* Try loading part of OP0 into a register, inserting the bitfield
 894          into that, and then copying the result back to OP0.  */
 895       unsigned HOST_WIDE_INT bitpos;
 896       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
 897                                                bitregion_start, bitregion_end,
 898                                                fieldmode, &bitpos);
 899       if (xop0)
 900         {
 901           rtx tempreg = copy_to_reg (xop0);
 902           if (store_bit_field_1 (tempreg, bitsize, bitpos,
 903                                  bitregion_start, bitregion_end,
 904                                  fieldmode, orig_value, false))
 905             {
 906               emit_move_insn (xop0, tempreg);
 907               return true;
 908             }
 909           delete_insns_since (last);
 910         }
 911     }
 912
 913   if (!fallback_p)
 914     return false;
 915
 916   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
 917                          bitregion_end, value);
 918   return true;
 919 }
 920
 921 /* Generate code to store value from rtx VALUE
 922    into a bit-field within structure STR_RTX
 923    containing BITSIZE bits starting at bit BITNUM.
 924
 925    BITREGION_START is bitpos of the first bitfield in this region.
 926    BITREGION_END is the bitpos of the ending bitfield in this region.
 927    These two fields are 0, if the C++ memory model does not apply,
 928    or we are not interested in keeping track of bitfield regions.
 929
 930    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */
 931
 932 void
 933 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 934                  unsigned HOST_WIDE_INT bitnum,
 935                  unsigned HOST_WIDE_INT bitregion_start,
 936                  unsigned HOST_WIDE_INT bitregion_end,
 937                  enum machine_mode fieldmode,
 938                  rtx value)
 939 {
 940   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
 941   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
 942                                   bitregion_start, bitregion_end))
 943     {
 944       /* Storing any naturally aligned field can be done with a simple
 945          store.  For targets that support fast unaligned memory, any
 946          naturally sized, unit aligned field can be done directly.  */
 947       if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, fieldmode))
 948         {
 949           str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
 950                                              bitnum / BITS_PER_UNIT);
 951           emit_move_insn (str_rtx, value);
 952         }
 953       else
 954         {
 955           str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
 956                                           &bitnum);
 957           /* Explicitly override the C/C++ memory model; ignore the
 958              bit range so that we can do the access in the mode mandated
 959              by -fstrict-volatile-bitfields instead.  */
 960           store_fixed_bit_field_1 (str_rtx, bitsize, bitnum, value);
 961         }
 962
 963       return;
 964     }
 965
 966   /* Under the C++0x memory model, we must not touch bits outside the
 967      bit region.  Adjust the address to start at the beginning of the
 968      bit region.  */
 969   if (MEM_P (str_rtx) && bitregion_start > 0)
 970     {
 971       enum machine_mode bestmode;
 972       HOST_WIDE_INT offset, size;
 973
 974       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
 975
 976       offset = bitregion_start / BITS_PER_UNIT;
 977       bitnum -= bitregion_start;
 978       size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
 979       bitregion_end -= bitregion_start;
 980       bitregion_start = 0;
 981       bestmode = get_best_mode (bitsize, bitnum,
 982                                 bitregion_start, bitregion_end,
 983                                 MEM_ALIGN (str_rtx), VOIDmode,
 984                                 MEM_VOLATILE_P (str_rtx));
 985       str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
 986     }
 987
 988   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
 989                           bitregion_start, bitregion_end,
 990                           fieldmode, value, true))
 991     gcc_unreachable ();
 992 }
 993 \f
 994 /* Use shifts and boolean operations to store VALUE into a bit field of
 995    width BITSIZE in OP0, starting at bit BITNUM.  */
 996
 997 static void
 998 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 999                        unsigned HOST_WIDE_INT bitnum,
1000                        unsigned HOST_WIDE_INT bitregion_start,
1001                        unsigned HOST_WIDE_INT bitregion_end,
1002                        rtx value)
1003 {
1004   /* There is a case not handled here:
1005      a structure with a known alignment of just a halfword
1006      and a field split across two aligned halfwords within the structure.
1007      Or likewise a structure with a known alignment of just a byte
1008      and a field split across two bytes.
1009      Such cases are not supposed to be able to occur.  */
1010
1011   if (MEM_P (op0))
1012     {
1013       enum machine_mode mode = GET_MODE (op0);
1014       if (GET_MODE_BITSIZE (mode) == 0
1015           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1016         mode = word_mode;
1017       mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1018                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1019
1020       if (mode == VOIDmode)
1021         {
1022           /* The only way this should occur is if the field spans word
1023              boundaries.  */
1024           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1025                                  bitregion_end, value);
1026           return;
1027         }
1028
1029       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1030     }
1031
1032   store_fixed_bit_field_1 (op0, bitsize, bitnum, value);
1033 }
1034
1035 /* Helper function for store_fixed_bit_field, stores
1036    the bit field always using the MODE of OP0.  */
1037
1038 static void
1039 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1040                          unsigned HOST_WIDE_INT bitnum,
1041                          rtx value)
1042 {
1043   enum machine_mode mode;
1044   rtx temp;
1045   int all_zero = 0;
1046   int all_one = 0;
1047
1048   mode = GET_MODE (op0);
1049   gcc_assert (SCALAR_INT_MODE_P (mode));
1050
1051   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1052      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1053
1054   if (BYTES_BIG_ENDIAN)
1055     /* BITNUM is the distance between our msb
1056        and that of the containing datum.
1057        Convert it to the distance from the lsb.  */
1058     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1059
1060   /* Now BITNUM is always the distance between our lsb
1061      and that of OP0.  */
1062
1063   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1064      we must first convert its mode to MODE.  */
1065
1066   if (CONST_INT_P (value))
1067     {
1068       HOST_WIDE_INT v = INTVAL (value);
1069
1070       if (bitsize < HOST_BITS_PER_WIDE_INT)
1071         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
1072
1073       if (v == 0)
1074         all_zero = 1;
1075       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1076                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
1077                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
1078         all_one = 1;
1079
1080       value = lshift_value (mode, v, bitnum);
1081     }
1082   else
1083     {
1084       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1085                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1086
1087       if (GET_MODE (value) != mode)
1088         value = convert_to_mode (mode, value, 1);
1089
1090       if (must_and)
1091         value = expand_binop (mode, and_optab, value,
1092                               mask_rtx (mode, 0, bitsize, 0),
1093                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1094       if (bitnum > 0)
1095         value = expand_shift (LSHIFT_EXPR, mode, value,
1096                               bitnum, NULL_RTX, 1);
1097     }
1098
1099   /* Now clear the chosen bits in OP0,
1100      except that if VALUE is -1 we need not bother.  */
1101   /* We keep the intermediates in registers to allow CSE to combine
1102      consecutive bitfield assignments.  */
1103
1104   temp = force_reg (mode, op0);
1105
1106   if (! all_one)
1107     {
1108       temp = expand_binop (mode, and_optab, temp,
1109                            mask_rtx (mode, bitnum, bitsize, 1),
1110                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1111       temp = force_reg (mode, temp);
1112     }
1113
1114   /* Now logical-or VALUE into OP0, unless it is zero.  */
1115
1116   if (! all_zero)
1117     {
1118       temp = expand_binop (mode, ior_optab, temp, value,
1119                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1120       temp = force_reg (mode, temp);
1121     }
1122
1123   if (op0 != temp)
1124     {
1125       op0 = copy_rtx (op0);
1126       emit_move_insn (op0, temp);
1127     }
1128 }
1129 \f
1130 /* Store a bit field that is split across multiple accessible memory objects.
1131
1132    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1133    BITSIZE is the field width; BITPOS the position of its first bit
1134    (within the word).
1135    VALUE is the value to store.
1136
1137    This does not yet handle fields wider than BITS_PER_WORD.  */
1138
1139 static void
1140 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1141                        unsigned HOST_WIDE_INT bitpos,
1142                        unsigned HOST_WIDE_INT bitregion_start,
1143                        unsigned HOST_WIDE_INT bitregion_end,
1144                        rtx value)
1145 {
1146   unsigned int unit;
1147   unsigned int bitsdone = 0;
1148
1149   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1150      much at a time.  */
1151   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1152     unit = BITS_PER_WORD;
1153   else
1154     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1155
1156   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1157      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1158      again, and we will mutually recurse forever.  */
1159   if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1160     unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1161
1162   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1163      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1164      that VALUE might be a floating-point constant.  */
1165   if (CONSTANT_P (value) && !CONST_INT_P (value))
1166     {
1167       rtx word = gen_lowpart_common (word_mode, value);
1168
1169       if (word && (value != word))
1170         value = word;
1171       else
1172         value = gen_lowpart_common (word_mode,
1173                                     force_reg (GET_MODE (value) != VOIDmode
1174                                                ? GET_MODE (value)
1175                                                : word_mode, value));
1176     }
1177
1178   while (bitsdone < bitsize)
1179     {
1180       unsigned HOST_WIDE_INT thissize;
1181       rtx part, word;
1182       unsigned HOST_WIDE_INT thispos;
1183       unsigned HOST_WIDE_INT offset;
1184
1185       offset = (bitpos + bitsdone) / unit;
1186       thispos = (bitpos + bitsdone) % unit;
1187
1188       /* When region of bytes we can touch is restricted, decrease
1189          UNIT close to the end of the region as needed.  If op0 is a REG
1190          or SUBREG of REG, don't do this, as there can't be data races
1191          on a register and we can expand shorter code in some cases.  */
1192       if (bitregion_end
1193           && unit > BITS_PER_UNIT
1194           && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1195           && !REG_P (op0)
1196           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1197         {
1198           unit = unit / 2;
1199           continue;
1200         }
1201
1202       /* THISSIZE must not overrun a word boundary.  Otherwise,
1203          store_fixed_bit_field will call us again, and we will mutually
1204          recurse forever.  */
1205       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1206       thissize = MIN (thissize, unit - thispos);
1207
1208       if (BYTES_BIG_ENDIAN)
1209         {
1210           /* Fetch successively less significant portions.  */
1211           if (CONST_INT_P (value))
1212             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1213                              >> (bitsize - bitsdone - thissize))
1214                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1215           else
1216             {
1217               int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1218               /* The args are chosen so that the last part includes the
1219                  lsb.  Give extract_bit_field the value it needs (with
1220                  endianness compensation) to fetch the piece we want.  */
1221               part = extract_fixed_bit_field (word_mode, value, thissize,
1222                                               total_bits - bitsize + bitsdone,
1223                                               NULL_RTX, 1);
1224             }
1225         }
1226       else
1227         {
1228           /* Fetch successively more significant portions.  */
1229           if (CONST_INT_P (value))
1230             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1231                              >> bitsdone)
1232                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1233           else
1234             part = extract_fixed_bit_field (word_mode, value, thissize,
1235                                             bitsdone, NULL_RTX, 1);
1236         }
1237
1238       /* If OP0 is a register, then handle OFFSET here.
1239
1240          When handling multiword bitfields, extract_bit_field may pass
1241          down a word_mode SUBREG of a larger REG for a bitfield that actually
1242          crosses a word boundary.  Thus, for a SUBREG, we must find
1243          the current word starting from the base register.  */
1244       if (GET_CODE (op0) == SUBREG)
1245         {
1246           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1247                             + (offset * unit / BITS_PER_WORD);
1248           enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1249           if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1250             word = word_offset ? const0_rtx : op0;
1251           else
1252             word = operand_subword_force (SUBREG_REG (op0), word_offset,
1253                                           GET_MODE (SUBREG_REG (op0)));
1254           offset &= BITS_PER_WORD / unit - 1;
1255         }
1256       else if (REG_P (op0))
1257         {
1258           enum machine_mode op0_mode = GET_MODE (op0);
1259           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1260             word = offset ? const0_rtx : op0;
1261           else
1262             word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1263                                           GET_MODE (op0));
1264           offset &= BITS_PER_WORD / unit - 1;
1265         }
1266       else
1267         word = op0;
1268
1269       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1270          it is just an out-of-bounds access.  Ignore it.  */
1271       if (word != const0_rtx)
1272         store_fixed_bit_field (word, thissize, offset * unit + thispos,
1273                                bitregion_start, bitregion_end, part);
1274       bitsdone += thissize;
1275     }
1276 }
1277 \f
1278 /* A subroutine of extract_bit_field_1 that converts return value X
1279    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1280    to extract_bit_field.  */
1281
1282 static rtx
1283 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1284                              enum machine_mode tmode, bool unsignedp)
1285 {
1286   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1287     return x;
1288
1289   /* If the x mode is not a scalar integral, first convert to the
1290      integer mode of that size and then access it as a floating-point
1291      value via a SUBREG.  */
1292   if (!SCALAR_INT_MODE_P (tmode))
1293     {
1294       enum machine_mode smode;
1295
1296       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1297       x = convert_to_mode (smode, x, unsignedp);
1298       x = force_reg (smode, x);
1299       return gen_lowpart (tmode, x);
1300     }
1301
1302   return convert_to_mode (tmode, x, unsignedp);
1303 }
1304
1305 /* Try to use an ext(z)v pattern to extract a field from OP0.
1306    Return the extracted value on success, otherwise return null.
1307    EXT_MODE is the mode of the extraction and the other arguments
1308    are as for extract_bit_field.  */
1309
1310 static rtx
1311 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1312                               unsigned HOST_WIDE_INT bitsize,
1313                               unsigned HOST_WIDE_INT bitnum,
1314                               int unsignedp, rtx target,
1315                               enum machine_mode mode, enum machine_mode tmode)
1316 {
1317   struct expand_operand ops[4];
1318   rtx spec_target = target;
1319   rtx spec_target_subreg = 0;
1320   enum machine_mode ext_mode = extv->field_mode;
1321   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1322
1323   if (bitsize == 0 || unit < bitsize)
1324     return NULL_RTX;
1325
1326   if (MEM_P (op0))
1327     /* Get a reference to the first byte of the field.  */
1328     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1329                                 &bitnum);
1330   else
1331     {
1332       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1333       if (BYTES_BIG_ENDIAN)
1334         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1335
1336       /* If op0 is a register, we need it in EXT_MODE to make it
1337          acceptable to the format of ext(z)v.  */
1338       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1339         return NULL_RTX;
1340       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1341         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1342     }
1343
1344   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1345      "backwards" from the size of the unit we are extracting from.
1346      Otherwise, we count bits from the most significant on a
1347      BYTES/BITS_BIG_ENDIAN machine.  */
1348
1349   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1350     bitnum = unit - bitsize - bitnum;
1351
1352   if (target == 0)
1353     target = spec_target = gen_reg_rtx (tmode);
1354
1355   if (GET_MODE (target) != ext_mode)
1356     {
1357       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1358          between the mode of the extraction (word_mode) and the target
1359          mode.  Instead, create a temporary and use convert_move to set
1360          the target.  */
1361       if (REG_P (target)
1362           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1363         {
1364           target = gen_lowpart (ext_mode, target);
1365           if (GET_MODE_PRECISION (ext_mode)
1366               > GET_MODE_PRECISION (GET_MODE (spec_target)))
1367             spec_target_subreg = target;
1368         }
1369       else
1370         target = gen_reg_rtx (ext_mode);
1371     }
1372
1373   create_output_operand (&ops[0], target, ext_mode);
1374   create_fixed_operand (&ops[1], op0);
1375   create_integer_operand (&ops[2], bitsize);
1376   create_integer_operand (&ops[3], bitnum);
1377   if (maybe_expand_insn (extv->icode, 4, ops))
1378     {
1379       target = ops[0].value;
1380       if (target == spec_target)
1381         return target;
1382       if (target == spec_target_subreg)
1383         return spec_target;
1384       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1385     }
1386   return NULL_RTX;
1387 }
1388
1389 /* A subroutine of extract_bit_field, with the same arguments.
1390    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1391    if we can find no other means of implementing the operation.
1392    if FALLBACK_P is false, return NULL instead.  */
1393
1394 static rtx
1395 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1396                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1397                      enum machine_mode mode, enum machine_mode tmode,
1398                      bool fallback_p)
1399 {
1400   rtx op0 = str_rtx;
1401   enum machine_mode int_mode;
1402   enum machine_mode mode1;
1403
1404   if (tmode == VOIDmode)
1405     tmode = mode;
1406
1407   while (GET_CODE (op0) == SUBREG)
1408     {
1409       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1410       op0 = SUBREG_REG (op0);
1411     }
1412
1413   /* If we have an out-of-bounds access to a register, just return an
1414      uninitialized register of the required mode.  This can occur if the
1415      source code contains an out-of-bounds access to a small array.  */
1416   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1417     return gen_reg_rtx (tmode);
1418
1419   if (REG_P (op0)
1420       && mode == GET_MODE (op0)
1421       && bitnum == 0
1422       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1423     {
1424       /* We're trying to extract a full register from itself.  */
1425       return op0;
1426     }
1427
1428   /* See if we can get a better vector mode before extracting.  */
1429   if (VECTOR_MODE_P (GET_MODE (op0))
1430       && !MEM_P (op0)
1431       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1432     {
1433       enum machine_mode new_mode;
1434
1435       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1436         new_mode = MIN_MODE_VECTOR_FLOAT;
1437       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1438         new_mode = MIN_MODE_VECTOR_FRACT;
1439       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1440         new_mode = MIN_MODE_VECTOR_UFRACT;
1441       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1442         new_mode = MIN_MODE_VECTOR_ACCUM;
1443       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1444         new_mode = MIN_MODE_VECTOR_UACCUM;
1445       else
1446         new_mode = MIN_MODE_VECTOR_INT;
1447
1448       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1449         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1450             && targetm.vector_mode_supported_p (new_mode))
1451           break;
1452       if (new_mode != VOIDmode)
1453         op0 = gen_lowpart (new_mode, op0);
1454     }
1455
1456   /* Use vec_extract patterns for extracting parts of vectors whenever
1457      available.  */
1458   if (VECTOR_MODE_P (GET_MODE (op0))
1459       && !MEM_P (op0)
1460       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1461       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1462           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1463     {
1464       struct expand_operand ops[3];
1465       enum machine_mode outermode = GET_MODE (op0);
1466       enum machine_mode innermode = GET_MODE_INNER (outermode);
1467       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1468       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1469
1470       create_output_operand (&ops[0], target, innermode);
1471       create_input_operand (&ops[1], op0, outermode);
1472       create_integer_operand (&ops[2], pos);
1473       if (maybe_expand_insn (icode, 3, ops))
1474         {
1475           target = ops[0].value;
1476           if (GET_MODE (target) != mode)
1477             return gen_lowpart (tmode, target);
1478           return target;
1479         }
1480     }
1481
1482   /* Make sure we are playing with integral modes.  Pun with subregs
1483      if we aren't.  */
1484   {
1485     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1486     if (imode != GET_MODE (op0))
1487       {
1488         if (MEM_P (op0))
1489           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1490         else if (imode != BLKmode)
1491           {
1492             op0 = gen_lowpart (imode, op0);
1493
1494             /* If we got a SUBREG, force it into a register since we
1495                aren't going to be able to do another SUBREG on it.  */
1496             if (GET_CODE (op0) == SUBREG)
1497               op0 = force_reg (imode, op0);
1498           }
1499         else if (REG_P (op0))
1500           {
1501             rtx reg, subreg;
1502             imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1503                                             MODE_INT);
1504             reg = gen_reg_rtx (imode);
1505             subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1506             emit_move_insn (subreg, op0);
1507             op0 = reg;
1508             bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1509           }
1510         else
1511           {
1512             HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1513             rtx mem = assign_stack_temp (GET_MODE (op0), size);
1514             emit_move_insn (mem, op0);
1515             op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1516           }
1517       }
1518   }
1519
1520   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1521      If that's wrong, the solution is to test for it and set TARGET to 0
1522      if needed.  */
1523
1524   /* Get the mode of the field to use for atomic access or subreg
1525      conversion.  */
1526   mode1 = mode;
1527   if (SCALAR_INT_MODE_P (tmode))
1528     {
1529       enum machine_mode try_mode = mode_for_size (bitsize,
1530                                                   GET_MODE_CLASS (tmode), 0);
1531       if (try_mode != BLKmode)
1532         mode1 = try_mode;
1533     }
1534   gcc_assert (mode1 != BLKmode);
1535
1536   /* Extraction of a full MODE1 value can be done with a subreg as long
1537      as the least significant bit of the value is the least significant
1538      bit of either OP0 or a word of OP0.  */
1539   if (!MEM_P (op0)
1540       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1541       && bitsize == GET_MODE_BITSIZE (mode1)
1542       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1543     {
1544       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1545                                      bitnum / BITS_PER_UNIT);
1546       if (sub)
1547         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1548     }
1549
1550   /* Extraction of a full MODE1 value can be done with a load as long as
1551      the field is on a byte boundary and is sufficiently aligned.  */
1552   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1553     {
1554       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1555       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1556     }
1557
1558   /* Handle fields bigger than a word.  */
1559
1560   if (bitsize > BITS_PER_WORD)
1561     {
1562       /* Here we transfer the words of the field
1563          in the order least significant first.
1564          This is because the most significant word is the one which may
1565          be less than full.  */
1566
1567       unsigned int backwards = WORDS_BIG_ENDIAN;
1568       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1569       unsigned int i;
1570       rtx last;
1571
1572       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1573         target = gen_reg_rtx (mode);
1574
1575       /* Indicate for flow that the entire target reg is being set.  */
1576       emit_clobber (target);
1577
1578       last = get_last_insn ();
1579       for (i = 0; i < nwords; i++)
1580         {
1581           /* If I is 0, use the low-order word in both field and target;
1582              if I is 1, use the next to lowest word; and so on.  */
1583           /* Word number in TARGET to use.  */
1584           unsigned int wordnum
1585             = (backwards
1586                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1587                : i);
1588           /* Offset from start of field in OP0.  */
1589           unsigned int bit_offset = (backwards
1590                                      ? MAX ((int) bitsize - ((int) i + 1)
1591                                             * BITS_PER_WORD,
1592                                             0)
1593                                      : (int) i * BITS_PER_WORD);
1594           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1595           rtx result_part
1596             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1597                                              bitsize - i * BITS_PER_WORD),
1598                                    bitnum + bit_offset, 1, target_part,
1599                                    mode, word_mode, fallback_p);
1600
1601           gcc_assert (target_part);
1602           if (!result_part)
1603             {
1604               delete_insns_since (last);
1605               return NULL;
1606             }
1607
1608           if (result_part != target_part)
1609             emit_move_insn (target_part, result_part);
1610         }
1611
1612       if (unsignedp)
1613         {
1614           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1615              need to be zero'd out.  */
1616           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1617             {
1618               unsigned int i, total_words;
1619
1620               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1621               for (i = nwords; i < total_words; i++)
1622                 emit_move_insn
1623                   (operand_subword (target,
1624                                     backwards ? total_words - i - 1 : i,
1625                                     1, VOIDmode),
1626                    const0_rtx);
1627             }
1628           return target;
1629         }
1630
1631       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1632       target = expand_shift (LSHIFT_EXPR, mode, target,
1633                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1634       return expand_shift (RSHIFT_EXPR, mode, target,
1635                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1636     }
1637
1638   /* If OP0 is a multi-word register, narrow it to the affected word.
1639      If the region spans two words, defer to extract_split_bit_field.  */
1640   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1641     {
1642       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1643                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1644       bitnum %= BITS_PER_WORD;
1645       if (bitnum + bitsize > BITS_PER_WORD)
1646         {
1647           if (!fallback_p)
1648             return NULL_RTX;
1649           target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1650           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1651         }
1652     }
1653
1654   /* From here on we know the desired field is smaller than a word.
1655      If OP0 is a register, it too fits within a word.  */
1656   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1657   extraction_insn extv;
1658   if (!MEM_P (op0)
1659       /* ??? We could limit the structure size to the part of OP0 that
1660          contains the field, with appropriate checks for endianness
1661          and TRULY_NOOP_TRUNCATION.  */
1662       && get_best_reg_extraction_insn (&extv, pattern,
1663                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1664                                        tmode))
1665     {
1666       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1667                                                  unsignedp, target, mode,
1668                                                  tmode);
1669       if (result)
1670         return result;
1671     }
1672
1673   /* If OP0 is a memory, try copying it to a register and seeing if a
1674      cheap register alternative is available.  */
1675   if (MEM_P (op0))
1676     {
1677       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1678                                         tmode))
1679         {
1680           rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1681                                                      bitnum, unsignedp,
1682                                                      target, mode,
1683                                                      tmode);
1684           if (result)
1685             return result;
1686         }
1687
1688       rtx last = get_last_insn ();
1689
1690       /* Try loading part of OP0 into a register and extracting the
1691          bitfield from that.  */
1692       unsigned HOST_WIDE_INT bitpos;
1693       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1694                                                0, 0, tmode, &bitpos);
1695       if (xop0)
1696         {
1697           xop0 = copy_to_reg (xop0);
1698           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1699                                             unsignedp, target,
1700                                             mode, tmode, false);
1701           if (result)
1702             return result;
1703           delete_insns_since (last);
1704         }
1705     }
1706
1707   if (!fallback_p)
1708     return NULL;
1709
1710   /* Find a correspondingly-sized integer field, so we can apply
1711      shifts and masks to it.  */
1712   int_mode = int_mode_for_mode (tmode);
1713   if (int_mode == BLKmode)
1714     int_mode = int_mode_for_mode (mode);
1715   /* Should probably push op0 out to memory and then do a load.  */
1716   gcc_assert (int_mode != BLKmode);
1717
1718   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1719                                     target, unsignedp);
1720   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1721 }
1722
1723 /* Generate code to extract a byte-field from STR_RTX
1724    containing BITSIZE bits, starting at BITNUM,
1725    and put it in TARGET if possible (if TARGET is nonzero).
1726    Regardless of TARGET, we return the rtx for where the value is placed.
1727
1728    STR_RTX is the structure containing the byte (a REG or MEM).
1729    UNSIGNEDP is nonzero if this is an unsigned bit field.
1730    MODE is the natural mode of the field value once extracted.
1731    TMODE is the mode the caller would like the value to have;
1732    but the value may be returned with type MODE instead.
1733
1734    If a TARGET is specified and we can store in it at no extra cost,
1735    we do so, and return TARGET.
1736    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1737    if they are equally easy.  */
1738
1739 rtx
1740 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1741                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1742                    enum machine_mode mode, enum machine_mode tmode)
1743 {
1744   enum machine_mode mode1;
1745
1746   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1747   if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1748     mode1 = GET_MODE (str_rtx);
1749   else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1750     mode1 = GET_MODE (target);
1751   else
1752     mode1 = tmode;
1753
1754   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1755     {
1756       rtx result;
1757
1758       /* Extraction of a full MODE1 value can be done with a load as long as
1759          the field is on a byte boundary and is sufficiently aligned.  */
1760       if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, mode1))
1761         result = adjust_bitfield_address (str_rtx, mode1,
1762                                           bitnum / BITS_PER_UNIT);
1763       else
1764         {
1765           str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1766                                           &bitnum);
1767           result = extract_fixed_bit_field_1 (mode, str_rtx, bitsize, bitnum,
1768                                               target, unsignedp);
1769         }
1770
1771       return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1772     }
1773
1774   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1775                               target, mode, tmode, true);
1776 }
1777 \f
1778 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1779    from bit BITNUM of OP0.
1780
1781    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1782    If TARGET is nonzero, attempts to store the value there
1783    and return TARGET, but this is not guaranteed.
1784    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1785
1786 static rtx
1787 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1788                          unsigned HOST_WIDE_INT bitsize,
1789                          unsigned HOST_WIDE_INT bitnum, rtx target,
1790                          int unsignedp)
1791 {
1792   if (MEM_P (op0))
1793     {
1794       enum machine_mode mode
1795         = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1796                          MEM_VOLATILE_P (op0));
1797
1798       if (mode == VOIDmode)
1799         /* The only way this should occur is if the field spans word
1800            boundaries.  */
1801         return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1802
1803       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1804     }
1805
1806   return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1807                                     target, unsignedp);
1808 }
1809
1810 /* Helper function for extract_fixed_bit_field, extracts
1811    the bit field always using the MODE of OP0.  */
1812
1813 static rtx
1814 extract_fixed_bit_field_1 (enum machine_mode tmode, rtx op0,
1815                            unsigned HOST_WIDE_INT bitsize,
1816                            unsigned HOST_WIDE_INT bitnum, rtx target,
1817                            int unsignedp)
1818 {
1819   enum machine_mode mode = GET_MODE (op0);
1820   gcc_assert (SCALAR_INT_MODE_P (mode));
1821
1822   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1823      for invalid input, such as extract equivalent of f5 from
1824      gcc.dg/pr48335-2.c.  */
1825
1826   if (BYTES_BIG_ENDIAN)
1827     /* BITNUM is the distance between our msb and that of OP0.
1828        Convert it to the distance from the lsb.  */
1829     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1830
1831   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1832      We have reduced the big-endian case to the little-endian case.  */
1833
1834   if (unsignedp)
1835     {
1836       if (bitnum)
1837         {
1838           /* If the field does not already start at the lsb,
1839              shift it so it does.  */
1840           /* Maybe propagate the target for the shift.  */
1841           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1842           if (tmode != mode)
1843             subtarget = 0;
1844           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1845         }
1846       /* Convert the value to the desired mode.  */
1847       if (mode != tmode)
1848         op0 = convert_to_mode (tmode, op0, 1);
1849
1850       /* Unless the msb of the field used to be the msb when we shifted,
1851          mask out the upper bits.  */
1852
1853       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1854         return expand_binop (GET_MODE (op0), and_optab, op0,
1855                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1856                              target, 1, OPTAB_LIB_WIDEN);
1857       return op0;
1858     }
1859
1860   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1861      then arithmetic-shift its lsb to the lsb of the word.  */
1862   op0 = force_reg (mode, op0);
1863
1864   /* Find the narrowest integer mode that contains the field.  */
1865
1866   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1867        mode = GET_MODE_WIDER_MODE (mode))
1868     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1869       {
1870         op0 = convert_to_mode (mode, op0, 0);
1871         break;
1872       }
1873
1874   if (mode != tmode)
1875     target = 0;
1876
1877   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1878     {
1879       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1880       /* Maybe propagate the target for the shift.  */
1881       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1882       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1883     }
1884
1885   return expand_shift (RSHIFT_EXPR, mode, op0,
1886                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1887 }
1888 \f
1889 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1890    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1891    complement of that if COMPLEMENT.  The mask is truncated if
1892    necessary to the width of mode MODE.  The mask is zero-extended if
1893    BITSIZE+BITPOS is too small for MODE.  */
1894
1895 static rtx
1896 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1897 {
1898   double_int mask;
1899
1900   mask = double_int::mask (bitsize);
1901   mask = mask.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1902
1903   if (complement)
1904     mask = ~mask;
1905
1906   return immed_double_int_const (mask, mode);
1907 }
1908
1909 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1910    VALUE << BITPOS.  */
1911
1912 static rtx
1913 lshift_value (enum machine_mode mode, unsigned HOST_WIDE_INT value,
1914               int bitpos)
1915 {
1916   double_int val;
1917
1918   val = double_int::from_uhwi (value);
1919   val = val.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1920
1921   return immed_double_int_const (val, mode);
1922 }
1923 \f
1924 /* Extract a bit field that is split across two words
1925    and return an RTX for the result.
1926
1927    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1928    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1929    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1930
1931 static rtx
1932 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1933                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1934 {
1935   unsigned int unit;
1936   unsigned int bitsdone = 0;
1937   rtx result = NULL_RTX;
1938   int first = 1;
1939
1940   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1941      much at a time.  */
1942   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1943     unit = BITS_PER_WORD;
1944   else
1945     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1946
1947   while (bitsdone < bitsize)
1948     {
1949       unsigned HOST_WIDE_INT thissize;
1950       rtx part, word;
1951       unsigned HOST_WIDE_INT thispos;
1952       unsigned HOST_WIDE_INT offset;
1953
1954       offset = (bitpos + bitsdone) / unit;
1955       thispos = (bitpos + bitsdone) % unit;
1956
1957       /* THISSIZE must not overrun a word boundary.  Otherwise,
1958          extract_fixed_bit_field will call us again, and we will mutually
1959          recurse forever.  */
1960       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1961       thissize = MIN (thissize, unit - thispos);
1962
1963       /* If OP0 is a register, then handle OFFSET here.
1964
1965          When handling multiword bitfields, extract_bit_field may pass
1966          down a word_mode SUBREG of a larger REG for a bitfield that actually
1967          crosses a word boundary.  Thus, for a SUBREG, we must find
1968          the current word starting from the base register.  */
1969       if (GET_CODE (op0) == SUBREG)
1970         {
1971           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1972           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1973                                         GET_MODE (SUBREG_REG (op0)));
1974           offset = 0;
1975         }
1976       else if (REG_P (op0))
1977         {
1978           word = operand_subword_force (op0, offset, GET_MODE (op0));
1979           offset = 0;
1980         }
1981       else
1982         word = op0;
1983
1984       /* Extract the parts in bit-counting order,
1985          whose meaning is determined by BYTES_PER_UNIT.
1986          OFFSET is in UNITs, and UNIT is in bits.  */
1987       part = extract_fixed_bit_field (word_mode, word, thissize,
1988                                       offset * unit + thispos, 0, 1);
1989       bitsdone += thissize;
1990
1991       /* Shift this part into place for the result.  */
1992       if (BYTES_BIG_ENDIAN)
1993         {
1994           if (bitsize != bitsdone)
1995             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1996                                  bitsize - bitsdone, 0, 1);
1997         }
1998       else
1999         {
2000           if (bitsdone != thissize)
2001             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2002                                  bitsdone - thissize, 0, 1);
2003         }
2004
2005       if (first)
2006         result = part;
2007       else
2008         /* Combine the parts with bitwise or.  This works
2009            because we extracted each part as an unsigned bit field.  */
2010         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2011                                OPTAB_LIB_WIDEN);
2012
2013       first = 0;
2014     }
2015
2016   /* Unsigned bit field: we are done.  */
2017   if (unsignedp)
2018     return result;
2019   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2020   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2021                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2022   return expand_shift (RSHIFT_EXPR, word_mode, result,
2023                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2024 }
2025 \f
2026 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2027    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2028    MODE, fill the upper bits with zeros.  Fail if the layout of either
2029    mode is unknown (as for CC modes) or if the extraction would involve
2030    unprofitable mode punning.  Return the value on success, otherwise
2031    return null.
2032
2033    This is different from gen_lowpart* in these respects:
2034
2035      - the returned value must always be considered an rvalue
2036
2037      - when MODE is wider than SRC_MODE, the extraction involves
2038        a zero extension
2039
2040      - when MODE is smaller than SRC_MODE, the extraction involves
2041        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2042
2043    In other words, this routine performs a computation, whereas the
2044    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2045    operations.  */
2046
2047 rtx
2048 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2049 {
2050   enum machine_mode int_mode, src_int_mode;
2051
2052   if (mode == src_mode)
2053     return src;
2054
2055   if (CONSTANT_P (src))
2056     {
2057       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2058          fails, it will happily create (subreg (symbol_ref)) or similar
2059          invalid SUBREGs.  */
2060       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2061       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2062       if (ret)
2063         return ret;
2064
2065       if (GET_MODE (src) == VOIDmode
2066           || !validate_subreg (mode, src_mode, src, byte))
2067         return NULL_RTX;
2068
2069       src = force_reg (GET_MODE (src), src);
2070       return gen_rtx_SUBREG (mode, src, byte);
2071     }
2072
2073   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2074     return NULL_RTX;
2075
2076   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2077       && MODES_TIEABLE_P (mode, src_mode))
2078     {
2079       rtx x = gen_lowpart_common (mode, src);
2080       if (x)
2081         return x;
2082     }
2083
2084   src_int_mode = int_mode_for_mode (src_mode);
2085   int_mode = int_mode_for_mode (mode);
2086   if (src_int_mode == BLKmode || int_mode == BLKmode)
2087     return NULL_RTX;
2088
2089   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2090     return NULL_RTX;
2091   if (!MODES_TIEABLE_P (int_mode, mode))
2092     return NULL_RTX;
2093
2094   src = gen_lowpart (src_int_mode, src);
2095   src = convert_modes (int_mode, src_int_mode, src, true);
2096   src = gen_lowpart (mode, src);
2097   return src;
2098 }
2099 \f
2100 /* Add INC into TARGET.  */
2101
2102 void
2103 expand_inc (rtx target, rtx inc)
2104 {
2105   rtx value = expand_binop (GET_MODE (target), add_optab,
2106                             target, inc,
2107                             target, 0, OPTAB_LIB_WIDEN);
2108   if (value != target)
2109     emit_move_insn (target, value);
2110 }
2111
2112 /* Subtract DEC from TARGET.  */
2113
2114 void
2115 expand_dec (rtx target, rtx dec)
2116 {
2117   rtx value = expand_binop (GET_MODE (target), sub_optab,
2118                             target, dec,
2119                             target, 0, OPTAB_LIB_WIDEN);
2120   if (value != target)
2121     emit_move_insn (target, value);
2122 }
2123 \f
2124 /* Output a shift instruction for expression code CODE,
2125    with SHIFTED being the rtx for the value to shift,
2126    and AMOUNT the rtx for the amount to shift by.
2127    Store the result in the rtx TARGET, if that is convenient.
2128    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2129    Return the rtx for where the value is.  */
2130
2131 static rtx
2132 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2133                 rtx amount, rtx target, int unsignedp)
2134 {
2135   rtx op1, temp = 0;
2136   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2137   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2138   optab lshift_optab = ashl_optab;
2139   optab rshift_arith_optab = ashr_optab;
2140   optab rshift_uns_optab = lshr_optab;
2141   optab lrotate_optab = rotl_optab;
2142   optab rrotate_optab = rotr_optab;
2143   enum machine_mode op1_mode;
2144   int attempt;
2145   bool speed = optimize_insn_for_speed_p ();
2146
2147   op1 = amount;
2148   op1_mode = GET_MODE (op1);
2149
2150   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2151      shift amount is a vector, use the vector/vector shift patterns.  */
2152   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2153     {
2154       lshift_optab = vashl_optab;
2155       rshift_arith_optab = vashr_optab;
2156       rshift_uns_optab = vlshr_optab;
2157       lrotate_optab = vrotl_optab;
2158       rrotate_optab = vrotr_optab;
2159     }
2160
2161   /* Previously detected shift-counts computed by NEGATE_EXPR
2162      and shifted in the other direction; but that does not work
2163      on all machines.  */
2164
2165   if (SHIFT_COUNT_TRUNCATED)
2166     {
2167       if (CONST_INT_P (op1)
2168           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2169               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2170         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2171                        % GET_MODE_BITSIZE (mode));
2172       else if (GET_CODE (op1) == SUBREG
2173                && subreg_lowpart_p (op1)
2174                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2175                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2176         op1 = SUBREG_REG (op1);
2177     }
2178
2179   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2180      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2181      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2182      amount instead.  */
2183   if (rotate
2184       && CONST_INT_P (op1)
2185       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (mode) / 2 + left,
2186                    GET_MODE_BITSIZE (mode) - 1))
2187     {
2188       op1 = GEN_INT (GET_MODE_BITSIZE (mode) - INTVAL (op1));
2189       left = !left;
2190       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2191     }
2192
2193   if (op1 == const0_rtx)
2194     return shifted;
2195
2196   /* Check whether its cheaper to implement a left shift by a constant
2197      bit count by a sequence of additions.  */
2198   if (code == LSHIFT_EXPR
2199       && CONST_INT_P (op1)
2200       && INTVAL (op1) > 0
2201       && INTVAL (op1) < GET_MODE_PRECISION (mode)
2202       && INTVAL (op1) < MAX_BITS_PER_WORD
2203       && (shift_cost (speed, mode, INTVAL (op1))
2204           > INTVAL (op1) * add_cost (speed, mode))
2205       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2206     {
2207       int i;
2208       for (i = 0; i < INTVAL (op1); i++)
2209         {
2210           temp = force_reg (mode, shifted);
2211           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2212                                   unsignedp, OPTAB_LIB_WIDEN);
2213         }
2214       return shifted;
2215     }
2216
2217   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2218     {
2219       enum optab_methods methods;
2220
2221       if (attempt == 0)
2222         methods = OPTAB_DIRECT;
2223       else if (attempt == 1)
2224         methods = OPTAB_WIDEN;
2225       else
2226         methods = OPTAB_LIB_WIDEN;
2227
2228       if (rotate)
2229         {
2230           /* Widening does not work for rotation.  */
2231           if (methods == OPTAB_WIDEN)
2232             continue;
2233           else if (methods == OPTAB_LIB_WIDEN)
2234             {
2235               /* If we have been unable to open-code this by a rotation,
2236                  do it as the IOR of two shifts.  I.e., to rotate A
2237                  by N bits, compute
2238                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2239                  where C is the bitsize of A.
2240
2241                  It is theoretically possible that the target machine might
2242                  not be able to perform either shift and hence we would
2243                  be making two libcalls rather than just the one for the
2244                  shift (similarly if IOR could not be done).  We will allow
2245                  this extremely unlikely lossage to avoid complicating the
2246                  code below.  */
2247
2248               rtx subtarget = target == shifted ? 0 : target;
2249               rtx new_amount, other_amount;
2250               rtx temp1;
2251
2252               new_amount = op1;
2253               if (op1 == const0_rtx)
2254                 return shifted;
2255               else if (CONST_INT_P (op1))
2256                 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2257                                         - INTVAL (op1));
2258               else
2259                 {
2260                   other_amount
2261                     = simplify_gen_unary (NEG, GET_MODE (op1),
2262                                           op1, GET_MODE (op1));
2263                   HOST_WIDE_INT mask = GET_MODE_PRECISION (mode) - 1;
2264                   other_amount
2265                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2266                                            gen_int_mode (mask, GET_MODE (op1)));
2267                 }
2268
2269               shifted = force_reg (mode, shifted);
2270
2271               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2272                                      mode, shifted, new_amount, 0, 1);
2273               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2274                                       mode, shifted, other_amount,
2275                                       subtarget, 1);
2276               return expand_binop (mode, ior_optab, temp, temp1, target,
2277                                    unsignedp, methods);
2278             }
2279
2280           temp = expand_binop (mode,
2281                                left ? lrotate_optab : rrotate_optab,
2282                                shifted, op1, target, unsignedp, methods);
2283         }
2284       else if (unsignedp)
2285         temp = expand_binop (mode,
2286                              left ? lshift_optab : rshift_uns_optab,
2287                              shifted, op1, target, unsignedp, methods);
2288
2289       /* Do arithmetic shifts.
2290          Also, if we are going to widen the operand, we can just as well
2291          use an arithmetic right-shift instead of a logical one.  */
2292       if (temp == 0 && ! rotate
2293           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2294         {
2295           enum optab_methods methods1 = methods;
2296
2297           /* If trying to widen a log shift to an arithmetic shift,
2298              don't accept an arithmetic shift of the same size.  */
2299           if (unsignedp)
2300             methods1 = OPTAB_MUST_WIDEN;
2301
2302           /* Arithmetic shift */
2303
2304           temp = expand_binop (mode,
2305                                left ? lshift_optab : rshift_arith_optab,
2306                                shifted, op1, target, unsignedp, methods1);
2307         }
2308
2309       /* We used to try extzv here for logical right shifts, but that was
2310          only useful for one machine, the VAX, and caused poor code
2311          generation there for lshrdi3, so the code was deleted and a
2312          define_expand for lshrsi3 was added to vax.md.  */
2313     }
2314
2315   gcc_assert (temp);
2316   return temp;
2317 }
2318
2319 /* Output a shift instruction for expression code CODE,
2320    with SHIFTED being the rtx for the value to shift,
2321    and AMOUNT the amount to shift by.
2322    Store the result in the rtx TARGET, if that is convenient.
2323    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2324    Return the rtx for where the value is.  */
2325
2326 rtx
2327 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2328               int amount, rtx target, int unsignedp)
2329 {
2330   return expand_shift_1 (code, mode,
2331                          shifted, GEN_INT (amount), target, unsignedp);
2332 }
2333
2334 /* Output a shift instruction for expression code CODE,
2335    with SHIFTED being the rtx for the value to shift,
2336    and AMOUNT the tree for the amount to shift by.
2337    Store the result in the rtx TARGET, if that is convenient.
2338    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2339    Return the rtx for where the value is.  */
2340
2341 rtx
2342 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2343                        tree amount, rtx target, int unsignedp)
2344 {
2345   return expand_shift_1 (code, mode,
2346                          shifted, expand_normal (amount), target, unsignedp);
2347 }
2348
2349 \f
2350 /* Indicates the type of fixup needed after a constant multiplication.
2351    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2352    the result should be negated, and ADD_VARIANT means that the
2353    multiplicand should be added to the result.  */
2354 enum mult_variant {basic_variant, negate_variant, add_variant};
2355
2356 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2357                         const struct mult_cost *, enum machine_mode mode);
2358 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2359                                  struct algorithm *, enum mult_variant *, int);
2360 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2361                               const struct algorithm *, enum mult_variant);
2362 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2363 static rtx extract_high_half (enum machine_mode, rtx);
2364 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2365 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2366                                        int, int);
2367 /* Compute and return the best algorithm for multiplying by T.
2368    The algorithm must cost less than cost_limit
2369    If retval.cost >= COST_LIMIT, no algorithm was found and all
2370    other field of the returned struct are undefined.
2371    MODE is the machine mode of the multiplication.  */
2372
2373 static void
2374 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2375             const struct mult_cost *cost_limit, enum machine_mode mode)
2376 {
2377   int m;
2378   struct algorithm *alg_in, *best_alg;
2379   struct mult_cost best_cost;
2380   struct mult_cost new_limit;
2381   int op_cost, op_latency;
2382   unsigned HOST_WIDE_INT orig_t = t;
2383   unsigned HOST_WIDE_INT q;
2384   int maxm, hash_index;
2385   bool cache_hit = false;
2386   enum alg_code cache_alg = alg_zero;
2387   bool speed = optimize_insn_for_speed_p ();
2388   enum machine_mode imode;
2389   struct alg_hash_entry *entry_ptr;
2390
2391   /* Indicate that no algorithm is yet found.  If no algorithm
2392      is found, this value will be returned and indicate failure.  */
2393   alg_out->cost.cost = cost_limit->cost + 1;
2394   alg_out->cost.latency = cost_limit->latency + 1;
2395
2396   if (cost_limit->cost < 0
2397       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2398     return;
2399
2400   /* Be prepared for vector modes.  */
2401   imode = GET_MODE_INNER (mode);
2402   if (imode == VOIDmode)
2403     imode = mode;
2404
2405   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2406
2407   /* Restrict the bits of "t" to the multiplication's mode.  */
2408   t &= GET_MODE_MASK (imode);
2409
2410   /* t == 1 can be done in zero cost.  */
2411   if (t == 1)
2412     {
2413       alg_out->ops = 1;
2414       alg_out->cost.cost = 0;
2415       alg_out->cost.latency = 0;
2416       alg_out->op[0] = alg_m;
2417       return;
2418     }
2419
2420   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2421      fail now.  */
2422   if (t == 0)
2423     {
2424       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2425         return;
2426       else
2427         {
2428           alg_out->ops = 1;
2429           alg_out->cost.cost = zero_cost (speed);
2430           alg_out->cost.latency = zero_cost (speed);
2431           alg_out->op[0] = alg_zero;
2432           return;
2433         }
2434     }
2435
2436   /* We'll be needing a couple extra algorithm structures now.  */
2437
2438   alg_in = XALLOCA (struct algorithm);
2439   best_alg = XALLOCA (struct algorithm);
2440   best_cost = *cost_limit;
2441
2442   /* Compute the hash index.  */
2443   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2444
2445   /* See if we already know what to do for T.  */
2446   entry_ptr = alg_hash_entry_ptr (hash_index);
2447   if (entry_ptr->t == t
2448       && entry_ptr->mode == mode
2449       && entry_ptr->mode == mode
2450       && entry_ptr->speed == speed
2451       && entry_ptr->alg != alg_unknown)
2452     {
2453       cache_alg = entry_ptr->alg;
2454
2455       if (cache_alg == alg_impossible)
2456         {
2457           /* The cache tells us that it's impossible to synthesize
2458              multiplication by T within entry_ptr->cost.  */
2459           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2460             /* COST_LIMIT is at least as restrictive as the one
2461                recorded in the hash table, in which case we have no
2462                hope of synthesizing a multiplication.  Just
2463                return.  */
2464             return;
2465
2466           /* If we get here, COST_LIMIT is less restrictive than the
2467              one recorded in the hash table, so we may be able to
2468              synthesize a multiplication.  Proceed as if we didn't
2469              have the cache entry.  */
2470         }
2471       else
2472         {
2473           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2474             /* The cached algorithm shows that this multiplication
2475                requires more cost than COST_LIMIT.  Just return.  This
2476                way, we don't clobber this cache entry with
2477                alg_impossible but retain useful information.  */
2478             return;
2479
2480           cache_hit = true;
2481
2482           switch (cache_alg)
2483             {
2484             case alg_shift:
2485               goto do_alg_shift;
2486
2487             case alg_add_t_m2:
2488             case alg_sub_t_m2:
2489               goto do_alg_addsub_t_m2;
2490
2491             case alg_add_factor:
2492             case alg_sub_factor:
2493               goto do_alg_addsub_factor;
2494
2495             case alg_add_t2_m:
2496               goto do_alg_add_t2_m;
2497
2498             case alg_sub_t2_m:
2499               goto do_alg_sub_t2_m;
2500
2501             default:
2502               gcc_unreachable ();
2503             }
2504         }
2505     }
2506
2507   /* If we have a group of zero bits at the low-order part of T, try
2508      multiplying by the remaining bits and then doing a shift.  */
2509
2510   if ((t & 1) == 0)
2511     {
2512     do_alg_shift:
2513       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2514       if (m < maxm)
2515         {
2516           q = t >> m;
2517           /* The function expand_shift will choose between a shift and
2518              a sequence of additions, so the observed cost is given as
2519              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2520           op_cost = m * add_cost (speed, mode);
2521           if (shift_cost (speed, mode, m) < op_cost)
2522             op_cost = shift_cost (speed, mode, m);
2523           new_limit.cost = best_cost.cost - op_cost;
2524           new_limit.latency = best_cost.latency - op_cost;
2525           synth_mult (alg_in, q, &new_limit, mode);
2526
2527           alg_in->cost.cost += op_cost;
2528           alg_in->cost.latency += op_cost;
2529           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2530             {
2531               struct algorithm *x;
2532               best_cost = alg_in->cost;
2533               x = alg_in, alg_in = best_alg, best_alg = x;
2534               best_alg->log[best_alg->ops] = m;
2535               best_alg->op[best_alg->ops] = alg_shift;
2536             }
2537
2538           /* See if treating ORIG_T as a signed number yields a better
2539              sequence.  Try this sequence only for a negative ORIG_T
2540              as it would be useless for a non-negative ORIG_T.  */
2541           if ((HOST_WIDE_INT) orig_t < 0)
2542             {
2543               /* Shift ORIG_T as follows because a right shift of a
2544                  negative-valued signed type is implementation
2545                  defined.  */
2546               q = ~(~orig_t >> m);
2547               /* The function expand_shift will choose between a shift
2548                  and a sequence of additions, so the observed cost is
2549                  given as MIN (m * add_cost(speed, mode),
2550                  shift_cost(speed, mode, m)).  */
2551               op_cost = m * add_cost (speed, mode);
2552               if (shift_cost (speed, mode, m) < op_cost)
2553                 op_cost = shift_cost (speed, mode, m);
2554               new_limit.cost = best_cost.cost - op_cost;
2555               new_limit.latency = best_cost.latency - op_cost;
2556               synth_mult (alg_in, q, &new_limit, mode);
2557
2558               alg_in->cost.cost += op_cost;
2559               alg_in->cost.latency += op_cost;
2560               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2561                 {
2562                   struct algorithm *x;
2563                   best_cost = alg_in->cost;
2564                   x = alg_in, alg_in = best_alg, best_alg = x;
2565                   best_alg->log[best_alg->ops] = m;
2566                   best_alg->op[best_alg->ops] = alg_shift;
2567                 }
2568             }
2569         }
2570       if (cache_hit)
2571         goto done;
2572     }
2573
2574   /* If we have an odd number, add or subtract one.  */
2575   if ((t & 1) != 0)
2576     {
2577       unsigned HOST_WIDE_INT w;
2578
2579     do_alg_addsub_t_m2:
2580       for (w = 1; (w & t) != 0; w <<= 1)
2581         ;
2582       /* If T was -1, then W will be zero after the loop.  This is another
2583          case where T ends with ...111.  Handling this with (T + 1) and
2584          subtract 1 produces slightly better code and results in algorithm
2585          selection much faster than treating it like the ...0111 case
2586          below.  */
2587       if (w == 0
2588           || (w > 2
2589               /* Reject the case where t is 3.
2590                  Thus we prefer addition in that case.  */
2591               && t != 3))
2592         {
2593           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2594
2595           op_cost = add_cost (speed, mode);
2596           new_limit.cost = best_cost.cost - op_cost;
2597           new_limit.latency = best_cost.latency - op_cost;
2598           synth_mult (alg_in, t + 1, &new_limit, mode);
2599
2600           alg_in->cost.cost += op_cost;
2601           alg_in->cost.latency += op_cost;
2602           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2603             {
2604               struct algorithm *x;
2605               best_cost = alg_in->cost;
2606               x = alg_in, alg_in = best_alg, best_alg = x;
2607               best_alg->log[best_alg->ops] = 0;
2608               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2609             }
2610         }
2611       else
2612         {
2613           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2614
2615           op_cost = add_cost (speed, mode);
2616           new_limit.cost = best_cost.cost - op_cost;
2617           new_limit.latency = best_cost.latency - op_cost;
2618           synth_mult (alg_in, t - 1, &new_limit, mode);
2619
2620           alg_in->cost.cost += op_cost;
2621           alg_in->cost.latency += op_cost;
2622           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2623             {
2624               struct algorithm *x;
2625               best_cost = alg_in->cost;
2626               x = alg_in, alg_in = best_alg, best_alg = x;
2627               best_alg->log[best_alg->ops] = 0;
2628               best_alg->op[best_alg->ops] = alg_add_t_m2;
2629             }
2630         }
2631
2632       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2633          quickly with a - a * n for some appropriate constant n.  */
2634       m = exact_log2 (-orig_t + 1);
2635       if (m >= 0 && m < maxm)
2636         {
2637           op_cost = shiftsub1_cost (speed, mode, m);
2638           new_limit.cost = best_cost.cost - op_cost;
2639           new_limit.latency = best_cost.latency - op_cost;
2640           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2641                       &new_limit, mode);
2642
2643           alg_in->cost.cost += op_cost;
2644           alg_in->cost.latency += op_cost;
2645           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2646             {
2647               struct algorithm *x;
2648               best_cost = alg_in->cost;
2649               x = alg_in, alg_in = best_alg, best_alg = x;
2650               best_alg->log[best_alg->ops] = m;
2651               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2652             }
2653         }
2654
2655       if (cache_hit)
2656         goto done;
2657     }
2658
2659   /* Look for factors of t of the form
2660      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2661      If we find such a factor, we can multiply by t using an algorithm that
2662      multiplies by q, shift the result by m and add/subtract it to itself.
2663
2664      We search for large factors first and loop down, even if large factors
2665      are less probable than small; if we find a large factor we will find a
2666      good sequence quickly, and therefore be able to prune (by decreasing
2667      COST_LIMIT) the search.  */
2668
2669  do_alg_addsub_factor:
2670   for (m = floor_log2 (t - 1); m >= 2; m--)
2671     {
2672       unsigned HOST_WIDE_INT d;
2673
2674       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2675       if (t % d == 0 && t > d && m < maxm
2676           && (!cache_hit || cache_alg == alg_add_factor))
2677         {
2678           /* If the target has a cheap shift-and-add instruction use
2679              that in preference to a shift insn followed by an add insn.
2680              Assume that the shift-and-add is "atomic" with a latency
2681              equal to its cost, otherwise assume that on superscalar
2682              hardware the shift may be executed concurrently with the
2683              earlier steps in the algorithm.  */
2684           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2685           if (shiftadd_cost (speed, mode, m) < op_cost)
2686             {
2687               op_cost = shiftadd_cost (speed, mode, m);
2688               op_latency = op_cost;
2689             }
2690           else
2691             op_latency = add_cost (speed, mode);
2692
2693           new_limit.cost = best_cost.cost - op_cost;
2694           new_limit.latency = best_cost.latency - op_latency;
2695           synth_mult (alg_in, t / d, &new_limit, mode);
2696
2697           alg_in->cost.cost += op_cost;
2698           alg_in->cost.latency += op_latency;
2699           if (alg_in->cost.latency < op_cost)
2700             alg_in->cost.latency = op_cost;
2701           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2702             {
2703               struct algorithm *x;
2704               best_cost = alg_in->cost;
2705               x = alg_in, alg_in = best_alg, best_alg = x;
2706               best_alg->log[best_alg->ops] = m;
2707               best_alg->op[best_alg->ops] = alg_add_factor;
2708             }
2709           /* Other factors will have been taken care of in the recursion.  */
2710           break;
2711         }
2712
2713       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2714       if (t % d == 0 && t > d && m < maxm
2715           && (!cache_hit || cache_alg == alg_sub_factor))
2716         {
2717           /* If the target has a cheap shift-and-subtract insn use
2718              that in preference to a shift insn followed by a sub insn.
2719              Assume that the shift-and-sub is "atomic" with a latency
2720              equal to it's cost, otherwise assume that on superscalar
2721              hardware the shift may be executed concurrently with the
2722              earlier steps in the algorithm.  */
2723           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2724           if (shiftsub0_cost (speed, mode, m) < op_cost)
2725             {
2726               op_cost = shiftsub0_cost (speed, mode, m);
2727               op_latency = op_cost;
2728             }
2729           else
2730             op_latency = add_cost (speed, mode);
2731
2732           new_limit.cost = best_cost.cost - op_cost;
2733           new_limit.latency = best_cost.latency - op_latency;
2734           synth_mult (alg_in, t / d, &new_limit, mode);
2735
2736           alg_in->cost.cost += op_cost;
2737           alg_in->cost.latency += op_latency;
2738           if (alg_in->cost.latency < op_cost)
2739             alg_in->cost.latency = op_cost;
2740           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2741             {
2742               struct algorithm *x;
2743               best_cost = alg_in->cost;
2744               x = alg_in, alg_in = best_alg, best_alg = x;
2745               best_alg->log[best_alg->ops] = m;
2746               best_alg->op[best_alg->ops] = alg_sub_factor;
2747             }
2748           break;
2749         }
2750     }
2751   if (cache_hit)
2752     goto done;
2753
2754   /* Try shift-and-add (load effective address) instructions,
2755      i.e. do a*3, a*5, a*9.  */
2756   if ((t & 1) != 0)
2757     {
2758     do_alg_add_t2_m:
2759       q = t - 1;
2760       q = q & -q;
2761       m = exact_log2 (q);
2762       if (m >= 0 && m < maxm)
2763         {
2764           op_cost = shiftadd_cost (speed, mode, m);
2765           new_limit.cost = best_cost.cost - op_cost;
2766           new_limit.latency = best_cost.latency - op_cost;
2767           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2768
2769           alg_in->cost.cost += op_cost;
2770           alg_in->cost.latency += op_cost;
2771           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2772             {
2773               struct algorithm *x;
2774               best_cost = alg_in->cost;
2775               x = alg_in, alg_in = best_alg, best_alg = x;
2776               best_alg->log[best_alg->ops] = m;
2777               best_alg->op[best_alg->ops] = alg_add_t2_m;
2778             }
2779         }
2780       if (cache_hit)
2781         goto done;
2782
2783     do_alg_sub_t2_m:
2784       q = t + 1;
2785       q = q & -q;
2786       m = exact_log2 (q);
2787       if (m >= 0 && m < maxm)
2788         {
2789           op_cost = shiftsub0_cost (speed, mode, m);
2790           new_limit.cost = best_cost.cost - op_cost;
2791           new_limit.latency = best_cost.latency - op_cost;
2792           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2793
2794           alg_in->cost.cost += op_cost;
2795           alg_in->cost.latency += op_cost;
2796           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2797             {
2798               struct algorithm *x;
2799               best_cost = alg_in->cost;
2800               x = alg_in, alg_in = best_alg, best_alg = x;
2801               best_alg->log[best_alg->ops] = m;
2802               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2803             }
2804         }
2805       if (cache_hit)
2806         goto done;
2807     }
2808
2809  done:
2810   /* If best_cost has not decreased, we have not found any algorithm.  */
2811   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2812     {
2813       /* We failed to find an algorithm.  Record alg_impossible for
2814          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2815          we are asked to find an algorithm for T within the same or
2816          lower COST_LIMIT, we can immediately return to the
2817          caller.  */
2818       entry_ptr->t = t;
2819       entry_ptr->mode = mode;
2820       entry_ptr->speed = speed;
2821       entry_ptr->alg = alg_impossible;
2822       entry_ptr->cost = *cost_limit;
2823       return;
2824     }
2825
2826   /* Cache the result.  */
2827   if (!cache_hit)
2828     {
2829       entry_ptr->t = t;
2830       entry_ptr->mode = mode;
2831       entry_ptr->speed = speed;
2832       entry_ptr->alg = best_alg->op[best_alg->ops];
2833       entry_ptr->cost.cost = best_cost.cost;
2834       entry_ptr->cost.latency = best_cost.latency;
2835     }
2836
2837   /* If we are getting a too long sequence for `struct algorithm'
2838      to record, make this search fail.  */
2839   if (best_alg->ops == MAX_BITS_PER_WORD)
2840     return;
2841
2842   /* Copy the algorithm from temporary space to the space at alg_out.
2843      We avoid using structure assignment because the majority of
2844      best_alg is normally undefined, and this is a critical function.  */
2845   alg_out->ops = best_alg->ops + 1;
2846   alg_out->cost = best_cost;
2847   memcpy (alg_out->op, best_alg->op,
2848           alg_out->ops * sizeof *alg_out->op);
2849   memcpy (alg_out->log, best_alg->log,
2850           alg_out->ops * sizeof *alg_out->log);
2851 }
2852 \f
2853 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2854    Try three variations:
2855
2856        - a shift/add sequence based on VAL itself
2857        - a shift/add sequence based on -VAL, followed by a negation
2858        - a shift/add sequence based on VAL - 1, followed by an addition.
2859
2860    Return true if the cheapest of these cost less than MULT_COST,
2861    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2862
2863 static bool
2864 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2865                      struct algorithm *alg, enum mult_variant *variant,
2866                      int mult_cost)
2867 {
2868   struct algorithm alg2;
2869   struct mult_cost limit;
2870   int op_cost;
2871   bool speed = optimize_insn_for_speed_p ();
2872
2873   /* Fail quickly for impossible bounds.  */
2874   if (mult_cost < 0)
2875     return false;
2876
2877   /* Ensure that mult_cost provides a reasonable upper bound.
2878      Any constant multiplication can be performed with less
2879      than 2 * bits additions.  */
2880   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2881   if (mult_cost > op_cost)
2882     mult_cost = op_cost;
2883
2884   *variant = basic_variant;
2885   limit.cost = mult_cost;
2886   limit.latency = mult_cost;
2887   synth_mult (alg, val, &limit, mode);
2888
2889   /* This works only if the inverted value actually fits in an
2890      `unsigned int' */
2891   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2892     {
2893       op_cost = neg_cost (speed, mode);
2894       if (MULT_COST_LESS (&alg->cost, mult_cost))
2895         {
2896           limit.cost = alg->cost.cost - op_cost;
2897           limit.latency = alg->cost.latency - op_cost;
2898         }
2899       else
2900         {
2901           limit.cost = mult_cost - op_cost;
2902           limit.latency = mult_cost - op_cost;
2903         }
2904
2905       synth_mult (&alg2, -val, &limit, mode);
2906       alg2.cost.cost += op_cost;
2907       alg2.cost.latency += op_cost;
2908       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2909         *alg = alg2, *variant = negate_variant;
2910     }
2911
2912   /* This proves very useful for division-by-constant.  */
2913   op_cost = add_cost (speed, mode);
2914   if (MULT_COST_LESS (&alg->cost, mult_cost))
2915     {
2916       limit.cost = alg->cost.cost - op_cost;
2917       limit.latency = alg->cost.latency - op_cost;
2918     }
2919   else
2920     {
2921       limit.cost = mult_cost - op_cost;
2922       limit.latency = mult_cost - op_cost;
2923     }
2924
2925   synth_mult (&alg2, val - 1, &limit, mode);
2926   alg2.cost.cost += op_cost;
2927   alg2.cost.latency += op_cost;
2928   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2929     *alg = alg2, *variant = add_variant;
2930
2931   return MULT_COST_LESS (&alg->cost, mult_cost);
2932 }
2933
2934 /* A subroutine of expand_mult, used for constant multiplications.
2935    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2936    convenient.  Use the shift/add sequence described by ALG and apply
2937    the final fixup specified by VARIANT.  */
2938
2939 static rtx
2940 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2941                    rtx target, const struct algorithm *alg,
2942                    enum mult_variant variant)
2943 {
2944   HOST_WIDE_INT val_so_far;
2945   rtx insn, accum, tem;
2946   int opno;
2947   enum machine_mode nmode;
2948
2949   /* Avoid referencing memory over and over and invalid sharing
2950      on SUBREGs.  */
2951   op0 = force_reg (mode, op0);
2952
2953   /* ACCUM starts out either as OP0 or as a zero, depending on
2954      the first operation.  */
2955
2956   if (alg->op[0] == alg_zero)
2957     {
2958       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2959       val_so_far = 0;
2960     }
2961   else if (alg->op[0] == alg_m)
2962     {
2963       accum = copy_to_mode_reg (mode, op0);
2964       val_so_far = 1;
2965     }
2966   else
2967     gcc_unreachable ();
2968
2969   for (opno = 1; opno < alg->ops; opno++)
2970     {
2971       int log = alg->log[opno];
2972       rtx shift_subtarget = optimize ? 0 : accum;
2973       rtx add_target
2974         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2975            && !optimize)
2976           ? target : 0;
2977       rtx accum_target = optimize ? 0 : accum;
2978       rtx accum_inner;
2979
2980       switch (alg->op[opno])
2981         {
2982         case alg_shift:
2983           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2984           /* REG_EQUAL note will be attached to the following insn.  */
2985           emit_move_insn (accum, tem);
2986           val_so_far <<= log;
2987           break;
2988
2989         case alg_add_t_m2:
2990           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2991           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2992                                  add_target ? add_target : accum_target);
2993           val_so_far += (HOST_WIDE_INT) 1 << log;
2994           break;
2995
2996         case alg_sub_t_m2:
2997           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2998           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2999                                  add_target ? add_target : accum_target);
3000           val_so_far -= (HOST_WIDE_INT) 1 << log;
3001           break;
3002
3003         case alg_add_t2_m:
3004           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3005                                 log, shift_subtarget, 0);
3006           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3007                                  add_target ? add_target : accum_target);
3008           val_so_far = (val_so_far << log) + 1;
3009           break;
3010
3011         case alg_sub_t2_m:
3012           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3013                                 log, shift_subtarget, 0);
3014           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3015                                  add_target ? add_target : accum_target);
3016           val_so_far = (val_so_far << log) - 1;
3017           break;
3018
3019         case alg_add_factor:
3020           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3021           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3022                                  add_target ? add_target : accum_target);
3023           val_so_far += val_so_far << log;
3024           break;
3025
3026         case alg_sub_factor:
3027           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3028           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3029                                  (add_target
3030                                   ? add_target : (optimize ? 0 : tem)));
3031           val_so_far = (val_so_far << log) - val_so_far;
3032           break;
3033
3034         default:
3035           gcc_unreachable ();
3036         }
3037
3038       if (SCALAR_INT_MODE_P (mode))
3039         {
3040           /* Write a REG_EQUAL note on the last insn so that we can cse
3041              multiplication sequences.  Note that if ACCUM is a SUBREG,
3042              we've set the inner register and must properly indicate that.  */
3043           tem = op0, nmode = mode;
3044           accum_inner = accum;
3045           if (GET_CODE (accum) == SUBREG)
3046             {
3047               accum_inner = SUBREG_REG (accum);
3048               nmode = GET_MODE (accum_inner);
3049               tem = gen_lowpart (nmode, op0);
3050             }
3051
3052           insn = get_last_insn ();
3053           set_dst_reg_note (insn, REG_EQUAL,
3054                             gen_rtx_MULT (nmode, tem,
3055                                           gen_int_mode (val_so_far, nmode)),
3056                             accum_inner);
3057         }
3058     }
3059
3060   if (variant == negate_variant)
3061     {
3062       val_so_far = -val_so_far;
3063       accum = expand_unop (mode, neg_optab, accum, target, 0);
3064     }
3065   else if (variant == add_variant)
3066     {
3067       val_so_far = val_so_far + 1;
3068       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3069     }
3070
3071   /* Compare only the bits of val and val_so_far that are significant
3072      in the result mode, to avoid sign-/zero-extension confusion.  */
3073   nmode = GET_MODE_INNER (mode);
3074   if (nmode == VOIDmode)
3075     nmode = mode;
3076   val &= GET_MODE_MASK (nmode);
3077   val_so_far &= GET_MODE_MASK (nmode);
3078   gcc_assert (val == val_so_far);
3079
3080   return accum;
3081 }
3082
3083 /* Perform a multiplication and return an rtx for the result.
3084    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3085    TARGET is a suggestion for where to store the result (an rtx).
3086
3087    We check specially for a constant integer as OP1.
3088    If you want this check for OP0 as well, then before calling
3089    you should swap the two operands if OP0 would be constant.  */
3090
3091 rtx
3092 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3093              int unsignedp)
3094 {
3095   enum mult_variant variant;
3096   struct algorithm algorithm;
3097   rtx scalar_op1;
3098   int max_cost;
3099   bool speed = optimize_insn_for_speed_p ();
3100   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3101
3102   if (CONSTANT_P (op0))
3103     {
3104       rtx temp = op0;
3105       op0 = op1;
3106       op1 = temp;
3107     }
3108
3109   /* For vectors, there are several simplifications that can be made if
3110      all elements of the vector constant are identical.  */
3111   scalar_op1 = op1;
3112   if (GET_CODE (op1) == CONST_VECTOR)
3113     {
3114       int i, n = CONST_VECTOR_NUNITS (op1);
3115       scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3116       for (i = 1; i < n; ++i)
3117         if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3118           goto skip_scalar;
3119     }
3120
3121   if (INTEGRAL_MODE_P (mode))
3122     {
3123       rtx fake_reg;
3124       HOST_WIDE_INT coeff;
3125       bool is_neg;
3126       int mode_bitsize;
3127
3128       if (op1 == CONST0_RTX (mode))
3129         return op1;
3130       if (op1 == CONST1_RTX (mode))
3131         return op0;
3132       if (op1 == CONSTM1_RTX (mode))
3133         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3134                             op0, target, 0);
3135
3136       if (do_trapv)
3137         goto skip_synth;
3138
3139       /* These are the operations that are potentially turned into
3140          a sequence of shifts and additions.  */
3141       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3142
3143       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3144          less than or equal in size to `unsigned int' this doesn't matter.
3145          If the mode is larger than `unsigned int', then synth_mult works
3146          only if the constant value exactly fits in an `unsigned int' without
3147          any truncation.  This means that multiplying by negative values does
3148          not work; results are off by 2^32 on a 32 bit machine.  */
3149
3150       if (CONST_INT_P (scalar_op1))
3151         {
3152           coeff = INTVAL (scalar_op1);
3153           is_neg = coeff < 0;
3154         }
3155       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3156         {
3157           /* If we are multiplying in DImode, it may still be a win
3158              to try to work with shifts and adds.  */
3159           if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3160               && (CONST_DOUBLE_LOW (scalar_op1) > 0
3161                   || (CONST_DOUBLE_LOW (scalar_op1) < 0
3162                       && EXACT_POWER_OF_2_OR_ZERO_P
3163                            (CONST_DOUBLE_LOW (scalar_op1)))))
3164             {
3165               coeff = CONST_DOUBLE_LOW (scalar_op1);
3166               is_neg = false;
3167             }
3168           else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3169             {
3170               coeff = CONST_DOUBLE_HIGH (scalar_op1);
3171               if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3172                 {
3173                   int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3174                   if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3175                       || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3176                     return expand_shift (LSHIFT_EXPR, mode, op0,
3177                                          shift, target, unsignedp);
3178                 }
3179               goto skip_synth;
3180             }
3181           else
3182             goto skip_synth;
3183         }
3184       else
3185         goto skip_synth;
3186
3187       /* We used to test optimize here, on the grounds that it's better to
3188          produce a smaller program when -O is not used.  But this causes
3189          such a terrible slowdown sometimes that it seems better to always
3190          use synth_mult.  */
3191
3192       /* Special case powers of two.  */
3193       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3194           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3195         return expand_shift (LSHIFT_EXPR, mode, op0,
3196                              floor_log2 (coeff), target, unsignedp);
3197
3198       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3199
3200       /* Attempt to handle multiplication of DImode values by negative
3201          coefficients, by performing the multiplication by a positive
3202          multiplier and then inverting the result.  */
3203       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3204         {
3205           /* Its safe to use -coeff even for INT_MIN, as the
3206              result is interpreted as an unsigned coefficient.
3207              Exclude cost of op0 from max_cost to match the cost
3208              calculation of the synth_mult.  */
3209           coeff = -(unsigned HOST_WIDE_INT) coeff;
3210           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3211                       - neg_cost (speed, mode));
3212           if (max_cost <= 0)
3213             goto skip_synth;
3214
3215           /* Special case powers of two.  */
3216           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3217             {
3218               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3219                                        floor_log2 (coeff), target, unsignedp);
3220               return expand_unop (mode, neg_optab, temp, target, 0);
3221             }
3222
3223           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3224                                    max_cost))
3225             {
3226               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3227                                             &algorithm, variant);
3228               return expand_unop (mode, neg_optab, temp, target, 0);
3229             }
3230           goto skip_synth;
3231         }
3232
3233       /* Exclude cost of op0 from max_cost to match the cost
3234          calculation of the synth_mult.  */
3235       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3236       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3237         return expand_mult_const (mode, op0, coeff, target,
3238                                   &algorithm, variant);
3239     }
3240  skip_synth:
3241
3242   /* Expand x*2.0 as x+x.  */
3243   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3244     {
3245       REAL_VALUE_TYPE d;
3246       REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3247
3248       if (REAL_VALUES_EQUAL (d, dconst2))
3249         {
3250           op0 = force_reg (GET_MODE (op0), op0);
3251           return expand_binop (mode, add_optab, op0, op0,
3252                                target, unsignedp, OPTAB_LIB_WIDEN);
3253         }
3254     }
3255  skip_scalar:
3256
3257   /* This used to use umul_optab if unsigned, but for non-widening multiply
3258      there is no difference between signed and unsigned.  */
3259   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3260                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3261   gcc_assert (op0);
3262   return op0;
3263 }
3264
3265 /* Return a cost estimate for multiplying a register by the given
3266    COEFFicient in the given MODE and SPEED.  */
3267
3268 int
3269 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3270 {
3271   int max_cost;
3272   struct algorithm algorithm;
3273   enum mult_variant variant;
3274
3275   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3276   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3277   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3278     return algorithm.cost.cost;
3279   else
3280     return max_cost;
3281 }
3282
3283 /* Perform a widening multiplication and return an rtx for the result.
3284    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3285    TARGET is a suggestion for where to store the result (an rtx).
3286    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3287    or smul_widen_optab.
3288
3289    We check specially for a constant integer as OP1, comparing the
3290    cost of a widening multiply against the cost of a sequence of shifts
3291    and adds.  */
3292
3293 rtx
3294 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3295                       int unsignedp, optab this_optab)
3296 {
3297   bool speed = optimize_insn_for_speed_p ();
3298   rtx cop1;
3299
3300   if (CONST_INT_P (op1)
3301       && GET_MODE (op0) != VOIDmode
3302       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3303                                 this_optab == umul_widen_optab))
3304       && CONST_INT_P (cop1)
3305       && (INTVAL (cop1) >= 0
3306           || HWI_COMPUTABLE_MODE_P (mode)))
3307     {
3308       HOST_WIDE_INT coeff = INTVAL (cop1);
3309       int max_cost;
3310       enum mult_variant variant;
3311       struct algorithm algorithm;
3312
3313       /* Special case powers of two.  */
3314       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3315         {
3316           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3317           return expand_shift (LSHIFT_EXPR, mode, op0,
3318                                floor_log2 (coeff), target, unsignedp);
3319         }
3320
3321       /* Exclude cost of op0 from max_cost to match the cost
3322          calculation of the synth_mult.  */
3323       max_cost = mul_widen_cost (speed, mode);
3324       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3325                                max_cost))
3326         {
3327           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3328           return expand_mult_const (mode, op0, coeff, target,
3329                                     &algorithm, variant);
3330         }
3331     }
3332   return expand_binop (mode, this_optab, op0, op1, target,
3333                        unsignedp, OPTAB_LIB_WIDEN);
3334 }
3335 \f
3336 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3337    replace division by D, and put the least significant N bits of the result
3338    in *MULTIPLIER_PTR and return the most significant bit.
3339
3340    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3341    needed precision is in PRECISION (should be <= N).
3342
3343    PRECISION should be as small as possible so this function can choose
3344    multiplier more freely.
3345
3346    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3347    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3348
3349    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3350    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3351
3352 unsigned HOST_WIDE_INT
3353 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3354                    unsigned HOST_WIDE_INT *multiplier_ptr,
3355                    int *post_shift_ptr, int *lgup_ptr)
3356 {
3357   double_int mhigh, mlow;
3358   int lgup, post_shift;
3359   int pow, pow2;
3360
3361   /* lgup = ceil(log2(divisor)); */
3362   lgup = ceil_log2 (d);
3363
3364   gcc_assert (lgup <= n);
3365
3366   pow = n + lgup;
3367   pow2 = n + lgup - precision;
3368
3369   /* We could handle this with some effort, but this case is much
3370      better handled directly with a scc insn, so rely on caller using
3371      that.  */
3372   gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3373
3374   /* mlow = 2^(N + lgup)/d */
3375   double_int val = double_int_zero.set_bit (pow);
3376   mlow = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3377
3378   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3379   val |= double_int_zero.set_bit (pow2);
3380   mhigh = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3381
3382   gcc_assert (!mhigh.high || val.high - d < d);
3383   gcc_assert (mhigh.high <= 1 && mlow.high <= 1);
3384   /* Assert that mlow < mhigh.  */
3385   gcc_assert (mlow.ult (mhigh));
3386
3387   /* If precision == N, then mlow, mhigh exceed 2^N
3388      (but they do not exceed 2^(N+1)).  */
3389
3390   /* Reduce to lowest terms.  */
3391   for (post_shift = lgup; post_shift > 0; post_shift--)
3392     {
3393       int shft = HOST_BITS_PER_WIDE_INT - 1;
3394       unsigned HOST_WIDE_INT ml_lo = (mlow.high << shft) | (mlow.low >> 1);
3395       unsigned HOST_WIDE_INT mh_lo = (mhigh.high << shft) | (mhigh.low >> 1);
3396       if (ml_lo >= mh_lo)
3397         break;
3398
3399       mlow = double_int::from_uhwi (ml_lo);
3400       mhigh = double_int::from_uhwi (mh_lo);
3401     }
3402
3403   *post_shift_ptr = post_shift;
3404   *lgup_ptr = lgup;
3405   if (n < HOST_BITS_PER_WIDE_INT)
3406     {
3407       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3408       *multiplier_ptr = mhigh.low & mask;
3409       return mhigh.low >= mask;
3410     }
3411   else
3412     {
3413       *multiplier_ptr = mhigh.low;
3414       return mhigh.high;
3415     }
3416 }
3417
3418 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3419    congruent to 1 (mod 2**N).  */
3420
3421 static unsigned HOST_WIDE_INT
3422 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3423 {
3424   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3425
3426   /* The algorithm notes that the choice y = x satisfies
3427      x*y == 1 mod 2^3, since x is assumed odd.
3428      Each iteration doubles the number of bits of significance in y.  */
3429
3430   unsigned HOST_WIDE_INT mask;
3431   unsigned HOST_WIDE_INT y = x;
3432   int nbit = 3;
3433
3434   mask = (n == HOST_BITS_PER_WIDE_INT
3435           ? ~(unsigned HOST_WIDE_INT) 0
3436           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3437
3438   while (nbit < n)
3439     {
3440       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3441       nbit *= 2;
3442     }
3443   return y;
3444 }
3445
3446 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3447    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3448    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3449    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3450    become signed.
3451
3452    The result is put in TARGET if that is convenient.
3453
3454    MODE is the mode of operation.  */
3455
3456 rtx
3457 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3458                              rtx op1, rtx target, int unsignedp)
3459 {
3460   rtx tem;
3461   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3462
3463   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3464                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3465   tem = expand_and (mode, tem, op1, NULL_RTX);
3466   adj_operand
3467     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3468                      adj_operand);
3469
3470   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3471                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3472   tem = expand_and (mode, tem, op0, NULL_RTX);
3473   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3474                           target);
3475
3476   return target;
3477 }
3478
3479 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3480
3481 static rtx
3482 extract_high_half (enum machine_mode mode, rtx op)
3483 {
3484   enum machine_mode wider_mode;
3485
3486   if (mode == word_mode)
3487     return gen_highpart (mode, op);
3488
3489   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3490
3491   wider_mode = GET_MODE_WIDER_MODE (mode);
3492   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3493                      GET_MODE_BITSIZE (mode), 0, 1);
3494   return convert_modes (mode, wider_mode, op, 0);
3495 }
3496
3497 /* Like expmed_mult_highpart, but only consider using a multiplication
3498    optab.  OP1 is an rtx for the constant operand.  */
3499
3500 static rtx
3501 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3502                             rtx target, int unsignedp, int max_cost)
3503 {
3504   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3505   enum machine_mode wider_mode;
3506   optab moptab;
3507   rtx tem;
3508   int size;
3509   bool speed = optimize_insn_for_speed_p ();
3510
3511   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3512
3513   wider_mode = GET_MODE_WIDER_MODE (mode);
3514   size = GET_MODE_BITSIZE (mode);
3515
3516   /* Firstly, try using a multiplication insn that only generates the needed
3517      high part of the product, and in the sign flavor of unsignedp.  */
3518   if (mul_highpart_cost (speed, mode) < max_cost)
3519     {
3520       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3521       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3522                           unsignedp, OPTAB_DIRECT);
3523       if (tem)
3524         return tem;
3525     }
3526
3527   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3528      Need to adjust the result after the multiplication.  */
3529   if (size - 1 < BITS_PER_WORD
3530       && (mul_highpart_cost (speed, mode)
3531           + 2 * shift_cost (speed, mode, size-1)
3532           + 4 * add_cost (speed, mode) < max_cost))
3533     {
3534       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3535       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3536                           unsignedp, OPTAB_DIRECT);
3537       if (tem)
3538         /* We used the wrong signedness.  Adjust the result.  */
3539         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3540                                             tem, unsignedp);
3541     }
3542
3543   /* Try widening multiplication.  */
3544   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3545   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3546       && mul_widen_cost (speed, wider_mode) < max_cost)
3547     {
3548       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3549                           unsignedp, OPTAB_WIDEN);
3550       if (tem)
3551         return extract_high_half (mode, tem);
3552     }
3553
3554   /* Try widening the mode and perform a non-widening multiplication.  */
3555   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3556       && size - 1 < BITS_PER_WORD
3557       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3558           < max_cost))
3559     {
3560       rtx insns, wop0, wop1;
3561
3562       /* We need to widen the operands, for example to ensure the
3563          constant multiplier is correctly sign or zero extended.
3564          Use a sequence to clean-up any instructions emitted by
3565          the conversions if things don't work out.  */
3566       start_sequence ();
3567       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3568       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3569       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3570                           unsignedp, OPTAB_WIDEN);
3571       insns = get_insns ();
3572       end_sequence ();
3573
3574       if (tem)
3575         {
3576           emit_insn (insns);
3577           return extract_high_half (mode, tem);
3578         }
3579     }
3580
3581   /* Try widening multiplication of opposite signedness, and adjust.  */
3582   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3583   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3584       && size - 1 < BITS_PER_WORD
3585       && (mul_widen_cost (speed, wider_mode)
3586           + 2 * shift_cost (speed, mode, size-1)
3587           + 4 * add_cost (speed, mode) < max_cost))
3588     {
3589       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3590                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3591       if (tem != 0)
3592         {
3593           tem = extract_high_half (mode, tem);
3594           /* We used the wrong signedness.  Adjust the result.  */
3595           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3596                                               target, unsignedp);
3597         }
3598     }
3599
3600   return 0;
3601 }
3602
3603 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3604    putting the high half of the result in TARGET if that is convenient,
3605    and return where the result is.  If the operation can not be performed,
3606    0 is returned.
3607
3608    MODE is the mode of operation and result.
3609
3610    UNSIGNEDP nonzero means unsigned multiply.
3611
3612    MAX_COST is the total allowed cost for the expanded RTL.  */
3613
3614 static rtx
3615 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3616                       rtx target, int unsignedp, int max_cost)
3617 {
3618   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3619   unsigned HOST_WIDE_INT cnst1;
3620   int extra_cost;
3621   bool sign_adjust = false;
3622   enum mult_variant variant;
3623   struct algorithm alg;
3624   rtx tem;
3625   bool speed = optimize_insn_for_speed_p ();
3626
3627   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3628   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3629   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3630
3631   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3632
3633   /* We can't optimize modes wider than BITS_PER_WORD.
3634      ??? We might be able to perform double-word arithmetic if
3635      mode == word_mode, however all the cost calculations in
3636      synth_mult etc. assume single-word operations.  */
3637   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3638     return expmed_mult_highpart_optab (mode, op0, op1, target,
3639                                        unsignedp, max_cost);
3640
3641   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3642
3643   /* Check whether we try to multiply by a negative constant.  */
3644   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3645     {
3646       sign_adjust = true;
3647       extra_cost += add_cost (speed, mode);
3648     }
3649
3650   /* See whether shift/add multiplication is cheap enough.  */
3651   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3652                            max_cost - extra_cost))
3653     {
3654       /* See whether the specialized multiplication optabs are
3655          cheaper than the shift/add version.  */
3656       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3657                                         alg.cost.cost + extra_cost);
3658       if (tem)
3659         return tem;
3660
3661       tem = convert_to_mode (wider_mode, op0, unsignedp);
3662       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3663       tem = extract_high_half (mode, tem);
3664
3665       /* Adjust result for signedness.  */
3666       if (sign_adjust)
3667         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3668
3669       return tem;
3670     }
3671   return expmed_mult_highpart_optab (mode, op0, op1, target,
3672                                      unsignedp, max_cost);
3673 }
3674
3675
3676 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3677
3678 static rtx
3679 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3680 {
3681   unsigned HOST_WIDE_INT masklow, maskhigh;
3682   rtx result, temp, shift, label;
3683   int logd;
3684
3685   logd = floor_log2 (d);
3686   result = gen_reg_rtx (mode);
3687
3688   /* Avoid conditional branches when they're expensive.  */
3689   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3690       && optimize_insn_for_speed_p ())
3691     {
3692       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3693                                       mode, 0, -1);
3694       if (signmask)
3695         {
3696           signmask = force_reg (mode, signmask);
3697           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3698           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3699
3700           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3701              which instruction sequence to use.  If logical right shifts
3702              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3703              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3704
3705           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3706           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3707               || (set_src_cost (temp, optimize_insn_for_speed_p ())
3708                   > COSTS_N_INSNS (2)))
3709             {
3710               temp = expand_binop (mode, xor_optab, op0, signmask,
3711                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3712               temp = expand_binop (mode, sub_optab, temp, signmask,
3713                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3714               temp = expand_binop (mode, and_optab, temp,
3715                                    gen_int_mode (masklow, mode),
3716                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3717               temp = expand_binop (mode, xor_optab, temp, signmask,
3718                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3719               temp = expand_binop (mode, sub_optab, temp, signmask,
3720                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3721             }
3722           else
3723             {
3724               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3725                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3726               signmask = force_reg (mode, signmask);
3727
3728               temp = expand_binop (mode, add_optab, op0, signmask,
3729                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3730               temp = expand_binop (mode, and_optab, temp,
3731                                    gen_int_mode (masklow, mode),
3732                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3733               temp = expand_binop (mode, sub_optab, temp, signmask,
3734                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3735             }
3736           return temp;
3737         }
3738     }
3739
3740   /* Mask contains the mode's signbit and the significant bits of the
3741      modulus.  By including the signbit in the operation, many targets
3742      can avoid an explicit compare operation in the following comparison
3743      against zero.  */
3744
3745   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3746   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3747     {
3748       masklow |= HOST_WIDE_INT_M1U << (GET_MODE_BITSIZE (mode) - 1);
3749       maskhigh = -1;
3750     }
3751   else
3752     maskhigh = HOST_WIDE_INT_M1U
3753                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3754
3755   temp = expand_binop (mode, and_optab, op0,
3756                        immed_double_const (masklow, maskhigh, mode),
3757                        result, 1, OPTAB_LIB_WIDEN);
3758   if (temp != result)
3759     emit_move_insn (result, temp);
3760
3761   label = gen_label_rtx ();
3762   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3763
3764   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3765                        0, OPTAB_LIB_WIDEN);
3766   masklow = HOST_WIDE_INT_M1U << logd;
3767   maskhigh = -1;
3768   temp = expand_binop (mode, ior_optab, temp,
3769                        immed_double_const (masklow, maskhigh, mode),
3770                        result, 1, OPTAB_LIB_WIDEN);
3771   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3772                        0, OPTAB_LIB_WIDEN);
3773   if (temp != result)
3774     emit_move_insn (result, temp);
3775   emit_label (label);
3776   return result;
3777 }
3778
3779 /* Expand signed division of OP0 by a power of two D in mode MODE.
3780    This routine is only called for positive values of D.  */
3781
3782 static rtx
3783 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3784 {
3785   rtx temp, label;
3786   int logd;
3787
3788   logd = floor_log2 (d);
3789
3790   if (d == 2
3791       && BRANCH_COST (optimize_insn_for_speed_p (),
3792                       false) >= 1)
3793     {
3794       temp = gen_reg_rtx (mode);
3795       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3796       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3797                            0, OPTAB_LIB_WIDEN);
3798       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3799     }
3800
3801 #ifdef HAVE_conditional_move
3802   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3803       >= 2)
3804     {
3805       rtx temp2;
3806
3807       start_sequence ();
3808       temp2 = copy_to_mode_reg (mode, op0);
3809       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3810                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3811       temp = force_reg (mode, temp);
3812
3813       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3814       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3815                                      mode, temp, temp2, mode, 0);
3816       if (temp2)
3817         {
3818           rtx seq = get_insns ();
3819           end_sequence ();
3820           emit_insn (seq);
3821           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3822         }
3823       end_sequence ();
3824     }
3825 #endif
3826
3827   if (BRANCH_COST (optimize_insn_for_speed_p (),
3828                    false) >= 2)
3829     {
3830       int ushift = GET_MODE_BITSIZE (mode) - logd;
3831
3832       temp = gen_reg_rtx (mode);
3833       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3834       if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3835           > COSTS_N_INSNS (1))
3836         temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3837                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3838       else
3839         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3840                              ushift, NULL_RTX, 1);
3841       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3842                            0, OPTAB_LIB_WIDEN);
3843       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3844     }
3845
3846   label = gen_label_rtx ();
3847   temp = copy_to_mode_reg (mode, op0);
3848   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3849   expand_inc (temp, gen_int_mode (d - 1, mode));
3850   emit_label (label);
3851   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3852 }
3853 \f
3854 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3855    if that is convenient, and returning where the result is.
3856    You may request either the quotient or the remainder as the result;
3857    specify REM_FLAG nonzero to get the remainder.
3858
3859    CODE is the expression code for which kind of division this is;
3860    it controls how rounding is done.  MODE is the machine mode to use.
3861    UNSIGNEDP nonzero means do unsigned division.  */
3862
3863 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3864    and then correct it by or'ing in missing high bits
3865    if result of ANDI is nonzero.
3866    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3867    This could optimize to a bfexts instruction.
3868    But C doesn't use these operations, so their optimizations are
3869    left for later.  */
3870 /* ??? For modulo, we don't actually need the highpart of the first product,
3871    the low part will do nicely.  And for small divisors, the second multiply
3872    can also be a low-part only multiply or even be completely left out.
3873    E.g. to calculate the remainder of a division by 3 with a 32 bit
3874    multiply, multiply with 0x55555556 and extract the upper two bits;
3875    the result is exact for inputs up to 0x1fffffff.
3876    The input range can be reduced by using cross-sum rules.
3877    For odd divisors >= 3, the following table gives right shift counts
3878    so that if a number is shifted by an integer multiple of the given
3879    amount, the remainder stays the same:
3880    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3881    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3882    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3883    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3884    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3885
3886    Cross-sum rules for even numbers can be derived by leaving as many bits
3887    to the right alone as the divisor has zeros to the right.
3888    E.g. if x is an unsigned 32 bit number:
3889    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3890    */
3891
3892 rtx
3893 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3894                rtx op0, rtx op1, rtx target, int unsignedp)
3895 {
3896   enum machine_mode compute_mode;
3897   rtx tquotient;
3898   rtx quotient = 0, remainder = 0;
3899   rtx last;
3900   int size;
3901   rtx insn;
3902   optab optab1, optab2;
3903   int op1_is_constant, op1_is_pow2 = 0;
3904   int max_cost, extra_cost;
3905   static HOST_WIDE_INT last_div_const = 0;
3906   bool speed = optimize_insn_for_speed_p ();
3907
3908   op1_is_constant = CONST_INT_P (op1);
3909   if (op1_is_constant)
3910     {
3911       unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3912       if (unsignedp)
3913         ext_op1 &= GET_MODE_MASK (mode);
3914       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3915                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3916     }
3917
3918   /*
3919      This is the structure of expand_divmod:
3920
3921      First comes code to fix up the operands so we can perform the operations
3922      correctly and efficiently.
3923
3924      Second comes a switch statement with code specific for each rounding mode.
3925      For some special operands this code emits all RTL for the desired
3926      operation, for other cases, it generates only a quotient and stores it in
3927      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3928      to indicate that it has not done anything.
3929
3930      Last comes code that finishes the operation.  If QUOTIENT is set and
3931      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3932      QUOTIENT is not set, it is computed using trunc rounding.
3933
3934      We try to generate special code for division and remainder when OP1 is a
3935      constant.  If |OP1| = 2**n we can use shifts and some other fast
3936      operations.  For other values of OP1, we compute a carefully selected
3937      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3938      by m.
3939
3940      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3941      half of the product.  Different strategies for generating the product are
3942      implemented in expmed_mult_highpart.
3943
3944      If what we actually want is the remainder, we generate that by another
3945      by-constant multiplication and a subtraction.  */
3946
3947   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3948      code below will malfunction if we are, so check here and handle
3949      the special case if so.  */
3950   if (op1 == const1_rtx)
3951     return rem_flag ? const0_rtx : op0;
3952
3953     /* When dividing by -1, we could get an overflow.
3954      negv_optab can handle overflows.  */
3955   if (! unsignedp && op1 == constm1_rtx)
3956     {
3957       if (rem_flag)
3958         return const0_rtx;
3959       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
3960                           ? negv_optab : neg_optab, op0, target, 0);
3961     }
3962
3963   if (target
3964       /* Don't use the function value register as a target
3965          since we have to read it as well as write it,
3966          and function-inlining gets confused by this.  */
3967       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3968           /* Don't clobber an operand while doing a multi-step calculation.  */
3969           || ((rem_flag || op1_is_constant)
3970               && (reg_mentioned_p (target, op0)
3971                   || (MEM_P (op0) && MEM_P (target))))
3972           || reg_mentioned_p (target, op1)
3973           || (MEM_P (op1) && MEM_P (target))))
3974     target = 0;
3975
3976   /* Get the mode in which to perform this computation.  Normally it will
3977      be MODE, but sometimes we can't do the desired operation in MODE.
3978      If so, pick a wider mode in which we can do the operation.  Convert
3979      to that mode at the start to avoid repeated conversions.
3980
3981      First see what operations we need.  These depend on the expression
3982      we are evaluating.  (We assume that divxx3 insns exist under the
3983      same conditions that modxx3 insns and that these insns don't normally
3984      fail.  If these assumptions are not correct, we may generate less
3985      efficient code in some cases.)
3986
3987      Then see if we find a mode in which we can open-code that operation
3988      (either a division, modulus, or shift).  Finally, check for the smallest
3989      mode for which we can do the operation with a library call.  */
3990
3991   /* We might want to refine this now that we have division-by-constant
3992      optimization.  Since expmed_mult_highpart tries so many variants, it is
3993      not straightforward to generalize this.  Maybe we should make an array
3994      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3995
3996   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3997             ? (unsignedp ? lshr_optab : ashr_optab)
3998             : (unsignedp ? udiv_optab : sdiv_optab));
3999   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
4000             ? optab1
4001             : (unsignedp ? udivmod_optab : sdivmod_optab));
4002
4003   for (compute_mode = mode; compute_mode != VOIDmode;
4004        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4005     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4006         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4007       break;
4008
4009   if (compute_mode == VOIDmode)
4010     for (compute_mode = mode; compute_mode != VOIDmode;
4011          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4012       if (optab_libfunc (optab1, compute_mode)
4013           || optab_libfunc (optab2, compute_mode))
4014         break;
4015
4016   /* If we still couldn't find a mode, use MODE, but expand_binop will
4017      probably die.  */
4018   if (compute_mode == VOIDmode)
4019     compute_mode = mode;
4020
4021   if (target && GET_MODE (target) == compute_mode)
4022     tquotient = target;
4023   else
4024     tquotient = gen_reg_rtx (compute_mode);
4025
4026   size = GET_MODE_BITSIZE (compute_mode);
4027 #if 0
4028   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4029      (mode), and thereby get better code when OP1 is a constant.  Do that
4030      later.  It will require going over all usages of SIZE below.  */
4031   size = GET_MODE_BITSIZE (mode);
4032 #endif
4033
4034   /* Only deduct something for a REM if the last divide done was
4035      for a different constant.   Then set the constant of the last
4036      divide.  */
4037   max_cost = (unsignedp
4038               ? udiv_cost (speed, compute_mode)
4039               : sdiv_cost (speed, compute_mode));
4040   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4041                      && INTVAL (op1) == last_div_const))
4042     max_cost -= (mul_cost (speed, compute_mode)
4043                  + add_cost (speed, compute_mode));
4044
4045   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4046
4047   /* Now convert to the best mode to use.  */
4048   if (compute_mode != mode)
4049     {
4050       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4051       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4052
4053       /* convert_modes may have placed op1 into a register, so we
4054          must recompute the following.  */
4055       op1_is_constant = CONST_INT_P (op1);
4056       op1_is_pow2 = (op1_is_constant
4057                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4058                           || (! unsignedp
4059                               && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4060     }
4061
4062   /* If one of the operands is a volatile MEM, copy it into a register.  */
4063
4064   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4065     op0 = force_reg (compute_mode, op0);
4066   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4067     op1 = force_reg (compute_mode, op1);
4068
4069   /* If we need the remainder or if OP1 is constant, we need to
4070      put OP0 in a register in case it has any queued subexpressions.  */
4071   if (rem_flag || op1_is_constant)
4072     op0 = force_reg (compute_mode, op0);
4073
4074   last = get_last_insn ();
4075
4076   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4077   if (unsignedp)
4078     {
4079       if (code == FLOOR_DIV_EXPR)
4080         code = TRUNC_DIV_EXPR;
4081       if (code == FLOOR_MOD_EXPR)
4082         code = TRUNC_MOD_EXPR;
4083       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4084         code = TRUNC_DIV_EXPR;
4085     }
4086
4087   if (op1 != const0_rtx)
4088     switch (code)
4089       {
4090       case TRUNC_MOD_EXPR:
4091       case TRUNC_DIV_EXPR:
4092         if (op1_is_constant)
4093           {
4094             if (unsignedp)
4095               {
4096                 unsigned HOST_WIDE_INT mh, ml;
4097                 int pre_shift, post_shift;
4098                 int dummy;
4099                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4100                                             & GET_MODE_MASK (compute_mode));
4101
4102                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4103                   {
4104                     pre_shift = floor_log2 (d);
4105                     if (rem_flag)
4106                       {
4107                         unsigned HOST_WIDE_INT mask
4108                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4109                         remainder
4110                           = expand_binop (compute_mode, and_optab, op0,
4111                                           gen_int_mode (mask, compute_mode),
4112                                           remainder, 1,
4113                                           OPTAB_LIB_WIDEN);
4114                         if (remainder)
4115                           return gen_lowpart (mode, remainder);
4116                       }
4117                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4118                                              pre_shift, tquotient, 1);
4119                   }
4120                 else if (size <= HOST_BITS_PER_WIDE_INT)
4121                   {
4122                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4123                       {
4124                         /* Most significant bit of divisor is set; emit an scc
4125                            insn.  */
4126                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4127                                                           compute_mode, 1, 1);
4128                       }
4129                     else
4130                       {
4131                         /* Find a suitable multiplier and right shift count
4132                            instead of multiplying with D.  */
4133
4134                         mh = choose_multiplier (d, size, size,
4135                                                 &ml, &post_shift, &dummy);
4136
4137                         /* If the suggested multiplier is more than SIZE bits,
4138                            we can do better for even divisors, using an
4139                            initial right shift.  */
4140                         if (mh != 0 && (d & 1) == 0)
4141                           {
4142                             pre_shift = floor_log2 (d & -d);
4143                             mh = choose_multiplier (d >> pre_shift, size,
4144                                                     size - pre_shift,
4145                                                     &ml, &post_shift, &dummy);
4146                             gcc_assert (!mh);
4147                           }
4148                         else
4149                           pre_shift = 0;
4150
4151                         if (mh != 0)
4152                           {
4153                             rtx t1, t2, t3, t4;
4154
4155                             if (post_shift - 1 >= BITS_PER_WORD)
4156                               goto fail1;
4157
4158                             extra_cost
4159                               = (shift_cost (speed, compute_mode, post_shift - 1)
4160                                  + shift_cost (speed, compute_mode, 1)
4161                                  + 2 * add_cost (speed, compute_mode));
4162                             t1 = expmed_mult_highpart
4163                               (compute_mode, op0,
4164                                gen_int_mode (ml, compute_mode),
4165                                NULL_RTX, 1, max_cost - extra_cost);
4166                             if (t1 == 0)
4167                               goto fail1;
4168                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4169                                                                op0, t1),
4170                                                 NULL_RTX);
4171                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4172                                                t2, 1, NULL_RTX, 1);
4173                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4174                                                               t1, t3),
4175                                                 NULL_RTX);
4176                             quotient = expand_shift
4177                               (RSHIFT_EXPR, compute_mode, t4,
4178                                post_shift - 1, tquotient, 1);
4179                           }
4180                         else
4181                           {
4182                             rtx t1, t2;
4183
4184                             if (pre_shift >= BITS_PER_WORD
4185                                 || post_shift >= BITS_PER_WORD)
4186                               goto fail1;
4187
4188                             t1 = expand_shift
4189                               (RSHIFT_EXPR, compute_mode, op0,
4190                                pre_shift, NULL_RTX, 1);
4191                             extra_cost
4192                               = (shift_cost (speed, compute_mode, pre_shift)
4193                                  + shift_cost (speed, compute_mode, post_shift));
4194                             t2 = expmed_mult_highpart
4195                               (compute_mode, t1,
4196                                gen_int_mode (ml, compute_mode),
4197                                NULL_RTX, 1, max_cost - extra_cost);
4198                             if (t2 == 0)
4199                               goto fail1;
4200                             quotient = expand_shift
4201                               (RSHIFT_EXPR, compute_mode, t2,
4202                                post_shift, tquotient, 1);
4203                           }
4204                       }
4205                   }
4206                 else            /* Too wide mode to use tricky code */
4207                   break;
4208
4209                 insn = get_last_insn ();
4210                 if (insn != last)
4211                   set_dst_reg_note (insn, REG_EQUAL,
4212                                     gen_rtx_UDIV (compute_mode, op0, op1),
4213                                     quotient);
4214               }
4215             else                /* TRUNC_DIV, signed */
4216               {
4217                 unsigned HOST_WIDE_INT ml;
4218                 int lgup, post_shift;
4219                 rtx mlr;
4220                 HOST_WIDE_INT d = INTVAL (op1);
4221                 unsigned HOST_WIDE_INT abs_d;
4222
4223                 /* Since d might be INT_MIN, we have to cast to
4224                    unsigned HOST_WIDE_INT before negating to avoid
4225                    undefined signed overflow.  */
4226                 abs_d = (d >= 0
4227                          ? (unsigned HOST_WIDE_INT) d
4228                          : - (unsigned HOST_WIDE_INT) d);
4229
4230                 /* n rem d = n rem -d */
4231                 if (rem_flag && d < 0)
4232                   {
4233                     d = abs_d;
4234                     op1 = gen_int_mode (abs_d, compute_mode);
4235                   }
4236
4237                 if (d == 1)
4238                   quotient = op0;
4239                 else if (d == -1)
4240                   quotient = expand_unop (compute_mode, neg_optab, op0,
4241                                           tquotient, 0);
4242                 else if (HOST_BITS_PER_WIDE_INT >= size
4243                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4244                   {
4245                     /* This case is not handled correctly below.  */
4246                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4247                                                 compute_mode, 1, 1);
4248                     if (quotient == 0)
4249                       goto fail1;
4250                   }
4251                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4252                          && (rem_flag
4253                              ? smod_pow2_cheap (speed, compute_mode)
4254                              : sdiv_pow2_cheap (speed, compute_mode))
4255                          /* We assume that cheap metric is true if the
4256                             optab has an expander for this mode.  */
4257                          && ((optab_handler ((rem_flag ? smod_optab
4258                                               : sdiv_optab),
4259                                              compute_mode)
4260                               != CODE_FOR_nothing)
4261                              || (optab_handler (sdivmod_optab,
4262                                                 compute_mode)
4263                                  != CODE_FOR_nothing)))
4264                   ;
4265                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4266                   {
4267                     if (rem_flag)
4268                       {
4269                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4270                         if (remainder)
4271                           return gen_lowpart (mode, remainder);
4272                       }
4273
4274                     if (sdiv_pow2_cheap (speed, compute_mode)
4275                         && ((optab_handler (sdiv_optab, compute_mode)
4276                              != CODE_FOR_nothing)
4277                             || (optab_handler (sdivmod_optab, compute_mode)
4278                                 != CODE_FOR_nothing)))
4279                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4280                                                 compute_mode, op0,
4281                                                 gen_int_mode (abs_d,
4282                                                               compute_mode),
4283                                                 NULL_RTX, 0);
4284                     else
4285                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4286
4287                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4288                        negate the quotient.  */
4289                     if (d < 0)
4290                       {
4291                         insn = get_last_insn ();
4292                         if (insn != last
4293                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4294                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4295                           set_dst_reg_note (insn, REG_EQUAL,
4296                                             gen_rtx_DIV (compute_mode, op0,
4297                                                          gen_int_mode
4298                                                            (abs_d,
4299                                                             compute_mode)),
4300                                             quotient);
4301
4302                         quotient = expand_unop (compute_mode, neg_optab,
4303                                                 quotient, quotient, 0);
4304                       }
4305                   }
4306                 else if (size <= HOST_BITS_PER_WIDE_INT)
4307                   {
4308                     choose_multiplier (abs_d, size, size - 1,
4309                                        &ml, &post_shift, &lgup);
4310                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4311                       {
4312                         rtx t1, t2, t3;
4313
4314                         if (post_shift >= BITS_PER_WORD
4315                             || size - 1 >= BITS_PER_WORD)
4316                           goto fail1;
4317
4318                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4319                                       + shift_cost (speed, compute_mode, size - 1)
4320                                       + add_cost (speed, compute_mode));
4321                         t1 = expmed_mult_highpart
4322                           (compute_mode, op0, gen_int_mode (ml, compute_mode),
4323                            NULL_RTX, 0, max_cost - extra_cost);
4324                         if (t1 == 0)
4325                           goto fail1;
4326                         t2 = expand_shift
4327                           (RSHIFT_EXPR, compute_mode, t1,
4328                            post_shift, NULL_RTX, 0);
4329                         t3 = expand_shift
4330                           (RSHIFT_EXPR, compute_mode, op0,
4331                            size - 1, NULL_RTX, 0);
4332                         if (d < 0)
4333                           quotient
4334                             = force_operand (gen_rtx_MINUS (compute_mode,
4335                                                             t3, t2),
4336                                              tquotient);
4337                         else
4338                           quotient
4339                             = force_operand (gen_rtx_MINUS (compute_mode,
4340                                                             t2, t3),
4341                                              tquotient);
4342                       }
4343                     else
4344                       {
4345                         rtx t1, t2, t3, t4;
4346
4347                         if (post_shift >= BITS_PER_WORD
4348                             || size - 1 >= BITS_PER_WORD)
4349                           goto fail1;
4350
4351                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4352                         mlr = gen_int_mode (ml, compute_mode);
4353                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4354                                       + shift_cost (speed, compute_mode, size - 1)
4355                                       + 2 * add_cost (speed, compute_mode));
4356                         t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4357                                                    NULL_RTX, 0,
4358                                                    max_cost - extra_cost);
4359                         if (t1 == 0)
4360                           goto fail1;
4361                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4362                                                           t1, op0),
4363                                             NULL_RTX);
4364                         t3 = expand_shift
4365                           (RSHIFT_EXPR, compute_mode, t2,
4366                            post_shift, NULL_RTX, 0);
4367                         t4 = expand_shift
4368                           (RSHIFT_EXPR, compute_mode, op0,
4369                            size - 1, NULL_RTX, 0);
4370                         if (d < 0)
4371                           quotient
4372                             = force_operand (gen_rtx_MINUS (compute_mode,
4373                                                             t4, t3),
4374                                              tquotient);
4375                         else
4376                           quotient
4377                             = force_operand (gen_rtx_MINUS (compute_mode,
4378                                                             t3, t4),
4379                                              tquotient);
4380                       }
4381                   }
4382                 else            /* Too wide mode to use tricky code */
4383                   break;
4384
4385                 insn = get_last_insn ();
4386                 if (insn != last)
4387                   set_dst_reg_note (insn, REG_EQUAL,
4388                                     gen_rtx_DIV (compute_mode, op0, op1),
4389                                     quotient);
4390               }
4391             break;
4392           }
4393       fail1:
4394         delete_insns_since (last);
4395         break;
4396
4397       case FLOOR_DIV_EXPR:
4398       case FLOOR_MOD_EXPR:
4399       /* We will come here only for signed operations.  */
4400         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4401           {
4402             unsigned HOST_WIDE_INT mh, ml;
4403             int pre_shift, lgup, post_shift;
4404             HOST_WIDE_INT d = INTVAL (op1);
4405
4406             if (d > 0)
4407               {
4408                 /* We could just as easily deal with negative constants here,
4409                    but it does not seem worth the trouble for GCC 2.6.  */
4410                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4411                   {
4412                     pre_shift = floor_log2 (d);
4413                     if (rem_flag)
4414                       {
4415                         unsigned HOST_WIDE_INT mask
4416                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4417                         remainder = expand_binop
4418                           (compute_mode, and_optab, op0,
4419                            gen_int_mode (mask, compute_mode),
4420                            remainder, 0, OPTAB_LIB_WIDEN);
4421                         if (remainder)
4422                           return gen_lowpart (mode, remainder);
4423                       }
4424                     quotient = expand_shift
4425                       (RSHIFT_EXPR, compute_mode, op0,
4426                        pre_shift, tquotient, 0);
4427                   }
4428                 else
4429                   {
4430                     rtx t1, t2, t3, t4;
4431
4432                     mh = choose_multiplier (d, size, size - 1,
4433                                             &ml, &post_shift, &lgup);
4434                     gcc_assert (!mh);
4435
4436                     if (post_shift < BITS_PER_WORD
4437                         && size - 1 < BITS_PER_WORD)
4438                       {
4439                         t1 = expand_shift
4440                           (RSHIFT_EXPR, compute_mode, op0,
4441                            size - 1, NULL_RTX, 0);
4442                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4443                                            NULL_RTX, 0, OPTAB_WIDEN);
4444                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4445                                       + shift_cost (speed, compute_mode, size - 1)
4446                                       + 2 * add_cost (speed, compute_mode));
4447                         t3 = expmed_mult_highpart
4448                           (compute_mode, t2, gen_int_mode (ml, compute_mode),
4449                            NULL_RTX, 1, max_cost - extra_cost);
4450                         if (t3 != 0)
4451                           {
4452                             t4 = expand_shift
4453                               (RSHIFT_EXPR, compute_mode, t3,
4454                                post_shift, NULL_RTX, 1);
4455                             quotient = expand_binop (compute_mode, xor_optab,
4456                                                      t4, t1, tquotient, 0,
4457                                                      OPTAB_WIDEN);
4458                           }
4459                       }
4460                   }
4461               }
4462             else
4463               {
4464                 rtx nsign, t1, t2, t3, t4;
4465                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4466                                                   op0, constm1_rtx), NULL_RTX);
4467                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4468                                    0, OPTAB_WIDEN);
4469                 nsign = expand_shift
4470                   (RSHIFT_EXPR, compute_mode, t2,
4471                    size - 1, NULL_RTX, 0);
4472                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4473                                     NULL_RTX);
4474                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4475                                     NULL_RTX, 0);
4476                 if (t4)
4477                   {
4478                     rtx t5;
4479                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4480                                       NULL_RTX, 0);
4481                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4482                                                             t4, t5),
4483                                               tquotient);
4484                   }
4485               }
4486           }
4487
4488         if (quotient != 0)
4489           break;
4490         delete_insns_since (last);
4491
4492         /* Try using an instruction that produces both the quotient and
4493            remainder, using truncation.  We can easily compensate the quotient
4494            or remainder to get floor rounding, once we have the remainder.
4495            Notice that we compute also the final remainder value here,
4496            and return the result right away.  */
4497         if (target == 0 || GET_MODE (target) != compute_mode)
4498           target = gen_reg_rtx (compute_mode);
4499
4500         if (rem_flag)
4501           {
4502             remainder
4503               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4504             quotient = gen_reg_rtx (compute_mode);
4505           }
4506         else
4507           {
4508             quotient
4509               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4510             remainder = gen_reg_rtx (compute_mode);
4511           }
4512
4513         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4514                                  quotient, remainder, 0))
4515           {
4516             /* This could be computed with a branch-less sequence.
4517                Save that for later.  */
4518             rtx tem;
4519             rtx label = gen_label_rtx ();
4520             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4521             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4522                                 NULL_RTX, 0, OPTAB_WIDEN);
4523             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4524             expand_dec (quotient, const1_rtx);
4525             expand_inc (remainder, op1);
4526             emit_label (label);
4527             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4528           }
4529
4530         /* No luck with division elimination or divmod.  Have to do it
4531            by conditionally adjusting op0 *and* the result.  */
4532         {
4533           rtx label1, label2, label3, label4, label5;
4534           rtx adjusted_op0;
4535           rtx tem;
4536
4537           quotient = gen_reg_rtx (compute_mode);
4538           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4539           label1 = gen_label_rtx ();
4540           label2 = gen_label_rtx ();
4541           label3 = gen_label_rtx ();
4542           label4 = gen_label_rtx ();
4543           label5 = gen_label_rtx ();
4544           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4545           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4546           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4547                               quotient, 0, OPTAB_LIB_WIDEN);
4548           if (tem != quotient)
4549             emit_move_insn (quotient, tem);
4550           emit_jump_insn (gen_jump (label5));
4551           emit_barrier ();
4552           emit_label (label1);
4553           expand_inc (adjusted_op0, const1_rtx);
4554           emit_jump_insn (gen_jump (label4));
4555           emit_barrier ();
4556           emit_label (label2);
4557           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4558           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4559                               quotient, 0, OPTAB_LIB_WIDEN);
4560           if (tem != quotient)
4561             emit_move_insn (quotient, tem);
4562           emit_jump_insn (gen_jump (label5));
4563           emit_barrier ();
4564           emit_label (label3);
4565           expand_dec (adjusted_op0, const1_rtx);
4566           emit_label (label4);
4567           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4568                               quotient, 0, OPTAB_LIB_WIDEN);
4569           if (tem != quotient)
4570             emit_move_insn (quotient, tem);
4571           expand_dec (quotient, const1_rtx);
4572           emit_label (label5);
4573         }
4574         break;
4575
4576       case CEIL_DIV_EXPR:
4577       case CEIL_MOD_EXPR:
4578         if (unsignedp)
4579           {
4580             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4581               {
4582                 rtx t1, t2, t3;
4583                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4584                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4585                                    floor_log2 (d), tquotient, 1);
4586                 t2 = expand_binop (compute_mode, and_optab, op0,
4587                                    gen_int_mode (d - 1, compute_mode),
4588                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4589                 t3 = gen_reg_rtx (compute_mode);
4590                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4591                                       compute_mode, 1, 1);
4592                 if (t3 == 0)
4593                   {
4594                     rtx lab;
4595                     lab = gen_label_rtx ();
4596                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4597                     expand_inc (t1, const1_rtx);
4598                     emit_label (lab);
4599                     quotient = t1;
4600                   }
4601                 else
4602                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4603                                                           t1, t3),
4604                                             tquotient);
4605                 break;
4606               }
4607
4608             /* Try using an instruction that produces both the quotient and
4609                remainder, using truncation.  We can easily compensate the
4610                quotient or remainder to get ceiling rounding, once we have the
4611                remainder.  Notice that we compute also the final remainder
4612                value here, and return the result right away.  */
4613             if (target == 0 || GET_MODE (target) != compute_mode)
4614               target = gen_reg_rtx (compute_mode);
4615
4616             if (rem_flag)
4617               {
4618                 remainder = (REG_P (target)
4619                              ? target : gen_reg_rtx (compute_mode));
4620                 quotient = gen_reg_rtx (compute_mode);
4621               }
4622             else
4623               {
4624                 quotient = (REG_P (target)
4625                             ? target : gen_reg_rtx (compute_mode));
4626                 remainder = gen_reg_rtx (compute_mode);
4627               }
4628
4629             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4630                                      remainder, 1))
4631               {
4632                 /* This could be computed with a branch-less sequence.
4633                    Save that for later.  */
4634                 rtx label = gen_label_rtx ();
4635                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4636                                  compute_mode, label);
4637                 expand_inc (quotient, const1_rtx);
4638                 expand_dec (remainder, op1);
4639                 emit_label (label);
4640                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4641               }
4642
4643             /* No luck with division elimination or divmod.  Have to do it
4644                by conditionally adjusting op0 *and* the result.  */
4645             {
4646               rtx label1, label2;
4647               rtx adjusted_op0, tem;
4648
4649               quotient = gen_reg_rtx (compute_mode);
4650               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4651               label1 = gen_label_rtx ();
4652               label2 = gen_label_rtx ();
4653               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4654                                compute_mode, label1);
4655               emit_move_insn  (quotient, const0_rtx);
4656               emit_jump_insn (gen_jump (label2));
4657               emit_barrier ();
4658               emit_label (label1);
4659               expand_dec (adjusted_op0, const1_rtx);
4660               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4661                                   quotient, 1, OPTAB_LIB_WIDEN);
4662               if (tem != quotient)
4663                 emit_move_insn (quotient, tem);
4664               expand_inc (quotient, const1_rtx);
4665               emit_label (label2);
4666             }
4667           }
4668         else /* signed */
4669           {
4670             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4671                 && INTVAL (op1) >= 0)
4672               {
4673                 /* This is extremely similar to the code for the unsigned case
4674                    above.  For 2.7 we should merge these variants, but for
4675                    2.6.1 I don't want to touch the code for unsigned since that
4676                    get used in C.  The signed case will only be used by other
4677                    languages (Ada).  */
4678
4679                 rtx t1, t2, t3;
4680                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4681                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4682                                    floor_log2 (d), tquotient, 0);
4683                 t2 = expand_binop (compute_mode, and_optab, op0,
4684                                    gen_int_mode (d - 1, compute_mode),
4685                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4686                 t3 = gen_reg_rtx (compute_mode);
4687                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4688                                       compute_mode, 1, 1);
4689                 if (t3 == 0)
4690                   {
4691                     rtx lab;
4692                     lab = gen_label_rtx ();
4693                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4694                     expand_inc (t1, const1_rtx);
4695                     emit_label (lab);
4696                     quotient = t1;
4697                   }
4698                 else
4699                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4700                                                           t1, t3),
4701                                             tquotient);
4702                 break;
4703               }
4704
4705             /* Try using an instruction that produces both the quotient and
4706                remainder, using truncation.  We can easily compensate the
4707                quotient or remainder to get ceiling rounding, once we have the
4708                remainder.  Notice that we compute also the final remainder
4709                value here, and return the result right away.  */
4710             if (target == 0 || GET_MODE (target) != compute_mode)
4711               target = gen_reg_rtx (compute_mode);
4712             if (rem_flag)
4713               {
4714                 remainder= (REG_P (target)
4715                             ? target : gen_reg_rtx (compute_mode));
4716                 quotient = gen_reg_rtx (compute_mode);
4717               }
4718             else
4719               {
4720                 quotient = (REG_P (target)
4721                             ? target : gen_reg_rtx (compute_mode));
4722                 remainder = gen_reg_rtx (compute_mode);
4723               }
4724
4725             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4726                                      remainder, 0))
4727               {
4728                 /* This could be computed with a branch-less sequence.
4729                    Save that for later.  */
4730                 rtx tem;
4731                 rtx label = gen_label_rtx ();
4732                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4733                                  compute_mode, label);
4734                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4735                                     NULL_RTX, 0, OPTAB_WIDEN);
4736                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4737                 expand_inc (quotient, const1_rtx);
4738                 expand_dec (remainder, op1);
4739                 emit_label (label);
4740                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4741               }
4742
4743             /* No luck with division elimination or divmod.  Have to do it
4744                by conditionally adjusting op0 *and* the result.  */
4745             {
4746               rtx label1, label2, label3, label4, label5;
4747               rtx adjusted_op0;
4748               rtx tem;
4749
4750               quotient = gen_reg_rtx (compute_mode);
4751               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4752               label1 = gen_label_rtx ();
4753               label2 = gen_label_rtx ();
4754               label3 = gen_label_rtx ();
4755               label4 = gen_label_rtx ();
4756               label5 = gen_label_rtx ();
4757               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4758               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4759                                compute_mode, label1);
4760               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4761                                   quotient, 0, OPTAB_LIB_WIDEN);
4762               if (tem != quotient)
4763                 emit_move_insn (quotient, tem);
4764               emit_jump_insn (gen_jump (label5));
4765               emit_barrier ();
4766               emit_label (label1);
4767               expand_dec (adjusted_op0, const1_rtx);
4768               emit_jump_insn (gen_jump (label4));
4769               emit_barrier ();
4770               emit_label (label2);
4771               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4772                                compute_mode, label3);
4773               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4774                                   quotient, 0, OPTAB_LIB_WIDEN);
4775               if (tem != quotient)
4776                 emit_move_insn (quotient, tem);
4777               emit_jump_insn (gen_jump (label5));
4778               emit_barrier ();
4779               emit_label (label3);
4780               expand_inc (adjusted_op0, const1_rtx);
4781               emit_label (label4);
4782               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4783                                   quotient, 0, OPTAB_LIB_WIDEN);
4784               if (tem != quotient)
4785                 emit_move_insn (quotient, tem);
4786               expand_inc (quotient, const1_rtx);
4787               emit_label (label5);
4788             }
4789           }
4790         break;
4791
4792       case EXACT_DIV_EXPR:
4793         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4794           {
4795             HOST_WIDE_INT d = INTVAL (op1);
4796             unsigned HOST_WIDE_INT ml;
4797             int pre_shift;
4798             rtx t1;
4799
4800             pre_shift = floor_log2 (d & -d);
4801             ml = invert_mod2n (d >> pre_shift, size);
4802             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4803                                pre_shift, NULL_RTX, unsignedp);
4804             quotient = expand_mult (compute_mode, t1,
4805                                     gen_int_mode (ml, compute_mode),
4806                                     NULL_RTX, 1);
4807
4808             insn = get_last_insn ();
4809             set_dst_reg_note (insn, REG_EQUAL,
4810                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4811                                               compute_mode, op0, op1),
4812                               quotient);
4813           }
4814         break;
4815
4816       case ROUND_DIV_EXPR:
4817       case ROUND_MOD_EXPR:
4818         if (unsignedp)
4819           {
4820             rtx tem;
4821             rtx label;
4822             label = gen_label_rtx ();
4823             quotient = gen_reg_rtx (compute_mode);
4824             remainder = gen_reg_rtx (compute_mode);
4825             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4826               {
4827                 rtx tem;
4828                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4829                                          quotient, 1, OPTAB_LIB_WIDEN);
4830                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4831                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4832                                           remainder, 1, OPTAB_LIB_WIDEN);
4833               }
4834             tem = plus_constant (compute_mode, op1, -1);
4835             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4836             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4837             expand_inc (quotient, const1_rtx);
4838             expand_dec (remainder, op1);
4839             emit_label (label);
4840           }
4841         else
4842           {
4843             rtx abs_rem, abs_op1, tem, mask;
4844             rtx label;
4845             label = gen_label_rtx ();
4846             quotient = gen_reg_rtx (compute_mode);
4847             remainder = gen_reg_rtx (compute_mode);
4848             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4849               {
4850                 rtx tem;
4851                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4852                                          quotient, 0, OPTAB_LIB_WIDEN);
4853                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4854                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4855                                           remainder, 0, OPTAB_LIB_WIDEN);
4856               }
4857             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4858             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4859             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4860                                 1, NULL_RTX, 1);
4861             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4862             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4863                                 NULL_RTX, 0, OPTAB_WIDEN);
4864             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4865                                  size - 1, NULL_RTX, 0);
4866             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4867                                 NULL_RTX, 0, OPTAB_WIDEN);
4868             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4869                                 NULL_RTX, 0, OPTAB_WIDEN);
4870             expand_inc (quotient, tem);
4871             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4872                                 NULL_RTX, 0, OPTAB_WIDEN);
4873             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4874                                 NULL_RTX, 0, OPTAB_WIDEN);
4875             expand_dec (remainder, tem);
4876             emit_label (label);
4877           }
4878         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4879
4880       default:
4881         gcc_unreachable ();
4882       }
4883
4884   if (quotient == 0)
4885     {
4886       if (target && GET_MODE (target) != compute_mode)
4887         target = 0;
4888
4889       if (rem_flag)
4890         {
4891           /* Try to produce the remainder without producing the quotient.
4892              If we seem to have a divmod pattern that does not require widening,
4893              don't try widening here.  We should really have a WIDEN argument
4894              to expand_twoval_binop, since what we'd really like to do here is
4895              1) try a mod insn in compute_mode
4896              2) try a divmod insn in compute_mode
4897              3) try a div insn in compute_mode and multiply-subtract to get
4898                 remainder
4899              4) try the same things with widening allowed.  */
4900           remainder
4901             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4902                                  op0, op1, target,
4903                                  unsignedp,
4904                                  ((optab_handler (optab2, compute_mode)
4905                                    != CODE_FOR_nothing)
4906                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4907           if (remainder == 0)
4908             {
4909               /* No luck there.  Can we do remainder and divide at once
4910                  without a library call?  */
4911               remainder = gen_reg_rtx (compute_mode);
4912               if (! expand_twoval_binop ((unsignedp
4913                                           ? udivmod_optab
4914                                           : sdivmod_optab),
4915                                          op0, op1,
4916                                          NULL_RTX, remainder, unsignedp))
4917                 remainder = 0;
4918             }
4919
4920           if (remainder)
4921             return gen_lowpart (mode, remainder);
4922         }
4923
4924       /* Produce the quotient.  Try a quotient insn, but not a library call.
4925          If we have a divmod in this mode, use it in preference to widening
4926          the div (for this test we assume it will not fail). Note that optab2
4927          is set to the one of the two optabs that the call below will use.  */
4928       quotient
4929         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4930                              op0, op1, rem_flag ? NULL_RTX : target,
4931                              unsignedp,
4932                              ((optab_handler (optab2, compute_mode)
4933                                != CODE_FOR_nothing)
4934                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4935
4936       if (quotient == 0)
4937         {
4938           /* No luck there.  Try a quotient-and-remainder insn,
4939              keeping the quotient alone.  */
4940           quotient = gen_reg_rtx (compute_mode);
4941           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4942                                      op0, op1,
4943                                      quotient, NULL_RTX, unsignedp))
4944             {
4945               quotient = 0;
4946               if (! rem_flag)
4947                 /* Still no luck.  If we are not computing the remainder,
4948                    use a library call for the quotient.  */
4949                 quotient = sign_expand_binop (compute_mode,
4950                                               udiv_optab, sdiv_optab,
4951                                               op0, op1, target,
4952                                               unsignedp, OPTAB_LIB_WIDEN);
4953             }
4954         }
4955     }
4956
4957   if (rem_flag)
4958     {
4959       if (target && GET_MODE (target) != compute_mode)
4960         target = 0;
4961
4962       if (quotient == 0)
4963         {
4964           /* No divide instruction either.  Use library for remainder.  */
4965           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4966                                          op0, op1, target,
4967                                          unsignedp, OPTAB_LIB_WIDEN);
4968           /* No remainder function.  Try a quotient-and-remainder
4969              function, keeping the remainder.  */
4970           if (!remainder)
4971             {
4972               remainder = gen_reg_rtx (compute_mode);
4973               if (!expand_twoval_binop_libfunc
4974                   (unsignedp ? udivmod_optab : sdivmod_optab,
4975                    op0, op1,
4976                    NULL_RTX, remainder,
4977                    unsignedp ? UMOD : MOD))
4978                 remainder = NULL_RTX;
4979             }
4980         }
4981       else
4982         {
4983           /* We divided.  Now finish doing X - Y * (X / Y).  */
4984           remainder = expand_mult (compute_mode, quotient, op1,
4985                                    NULL_RTX, unsignedp);
4986           remainder = expand_binop (compute_mode, sub_optab, op0,
4987                                     remainder, target, unsignedp,
4988                                     OPTAB_LIB_WIDEN);
4989         }
4990     }
4991
4992   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4993 }
4994 \f
4995 /* Return a tree node with data type TYPE, describing the value of X.
4996    Usually this is an VAR_DECL, if there is no obvious better choice.
4997    X may be an expression, however we only support those expressions
4998    generated by loop.c.  */
4999
5000 tree
5001 make_tree (tree type, rtx x)
5002 {
5003   tree t;
5004
5005   switch (GET_CODE (x))
5006     {
5007     case CONST_INT:
5008       {
5009         HOST_WIDE_INT hi = 0;
5010
5011         if (INTVAL (x) < 0
5012             && !(TYPE_UNSIGNED (type)
5013                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
5014                      < HOST_BITS_PER_WIDE_INT)))
5015           hi = -1;
5016
5017         t = build_int_cst_wide (type, INTVAL (x), hi);
5018
5019         return t;
5020       }
5021
5022     case CONST_DOUBLE:
5023       if (GET_MODE (x) == VOIDmode)
5024         t = build_int_cst_wide (type,
5025                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
5026       else
5027         {
5028           REAL_VALUE_TYPE d;
5029
5030           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5031           t = build_real (type, d);
5032         }
5033
5034       return t;
5035
5036     case CONST_VECTOR:
5037       {
5038         int units = CONST_VECTOR_NUNITS (x);
5039         tree itype = TREE_TYPE (type);
5040         tree *elts;
5041         int i;
5042
5043         /* Build a tree with vector elements.  */
5044         elts = XALLOCAVEC (tree, units);
5045         for (i = units - 1; i >= 0; --i)
5046           {
5047             rtx elt = CONST_VECTOR_ELT (x, i);
5048             elts[i] = make_tree (itype, elt);
5049           }
5050
5051         return build_vector (type, elts);
5052       }
5053
5054     case PLUS:
5055       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5056                           make_tree (type, XEXP (x, 1)));
5057
5058     case MINUS:
5059       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5060                           make_tree (type, XEXP (x, 1)));
5061
5062     case NEG:
5063       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5064
5065     case MULT:
5066       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5067                           make_tree (type, XEXP (x, 1)));
5068
5069     case ASHIFT:
5070       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5071                           make_tree (type, XEXP (x, 1)));
5072
5073     case LSHIFTRT:
5074       t = unsigned_type_for (type);
5075       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5076                                          make_tree (t, XEXP (x, 0)),
5077                                          make_tree (type, XEXP (x, 1))));
5078
5079     case ASHIFTRT:
5080       t = signed_type_for (type);
5081       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5082                                          make_tree (t, XEXP (x, 0)),
5083                                          make_tree (type, XEXP (x, 1))));
5084
5085     case DIV:
5086       if (TREE_CODE (type) != REAL_TYPE)
5087         t = signed_type_for (type);
5088       else
5089         t = type;
5090
5091       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5092                                          make_tree (t, XEXP (x, 0)),
5093                                          make_tree (t, XEXP (x, 1))));
5094     case UDIV:
5095       t = unsigned_type_for (type);
5096       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5097                                          make_tree (t, XEXP (x, 0)),
5098                                          make_tree (t, XEXP (x, 1))));
5099
5100     case SIGN_EXTEND:
5101     case ZERO_EXTEND:
5102       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5103                                           GET_CODE (x) == ZERO_EXTEND);
5104       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5105
5106     case CONST:
5107       return make_tree (type, XEXP (x, 0));
5108
5109     case SYMBOL_REF:
5110       t = SYMBOL_REF_DECL (x);
5111       if (t)
5112         return fold_convert (type, build_fold_addr_expr (t));
5113       /* else fall through.  */
5114
5115     default:
5116       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5117
5118       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5119          address mode to pointer mode.  */
5120       if (POINTER_TYPE_P (type))
5121         x = convert_memory_address_addr_space
5122               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5123
5124       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5125          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5126       t->decl_with_rtl.rtl = x;
5127
5128       return t;
5129     }
5130 }
5131 \f
5132 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5133    and returning TARGET.
5134
5135    If TARGET is 0, a pseudo-register or constant is returned.  */
5136
5137 rtx
5138 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5139 {
5140   rtx tem = 0;
5141
5142   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5143     tem = simplify_binary_operation (AND, mode, op0, op1);
5144   if (tem == 0)
5145     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5146
5147   if (target == 0)
5148     target = tem;
5149   else if (tem != target)
5150     emit_move_insn (target, tem);
5151   return target;
5152 }
5153
5154 /* Helper function for emit_store_flag.  */
5155 static rtx
5156 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5157              enum machine_mode mode, enum machine_mode compare_mode,
5158              int unsignedp, rtx x, rtx y, int normalizep,
5159              enum machine_mode target_mode)
5160 {
5161   struct expand_operand ops[4];
5162   rtx op0, last, comparison, subtarget;
5163   enum machine_mode result_mode = targetm.cstore_mode (icode);
5164
5165   last = get_last_insn ();
5166   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5167   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5168   if (!x || !y)
5169     {
5170       delete_insns_since (last);
5171       return NULL_RTX;
5172     }
5173
5174   if (target_mode == VOIDmode)
5175     target_mode = result_mode;
5176   if (!target)
5177     target = gen_reg_rtx (target_mode);
5178
5179   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5180
5181   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5182   create_fixed_operand (&ops[1], comparison);
5183   create_fixed_operand (&ops[2], x);
5184   create_fixed_operand (&ops[3], y);
5185   if (!maybe_expand_insn (icode, 4, ops))
5186     {
5187       delete_insns_since (last);
5188       return NULL_RTX;
5189     }
5190   subtarget = ops[0].value;
5191
5192   /* If we are converting to a wider mode, first convert to
5193      TARGET_MODE, then normalize.  This produces better combining
5194      opportunities on machines that have a SIGN_EXTRACT when we are
5195      testing a single bit.  This mostly benefits the 68k.
5196
5197      If STORE_FLAG_VALUE does not have the sign bit set when
5198      interpreted in MODE, we can do this conversion as unsigned, which
5199      is usually more efficient.  */
5200   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5201     {
5202       convert_move (target, subtarget,
5203                     val_signbit_known_clear_p (result_mode,
5204                                                STORE_FLAG_VALUE));
5205       op0 = target;
5206       result_mode = target_mode;
5207     }
5208   else
5209     op0 = subtarget;
5210
5211   /* If we want to keep subexpressions around, don't reuse our last
5212      target.  */
5213   if (optimize)
5214     subtarget = 0;
5215
5216   /* Now normalize to the proper value in MODE.  Sometimes we don't
5217      have to do anything.  */
5218   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5219     ;
5220   /* STORE_FLAG_VALUE might be the most negative number, so write
5221      the comparison this way to avoid a compiler-time warning.  */
5222   else if (- normalizep == STORE_FLAG_VALUE)
5223     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5224
5225   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5226      it hard to use a value of just the sign bit due to ANSI integer
5227      constant typing rules.  */
5228   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5229     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5230                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5231                         normalizep == 1);
5232   else
5233     {
5234       gcc_assert (STORE_FLAG_VALUE & 1);
5235
5236       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5237       if (normalizep == -1)
5238         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5239     }
5240
5241   /* If we were converting to a smaller mode, do the conversion now.  */
5242   if (target_mode != result_mode)
5243     {
5244       convert_move (target, op0, 0);
5245       return target;
5246     }
5247   else
5248     return op0;
5249 }
5250
5251
5252 /* A subroutine of emit_store_flag only including "tricks" that do not
5253    need a recursive call.  These are kept separate to avoid infinite
5254    loops.  */
5255
5256 static rtx
5257 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5258                    enum machine_mode mode, int unsignedp, int normalizep,
5259                    enum machine_mode target_mode)
5260 {
5261   rtx subtarget;
5262   enum insn_code icode;
5263   enum machine_mode compare_mode;
5264   enum mode_class mclass;
5265   enum rtx_code scode;
5266   rtx tem;
5267
5268   if (unsignedp)
5269     code = unsigned_condition (code);
5270   scode = swap_condition (code);
5271
5272   /* If one operand is constant, make it the second one.  Only do this
5273      if the other operand is not constant as well.  */
5274
5275   if (swap_commutative_operands_p (op0, op1))
5276     {
5277       tem = op0;
5278       op0 = op1;
5279       op1 = tem;
5280       code = swap_condition (code);
5281     }
5282
5283   if (mode == VOIDmode)
5284     mode = GET_MODE (op0);
5285
5286   /* For some comparisons with 1 and -1, we can convert this to
5287      comparisons with zero.  This will often produce more opportunities for
5288      store-flag insns.  */
5289
5290   switch (code)
5291     {
5292     case LT:
5293       if (op1 == const1_rtx)
5294         op1 = const0_rtx, code = LE;
5295       break;
5296     case LE:
5297       if (op1 == constm1_rtx)
5298         op1 = const0_rtx, code = LT;
5299       break;
5300     case GE:
5301       if (op1 == const1_rtx)
5302         op1 = const0_rtx, code = GT;
5303       break;
5304     case GT:
5305       if (op1 == constm1_rtx)
5306         op1 = const0_rtx, code = GE;
5307       break;
5308     case GEU:
5309       if (op1 == const1_rtx)
5310         op1 = const0_rtx, code = NE;
5311       break;
5312     case LTU:
5313       if (op1 == const1_rtx)
5314         op1 = const0_rtx, code = EQ;
5315       break;
5316     default:
5317       break;
5318     }
5319
5320   /* If we are comparing a double-word integer with zero or -1, we can
5321      convert the comparison into one involving a single word.  */
5322   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5323       && GET_MODE_CLASS (mode) == MODE_INT
5324       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5325     {
5326       if ((code == EQ || code == NE)
5327           && (op1 == const0_rtx || op1 == constm1_rtx))
5328         {
5329           rtx op00, op01;
5330
5331           /* Do a logical OR or AND of the two words and compare the
5332              result.  */
5333           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5334           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5335           tem = expand_binop (word_mode,
5336                               op1 == const0_rtx ? ior_optab : and_optab,
5337                               op00, op01, NULL_RTX, unsignedp,
5338                               OPTAB_DIRECT);
5339
5340           if (tem != 0)
5341             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5342                                    unsignedp, normalizep);
5343         }
5344       else if ((code == LT || code == GE) && op1 == const0_rtx)
5345         {
5346           rtx op0h;
5347
5348           /* If testing the sign bit, can just test on high word.  */
5349           op0h = simplify_gen_subreg (word_mode, op0, mode,
5350                                       subreg_highpart_offset (word_mode,
5351                                                               mode));
5352           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5353                                  unsignedp, normalizep);
5354         }
5355       else
5356         tem = NULL_RTX;
5357
5358       if (tem)
5359         {
5360           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5361             return tem;
5362           if (!target)
5363             target = gen_reg_rtx (target_mode);
5364
5365           convert_move (target, tem,
5366                         !val_signbit_known_set_p (word_mode,
5367                                                   (normalizep ? normalizep
5368                                                    : STORE_FLAG_VALUE)));
5369           return target;
5370         }
5371     }
5372
5373   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5374      complement of A (for GE) and shifting the sign bit to the low bit.  */
5375   if (op1 == const0_rtx && (code == LT || code == GE)
5376       && GET_MODE_CLASS (mode) == MODE_INT
5377       && (normalizep || STORE_FLAG_VALUE == 1
5378           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5379     {
5380       subtarget = target;
5381
5382       if (!target)
5383         target_mode = mode;
5384
5385       /* If the result is to be wider than OP0, it is best to convert it
5386          first.  If it is to be narrower, it is *incorrect* to convert it
5387          first.  */
5388       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5389         {
5390           op0 = convert_modes (target_mode, mode, op0, 0);
5391           mode = target_mode;
5392         }
5393
5394       if (target_mode != mode)
5395         subtarget = 0;
5396
5397       if (code == GE)
5398         op0 = expand_unop (mode, one_cmpl_optab, op0,
5399                            ((STORE_FLAG_VALUE == 1 || normalizep)
5400                             ? 0 : subtarget), 0);
5401
5402       if (STORE_FLAG_VALUE == 1 || normalizep)
5403         /* If we are supposed to produce a 0/1 value, we want to do
5404            a logical shift from the sign bit to the low-order bit; for
5405            a -1/0 value, we do an arithmetic shift.  */
5406         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5407                             GET_MODE_BITSIZE (mode) - 1,
5408                             subtarget, normalizep != -1);
5409
5410       if (mode != target_mode)
5411         op0 = convert_modes (target_mode, mode, op0, 0);
5412
5413       return op0;
5414     }
5415
5416   mclass = GET_MODE_CLASS (mode);
5417   for (compare_mode = mode; compare_mode != VOIDmode;
5418        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5419     {
5420      enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5421      icode = optab_handler (cstore_optab, optab_mode);
5422      if (icode != CODE_FOR_nothing)
5423         {
5424           do_pending_stack_adjust ();
5425           tem = emit_cstore (target, icode, code, mode, compare_mode,
5426                              unsignedp, op0, op1, normalizep, target_mode);
5427           if (tem)
5428             return tem;
5429
5430           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5431             {
5432               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5433                                  unsignedp, op1, op0, normalizep, target_mode);
5434               if (tem)
5435                 return tem;
5436             }
5437           break;
5438         }
5439     }
5440
5441   return 0;
5442 }
5443
5444 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5445    and storing in TARGET.  Normally return TARGET.
5446    Return 0 if that cannot be done.
5447
5448    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5449    it is VOIDmode, they cannot both be CONST_INT.
5450
5451    UNSIGNEDP is for the case where we have to widen the operands
5452    to perform the operation.  It says to use zero-extension.
5453
5454    NORMALIZEP is 1 if we should convert the result to be either zero
5455    or one.  Normalize is -1 if we should convert the result to be
5456    either zero or -1.  If NORMALIZEP is zero, the result will be left
5457    "raw" out of the scc insn.  */
5458
5459 rtx
5460 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5461                  enum machine_mode mode, int unsignedp, int normalizep)
5462 {
5463   enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5464   enum rtx_code rcode;
5465   rtx subtarget;
5466   rtx tem, last, trueval;
5467
5468   /* If we compare constants, we shouldn't use a store-flag operation,
5469      but a constant load.  We can get there via the vanilla route that
5470      usually generates a compare-branch sequence, but will in this case
5471      fold the comparison to a constant, and thus elide the branch.  */
5472   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5473     return NULL_RTX;
5474
5475   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5476                            target_mode);
5477   if (tem)
5478     return tem;
5479
5480   /* If we reached here, we can't do this with a scc insn, however there
5481      are some comparisons that can be done in other ways.  Don't do any
5482      of these cases if branches are very cheap.  */
5483   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5484     return 0;
5485
5486   /* See what we need to return.  We can only return a 1, -1, or the
5487      sign bit.  */
5488
5489   if (normalizep == 0)
5490     {
5491       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5492         normalizep = STORE_FLAG_VALUE;
5493
5494       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5495         ;
5496       else
5497         return 0;
5498     }
5499
5500   last = get_last_insn ();
5501
5502   /* If optimizing, use different pseudo registers for each insn, instead
5503      of reusing the same pseudo.  This leads to better CSE, but slows
5504      down the compiler, since there are more pseudos */
5505   subtarget = (!optimize
5506                && (target_mode == mode)) ? target : NULL_RTX;
5507   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5508
5509   /* For floating-point comparisons, try the reverse comparison or try
5510      changing the "orderedness" of the comparison.  */
5511   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5512     {
5513       enum rtx_code first_code;
5514       bool and_them;
5515
5516       rcode = reverse_condition_maybe_unordered (code);
5517       if (can_compare_p (rcode, mode, ccp_store_flag)
5518           && (code == ORDERED || code == UNORDERED
5519               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5520               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5521         {
5522           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5523                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5524
5525           /* For the reverse comparison, use either an addition or a XOR.  */
5526           if (want_add
5527               && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5528                            optimize_insn_for_speed_p ()) == 0)
5529             {
5530               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5531                                        STORE_FLAG_VALUE, target_mode);
5532               if (tem)
5533                 return expand_binop (target_mode, add_optab, tem,
5534                                      gen_int_mode (normalizep, target_mode),
5535                                      target, 0, OPTAB_WIDEN);
5536             }
5537           else if (!want_add
5538                    && rtx_cost (trueval, XOR, 1,
5539                                 optimize_insn_for_speed_p ()) == 0)
5540             {
5541               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5542                                        normalizep, target_mode);
5543               if (tem)
5544                 return expand_binop (target_mode, xor_optab, tem, trueval,
5545                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5546             }
5547         }
5548
5549       delete_insns_since (last);
5550
5551       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5552       if (code == ORDERED || code == UNORDERED)
5553         return 0;
5554
5555       and_them = split_comparison (code, mode, &first_code, &code);
5556
5557       /* If there are no NaNs, the first comparison should always fall through.
5558          Effectively change the comparison to the other one.  */
5559       if (!HONOR_NANS (mode))
5560         {
5561           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5562           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5563                                     target_mode);
5564         }
5565
5566 #ifdef HAVE_conditional_move
5567       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5568          conditional move.  */
5569       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5570                                normalizep, target_mode);
5571       if (tem == 0)
5572         return 0;
5573
5574       if (and_them)
5575         tem = emit_conditional_move (target, code, op0, op1, mode,
5576                                      tem, const0_rtx, GET_MODE (tem), 0);
5577       else
5578         tem = emit_conditional_move (target, code, op0, op1, mode,
5579                                      trueval, tem, GET_MODE (tem), 0);
5580
5581       if (tem == 0)
5582         delete_insns_since (last);
5583       return tem;
5584 #else
5585       return 0;
5586 #endif
5587     }
5588
5589   /* The remaining tricks only apply to integer comparisons.  */
5590
5591   if (GET_MODE_CLASS (mode) != MODE_INT)
5592     return 0;
5593
5594   /* If this is an equality comparison of integers, we can try to exclusive-or
5595      (or subtract) the two operands and use a recursive call to try the
5596      comparison with zero.  Don't do any of these cases if branches are
5597      very cheap.  */
5598
5599   if ((code == EQ || code == NE) && op1 != const0_rtx)
5600     {
5601       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5602                           OPTAB_WIDEN);
5603
5604       if (tem == 0)
5605         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5606                             OPTAB_WIDEN);
5607       if (tem != 0)
5608         tem = emit_store_flag (target, code, tem, const0_rtx,
5609                                mode, unsignedp, normalizep);
5610       if (tem != 0)
5611         return tem;
5612
5613       delete_insns_since (last);
5614     }
5615
5616   /* For integer comparisons, try the reverse comparison.  However, for
5617      small X and if we'd have anyway to extend, implementing "X != 0"
5618      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5619   rcode = reverse_condition (code);
5620   if (can_compare_p (rcode, mode, ccp_store_flag)
5621       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5622             && code == NE
5623             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5624             && op1 == const0_rtx))
5625     {
5626       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5627                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5628
5629       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5630       if (want_add
5631           && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5632                        optimize_insn_for_speed_p ()) == 0)
5633         {
5634           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5635                                    STORE_FLAG_VALUE, target_mode);
5636           if (tem != 0)
5637             tem = expand_binop (target_mode, add_optab, tem,
5638                                 gen_int_mode (normalizep, target_mode),
5639                                 target, 0, OPTAB_WIDEN);
5640         }
5641       else if (!want_add
5642                && rtx_cost (trueval, XOR, 1,
5643                             optimize_insn_for_speed_p ()) == 0)
5644         {
5645           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5646                                    normalizep, target_mode);
5647           if (tem != 0)
5648             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5649                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5650         }
5651
5652       if (tem != 0)
5653         return tem;
5654       delete_insns_since (last);
5655     }
5656
5657   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5658      the constant zero.  Reject all other comparisons at this point.  Only
5659      do LE and GT if branches are expensive since they are expensive on
5660      2-operand machines.  */
5661
5662   if (op1 != const0_rtx
5663       || (code != EQ && code != NE
5664           && (BRANCH_COST (optimize_insn_for_speed_p (),
5665                            false) <= 1 || (code != LE && code != GT))))
5666     return 0;
5667
5668   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5669      do the necessary operation below.  */
5670
5671   tem = 0;
5672
5673   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5674      the sign bit set.  */
5675
5676   if (code == LE)
5677     {
5678       /* This is destructive, so SUBTARGET can't be OP0.  */
5679       if (rtx_equal_p (subtarget, op0))
5680         subtarget = 0;
5681
5682       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5683                           OPTAB_WIDEN);
5684       if (tem)
5685         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5686                             OPTAB_WIDEN);
5687     }
5688
5689   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5690      number of bits in the mode of OP0, minus one.  */
5691
5692   if (code == GT)
5693     {
5694       if (rtx_equal_p (subtarget, op0))
5695         subtarget = 0;
5696
5697       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5698                           GET_MODE_BITSIZE (mode) - 1,
5699                           subtarget, 0);
5700       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5701                           OPTAB_WIDEN);
5702     }
5703
5704   if (code == EQ || code == NE)
5705     {
5706       /* For EQ or NE, one way to do the comparison is to apply an operation
5707          that converts the operand into a positive number if it is nonzero
5708          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5709          for NE we negate.  This puts the result in the sign bit.  Then we
5710          normalize with a shift, if needed.
5711
5712          Two operations that can do the above actions are ABS and FFS, so try
5713          them.  If that doesn't work, and MODE is smaller than a full word,
5714          we can use zero-extension to the wider mode (an unsigned conversion)
5715          as the operation.  */
5716
5717       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5718          that is compensated by the subsequent overflow when subtracting
5719          one / negating.  */
5720
5721       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5722         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5723       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5724         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5725       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5726         {
5727           tem = convert_modes (word_mode, mode, op0, 1);
5728           mode = word_mode;
5729         }
5730
5731       if (tem != 0)
5732         {
5733           if (code == EQ)
5734             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5735                                 0, OPTAB_WIDEN);
5736           else
5737             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5738         }
5739
5740       /* If we couldn't do it that way, for NE we can "or" the two's complement
5741          of the value with itself.  For EQ, we take the one's complement of
5742          that "or", which is an extra insn, so we only handle EQ if branches
5743          are expensive.  */
5744
5745       if (tem == 0
5746           && (code == NE
5747               || BRANCH_COST (optimize_insn_for_speed_p (),
5748                               false) > 1))
5749         {
5750           if (rtx_equal_p (subtarget, op0))
5751             subtarget = 0;
5752
5753           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5754           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5755                               OPTAB_WIDEN);
5756
5757           if (tem && code == EQ)
5758             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5759         }
5760     }
5761
5762   if (tem && normalizep)
5763     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5764                         GET_MODE_BITSIZE (mode) - 1,
5765                         subtarget, normalizep == 1);
5766
5767   if (tem)
5768     {
5769       if (!target)
5770         ;
5771       else if (GET_MODE (tem) != target_mode)
5772         {
5773           convert_move (target, tem, 0);
5774           tem = target;
5775         }
5776       else if (!subtarget)
5777         {
5778           emit_move_insn (target, tem);
5779           tem = target;
5780         }
5781     }
5782   else
5783     delete_insns_since (last);
5784
5785   return tem;
5786 }
5787
5788 /* Like emit_store_flag, but always succeeds.  */
5789
5790 rtx
5791 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5792                        enum machine_mode mode, int unsignedp, int normalizep)
5793 {
5794   rtx tem, label;
5795   rtx trueval, falseval;
5796
5797   /* First see if emit_store_flag can do the job.  */
5798   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5799   if (tem != 0)
5800     return tem;
5801
5802   if (!target)
5803     target = gen_reg_rtx (word_mode);
5804
5805   /* If this failed, we have to do this with set/compare/jump/set code.
5806      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5807   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5808   if (code == NE
5809       && GET_MODE_CLASS (mode) == MODE_INT
5810       && REG_P (target)
5811       && op0 == target
5812       && op1 == const0_rtx)
5813     {
5814       label = gen_label_rtx ();
5815       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5816                                mode, NULL_RTX, NULL_RTX, label, -1);
5817       emit_move_insn (target, trueval);
5818       emit_label (label);
5819       return target;
5820     }
5821
5822   if (!REG_P (target)
5823       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5824     target = gen_reg_rtx (GET_MODE (target));
5825
5826   /* Jump in the right direction if the target cannot implement CODE
5827      but can jump on its reverse condition.  */
5828   falseval = const0_rtx;
5829   if (! can_compare_p (code, mode, ccp_jump)
5830       && (! FLOAT_MODE_P (mode)
5831           || code == ORDERED || code == UNORDERED
5832           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5833           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5834     {
5835       enum rtx_code rcode;
5836       if (FLOAT_MODE_P (mode))
5837         rcode = reverse_condition_maybe_unordered (code);
5838       else
5839         rcode = reverse_condition (code);
5840
5841       /* Canonicalize to UNORDERED for the libcall.  */
5842       if (can_compare_p (rcode, mode, ccp_jump)
5843           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5844         {
5845           falseval = trueval;
5846           trueval = const0_rtx;
5847           code = rcode;
5848         }
5849     }
5850
5851   emit_move_insn (target, trueval);
5852   label = gen_label_rtx ();
5853   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5854                            NULL_RTX, label, -1);
5855
5856   emit_move_insn (target, falseval);
5857   emit_label (label);
5858
5859   return target;
5860 }
5861 \f
5862 /* Perform possibly multi-word comparison and conditional jump to LABEL
5863    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5864    now a thin wrapper around do_compare_rtx_and_jump.  */
5865
5866 static void
5867 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5868                  rtx label)
5869 {
5870   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5871   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5872                            NULL_RTX, NULL_RTX, label, -1);
5873 }