gcc-4_9/gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2014 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "diagnostic-core.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "stor-layout.h"
  30 #include "tm_p.h"
  31 #include "flags.h"
  32 #include "insn-config.h"
  33 #include "expr.h"
  34 #include "optabs.h"
  35 #include "recog.h"
  36 #include "langhooks.h"
  37 #include "df.h"
  38 #include "target.h"
  39 #include "expmed.h"
  40
  41 struct target_expmed default_target_expmed;
  42 #if SWITCHABLE_TARGET
  43 struct target_expmed *this_target_expmed = &default_target_expmed;
  44 #endif
  45
  46 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  47                                    unsigned HOST_WIDE_INT,
  48                                    unsigned HOST_WIDE_INT,
  49                                    unsigned HOST_WIDE_INT,
  50                                    rtx);
  51 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
  52                                      unsigned HOST_WIDE_INT,
  53                                      rtx);
  54 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  55                                    unsigned HOST_WIDE_INT,
  56                                    unsigned HOST_WIDE_INT,
  57                                    unsigned HOST_WIDE_INT,
  58                                    rtx);
  59 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  60                                     unsigned HOST_WIDE_INT,
  61                                     unsigned HOST_WIDE_INT, rtx, int);
  62 static rtx extract_fixed_bit_field_1 (enum machine_mode, rtx,
  63                                       unsigned HOST_WIDE_INT,
  64                                       unsigned HOST_WIDE_INT, rtx, int);
  65 static rtx mask_rtx (enum machine_mode, int, int, int);
  66 static rtx lshift_value (enum machine_mode, unsigned HOST_WIDE_INT, int);
  67 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  68                                     unsigned HOST_WIDE_INT, int);
  69 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  70 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  71 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  72
  73 /* Test whether X is zero or a power of two.  X is expanded twice.  */
  74 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  75   (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
  76
  77 struct init_expmed_rtl
  78 {
  79   struct rtx_def reg;
  80   struct rtx_def plus;
  81   struct rtx_def neg;
  82   struct rtx_def mult;
  83   struct rtx_def sdiv;
  84   struct rtx_def udiv;
  85   struct rtx_def sdiv_32;
  86   struct rtx_def smod_32;
  87   struct rtx_def wide_mult;
  88   struct rtx_def wide_lshr;
  89   struct rtx_def wide_trunc;
  90   struct rtx_def shift;
  91   struct rtx_def shift_mult;
  92   struct rtx_def shift_add;
  93   struct rtx_def shift_sub0;
  94   struct rtx_def shift_sub1;
  95   struct rtx_def zext;
  96   struct rtx_def trunc;
  97
  98   rtx pow2[MAX_BITS_PER_WORD];
  99   rtx cint[MAX_BITS_PER_WORD];
 100 };
 101
 102 static void
 103 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
 104                       enum machine_mode from_mode, bool speed)
 105 {
 106   int to_size, from_size;
 107   rtx which;
 108
 109   /* We're given no information about the true size of a partial integer,
 110      only the size of the "full" integer it requires for storage.  For
 111      comparison purposes here, reduce the bit size by one in that case.  */
 112   to_size = (GET_MODE_BITSIZE (to_mode)
 113              - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
 114   from_size = (GET_MODE_BITSIZE (from_mode)
 115                - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
 116
 117   /* Assume cost of zero-extend and sign-extend is the same.  */
 118   which = (to_size < from_size ? &all->trunc : &all->zext);
 119
 120   PUT_MODE (&all->reg, from_mode);
 121   set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
 122 }
 123
 124 static void
 125 init_expmed_one_mode (struct init_expmed_rtl *all,
 126                       enum machine_mode mode, int speed)
 127 {
 128   int m, n, mode_bitsize;
 129   enum machine_mode mode_from;
 130
 131   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 132
 133   PUT_MODE (&all->reg, mode);
 134   PUT_MODE (&all->plus, mode);
 135   PUT_MODE (&all->neg, mode);
 136   PUT_MODE (&all->mult, mode);
 137   PUT_MODE (&all->sdiv, mode);
 138   PUT_MODE (&all->udiv, mode);
 139   PUT_MODE (&all->sdiv_32, mode);
 140   PUT_MODE (&all->smod_32, mode);
 141   PUT_MODE (&all->wide_trunc, mode);
 142   PUT_MODE (&all->shift, mode);
 143   PUT_MODE (&all->shift_mult, mode);
 144   PUT_MODE (&all->shift_add, mode);
 145   PUT_MODE (&all->shift_sub0, mode);
 146   PUT_MODE (&all->shift_sub1, mode);
 147   PUT_MODE (&all->zext, mode);
 148   PUT_MODE (&all->trunc, mode);
 149
 150   set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
 151   set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
 152   set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
 153   set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
 154   set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
 155
 156   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
 157                                      <= 2 * add_cost (speed, mode)));
 158   set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
 159                                      <= 4 * add_cost (speed, mode)));
 160
 161   set_shift_cost (speed, mode, 0, 0);
 162   {
 163     int cost = add_cost (speed, mode);
 164     set_shiftadd_cost (speed, mode, 0, cost);
 165     set_shiftsub0_cost (speed, mode, 0, cost);
 166     set_shiftsub1_cost (speed, mode, 0, cost);
 167   }
 168
 169   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 170   for (m = 1; m < n; m++)
 171     {
 172       XEXP (&all->shift, 1) = all->cint[m];
 173       XEXP (&all->shift_mult, 1) = all->pow2[m];
 174
 175       set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
 176       set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
 177       set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
 178       set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
 179     }
 180
 181   if (SCALAR_INT_MODE_P (mode))
 182     {
 183       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 184            mode_from = (enum machine_mode)(mode_from + 1))
 185         init_expmed_one_conv (all, mode, mode_from, speed);
 186     }
 187   if (GET_MODE_CLASS (mode) == MODE_INT)
 188     {
 189       enum machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
 190       if (wider_mode != VOIDmode)
 191         {
 192           PUT_MODE (&all->zext, wider_mode);
 193           PUT_MODE (&all->wide_mult, wider_mode);
 194           PUT_MODE (&all->wide_lshr, wider_mode);
 195           XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
 196
 197           set_mul_widen_cost (speed, wider_mode,
 198                               set_src_cost (&all->wide_mult, speed));
 199           set_mul_highpart_cost (speed, mode,
 200                                  set_src_cost (&all->wide_trunc, speed));
 201         }
 202     }
 203 }
 204
 205 void
 206 init_expmed (void)
 207 {
 208   struct init_expmed_rtl all;
 209   enum machine_mode mode;
 210   int m, speed;
 211
 212   memset (&all, 0, sizeof all);
 213   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 214     {
 215       all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 216       all.cint[m] = GEN_INT (m);
 217     }
 218
 219   PUT_CODE (&all.reg, REG);
 220   /* Avoid using hard regs in ways which may be unsupported.  */
 221   SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
 222
 223   PUT_CODE (&all.plus, PLUS);
 224   XEXP (&all.plus, 0) = &all.reg;
 225   XEXP (&all.plus, 1) = &all.reg;
 226
 227   PUT_CODE (&all.neg, NEG);
 228   XEXP (&all.neg, 0) = &all.reg;
 229
 230   PUT_CODE (&all.mult, MULT);
 231   XEXP (&all.mult, 0) = &all.reg;
 232   XEXP (&all.mult, 1) = &all.reg;
 233
 234   PUT_CODE (&all.sdiv, DIV);
 235   XEXP (&all.sdiv, 0) = &all.reg;
 236   XEXP (&all.sdiv, 1) = &all.reg;
 237
 238   PUT_CODE (&all.udiv, UDIV);
 239   XEXP (&all.udiv, 0) = &all.reg;
 240   XEXP (&all.udiv, 1) = &all.reg;
 241
 242   PUT_CODE (&all.sdiv_32, DIV);
 243   XEXP (&all.sdiv_32, 0) = &all.reg;
 244   XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
 245
 246   PUT_CODE (&all.smod_32, MOD);
 247   XEXP (&all.smod_32, 0) = &all.reg;
 248   XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
 249
 250   PUT_CODE (&all.zext, ZERO_EXTEND);
 251   XEXP (&all.zext, 0) = &all.reg;
 252
 253   PUT_CODE (&all.wide_mult, MULT);
 254   XEXP (&all.wide_mult, 0) = &all.zext;
 255   XEXP (&all.wide_mult, 1) = &all.zext;
 256
 257   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 258   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 259
 260   PUT_CODE (&all.wide_trunc, TRUNCATE);
 261   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 262
 263   PUT_CODE (&all.shift, ASHIFT);
 264   XEXP (&all.shift, 0) = &all.reg;
 265
 266   PUT_CODE (&all.shift_mult, MULT);
 267   XEXP (&all.shift_mult, 0) = &all.reg;
 268
 269   PUT_CODE (&all.shift_add, PLUS);
 270   XEXP (&all.shift_add, 0) = &all.shift_mult;
 271   XEXP (&all.shift_add, 1) = &all.reg;
 272
 273   PUT_CODE (&all.shift_sub0, MINUS);
 274   XEXP (&all.shift_sub0, 0) = &all.shift_mult;
 275   XEXP (&all.shift_sub0, 1) = &all.reg;
 276
 277   PUT_CODE (&all.shift_sub1, MINUS);
 278   XEXP (&all.shift_sub1, 0) = &all.reg;
 279   XEXP (&all.shift_sub1, 1) = &all.shift_mult;
 280
 281   PUT_CODE (&all.trunc, TRUNCATE);
 282   XEXP (&all.trunc, 0) = &all.reg;
 283
 284   for (speed = 0; speed < 2; speed++)
 285     {
 286       crtl->maybe_hot_insn_p = speed;
 287       set_zero_cost (speed, set_src_cost (const0_rtx, speed));
 288
 289       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 290            mode = (enum machine_mode)(mode + 1))
 291         init_expmed_one_mode (&all, mode, speed);
 292
 293       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 294         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 295              mode = (enum machine_mode)(mode + 1))
 296           init_expmed_one_mode (&all, mode, speed);
 297
 298       if (MIN_MODE_VECTOR_INT != VOIDmode)
 299         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 300              mode = (enum machine_mode)(mode + 1))
 301           init_expmed_one_mode (&all, mode, speed);
 302     }
 303
 304   if (alg_hash_used_p ())
 305     {
 306       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 307       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 308     }
 309   else
 310     set_alg_hash_used_p (true);
 311   default_rtl_profile ();
 312 }
 313
 314 /* Return an rtx representing minus the value of X.
 315    MODE is the intended mode of the result,
 316    useful if X is a CONST_INT.  */
 317
 318 rtx
 319 negate_rtx (enum machine_mode mode, rtx x)
 320 {
 321   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 322
 323   if (result == 0)
 324     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 325
 326   return result;
 327 }
 328
 329 /* Adjust bitfield memory MEM so that it points to the first unit of mode
 330    MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
 331    If MODE is BLKmode, return a reference to every byte in the bitfield.
 332    Set *NEW_BITNUM to the bit position of the field within the new memory.  */
 333
 334 static rtx
 335 narrow_bit_field_mem (rtx mem, enum machine_mode mode,
 336                       unsigned HOST_WIDE_INT bitsize,
 337                       unsigned HOST_WIDE_INT bitnum,
 338                       unsigned HOST_WIDE_INT *new_bitnum)
 339 {
 340   if (mode == BLKmode)
 341     {
 342       *new_bitnum = bitnum % BITS_PER_UNIT;
 343       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 344       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 345                             / BITS_PER_UNIT);
 346       return adjust_bitfield_address_size (mem, mode, offset, size);
 347     }
 348   else
 349     {
 350       unsigned int unit = GET_MODE_BITSIZE (mode);
 351       *new_bitnum = bitnum % unit;
 352       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 353       return adjust_bitfield_address (mem, mode, offset);
 354     }
 355 }
 356
 357 /* The caller wants to perform insertion or extraction PATTERN on a
 358    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 359    BITREGION_START and BITREGION_END are as for store_bit_field
 360    and FIELDMODE is the natural mode of the field.
 361
 362    Search for a mode that is compatible with the memory access
 363    restrictions and (where applicable) with a register insertion or
 364    extraction.  Return the new memory on success, storing the adjusted
 365    bit position in *NEW_BITNUM.  Return null otherwise.  */
 366
 367 static rtx
 368 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 369                               rtx op0, HOST_WIDE_INT bitsize,
 370                               HOST_WIDE_INT bitnum,
 371                               unsigned HOST_WIDE_INT bitregion_start,
 372                               unsigned HOST_WIDE_INT bitregion_end,
 373                               enum machine_mode fieldmode,
 374                               unsigned HOST_WIDE_INT *new_bitnum)
 375 {
 376   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 377                                 bitregion_end, MEM_ALIGN (op0),
 378                                 MEM_VOLATILE_P (op0));
 379   enum machine_mode best_mode;
 380   if (iter.next_mode (&best_mode))
 381     {
 382       /* We can use a memory in BEST_MODE.  See whether this is true for
 383          any wider modes.  All other things being equal, we prefer to
 384          use the widest mode possible because it tends to expose more
 385          CSE opportunities.  */
 386       if (!iter.prefer_smaller_modes ())
 387         {
 388           /* Limit the search to the mode required by the corresponding
 389              register insertion or extraction instruction, if any.  */
 390           enum machine_mode limit_mode = word_mode;
 391           extraction_insn insn;
 392           if (get_best_reg_extraction_insn (&insn, pattern,
 393                                             GET_MODE_BITSIZE (best_mode),
 394                                             fieldmode))
 395             limit_mode = insn.field_mode;
 396
 397           enum machine_mode wider_mode;
 398           while (iter.next_mode (&wider_mode)
 399                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 400             best_mode = wider_mode;
 401         }
 402       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 403                                    new_bitnum);
 404     }
 405   return NULL_RTX;
 406 }
 407
 408 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 409    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 410    offset is then BITNUM / BITS_PER_UNIT.  */
 411
 412 static bool
 413 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
 414                      unsigned HOST_WIDE_INT bitsize,
 415                      enum machine_mode struct_mode)
 416 {
 417   if (BYTES_BIG_ENDIAN)
 418     return (bitnum % BITS_PER_UNIT == 0
 419             && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
 420                 || (bitnum + bitsize) % BITS_PER_WORD == 0));
 421   else
 422     return bitnum % BITS_PER_WORD == 0;
 423 }
 424
 425 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 426    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 427    Return false if the access would touch memory outside the range
 428    BITREGION_START to BITREGION_END for conformance to the C++ memory
 429    model.  */
 430
 431 static bool
 432 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 433                             unsigned HOST_WIDE_INT bitnum,
 434                             enum machine_mode fieldmode,
 435                             unsigned HOST_WIDE_INT bitregion_start,
 436                             unsigned HOST_WIDE_INT bitregion_end)
 437 {
 438   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 439
 440   /* -fstrict-volatile-bitfields must be enabled and we must have a
 441      volatile MEM.  */
 442   if (!MEM_P (op0)
 443       || !MEM_VOLATILE_P (op0)
 444       || flag_strict_volatile_bitfields <= 0)
 445     return false;
 446
 447   /* Non-integral modes likely only happen with packed structures.
 448      Punt.  */
 449   if (!SCALAR_INT_MODE_P (fieldmode))
 450     return false;
 451
 452   /* The bit size must not be larger than the field mode, and
 453      the field mode must not be larger than a word.  */
 454   if (bitsize > modesize || modesize > BITS_PER_WORD)
 455     return false;
 456
 457   /* Check for cases of unaligned fields that must be split.  */
 458   if (bitnum % BITS_PER_UNIT + bitsize > modesize
 459       || (STRICT_ALIGNMENT
 460           && bitnum % GET_MODE_ALIGNMENT (fieldmode) + bitsize > modesize))
 461     return false;
 462
 463   /* Check for cases where the C++ memory model applies.  */
 464   if (bitregion_end != 0
 465       && (bitnum - bitnum % modesize < bitregion_start
 466           || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
 467     return false;
 468
 469   return true;
 470 }
 471
 472 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
 473    bit number BITNUM can be treated as a simple value of mode MODE.  */
 474
 475 static bool
 476 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 477                        unsigned HOST_WIDE_INT bitnum, enum machine_mode mode)
 478 {
 479   return (MEM_P (op0)
 480           && bitnum % BITS_PER_UNIT == 0
 481           && bitsize == GET_MODE_BITSIZE (mode)
 482           && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
 483               || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
 484                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 485 }
 486 \f
 487 /* Try to use instruction INSV to store VALUE into a field of OP0.
 488    BITSIZE and BITNUM are as for store_bit_field.  */
 489
 490 static bool
 491 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 492                             unsigned HOST_WIDE_INT bitsize,
 493                             unsigned HOST_WIDE_INT bitnum,
 494                             rtx value)
 495 {
 496   struct expand_operand ops[4];
 497   rtx value1;
 498   rtx xop0 = op0;
 499   rtx last = get_last_insn ();
 500   bool copy_back = false;
 501
 502   enum machine_mode op_mode = insv->field_mode;
 503   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 504   if (bitsize == 0 || bitsize > unit)
 505     return false;
 506
 507   if (MEM_P (xop0))
 508     /* Get a reference to the first byte of the field.  */
 509     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 510                                  &bitnum);
 511   else
 512     {
 513       /* Convert from counting within OP0 to counting in OP_MODE.  */
 514       if (BYTES_BIG_ENDIAN)
 515         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 516
 517       /* If xop0 is a register, we need it in OP_MODE
 518          to make it acceptable to the format of insv.  */
 519       if (GET_CODE (xop0) == SUBREG)
 520         /* We can't just change the mode, because this might clobber op0,
 521            and we will need the original value of op0 if insv fails.  */
 522         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 523       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 524         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 525     }
 526
 527   /* If the destination is a paradoxical subreg such that we need a
 528      truncate to the inner mode, perform the insertion on a temporary and
 529      truncate the result to the original destination.  Note that we can't
 530      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 531      X) 0)) is (reg:N X).  */
 532   if (GET_CODE (xop0) == SUBREG
 533       && REG_P (SUBREG_REG (xop0))
 534       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 535                                          op_mode))
 536     {
 537       rtx tem = gen_reg_rtx (op_mode);
 538       emit_move_insn (tem, xop0);
 539       xop0 = tem;
 540       copy_back = true;
 541     }
 542
 543   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 544      "backwards" from the size of the unit we are inserting into.
 545      Otherwise, we count bits from the most significant on a
 546      BYTES/BITS_BIG_ENDIAN machine.  */
 547
 548   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 549     bitnum = unit - bitsize - bitnum;
 550
 551   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 552   value1 = value;
 553   if (GET_MODE (value) != op_mode)
 554     {
 555       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 556         {
 557           /* Optimization: Don't bother really extending VALUE
 558              if it has all the bits we will actually use.  However,
 559              if we must narrow it, be sure we do it correctly.  */
 560
 561           if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 562             {
 563               rtx tmp;
 564
 565               tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 566               if (! tmp)
 567                 tmp = simplify_gen_subreg (op_mode,
 568                                            force_reg (GET_MODE (value),
 569                                                       value1),
 570                                            GET_MODE (value), 0);
 571               value1 = tmp;
 572             }
 573           else
 574             value1 = gen_lowpart (op_mode, value1);
 575         }
 576       else if (CONST_INT_P (value))
 577         value1 = gen_int_mode (INTVAL (value), op_mode);
 578       else
 579         /* Parse phase is supposed to make VALUE's data type
 580            match that of the component reference, which is a type
 581            at least as wide as the field; so VALUE should have
 582            a mode that corresponds to that type.  */
 583         gcc_assert (CONSTANT_P (value));
 584     }
 585
 586   create_fixed_operand (&ops[0], xop0);
 587   create_integer_operand (&ops[1], bitsize);
 588   create_integer_operand (&ops[2], bitnum);
 589   create_input_operand (&ops[3], value1, op_mode);
 590   if (maybe_expand_insn (insv->icode, 4, ops))
 591     {
 592       if (copy_back)
 593         convert_move (op0, xop0, true);
 594       return true;
 595     }
 596   delete_insns_since (last);
 597   return false;
 598 }
 599
 600 /* A subroutine of store_bit_field, with the same arguments.  Return true
 601    if the operation could be implemented.
 602
 603    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 604    no other way of implementing the operation.  If FALLBACK_P is false,
 605    return false instead.  */
 606
 607 static bool
 608 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 609                    unsigned HOST_WIDE_INT bitnum,
 610                    unsigned HOST_WIDE_INT bitregion_start,
 611                    unsigned HOST_WIDE_INT bitregion_end,
 612                    enum machine_mode fieldmode,
 613                    rtx value, bool fallback_p)
 614 {
 615   rtx op0 = str_rtx;
 616   rtx orig_value;
 617
 618   while (GET_CODE (op0) == SUBREG)
 619     {
 620       /* The following line once was done only if WORDS_BIG_ENDIAN,
 621          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 622          meaningful at a much higher level; when structures are copied
 623          between memory and regs, the higher-numbered regs
 624          always get higher addresses.  */
 625       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 626       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 627       int byte_offset = 0;
 628
 629       /* Paradoxical subregs need special handling on big endian machines.  */
 630       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 631         {
 632           int difference = inner_mode_size - outer_mode_size;
 633
 634           if (WORDS_BIG_ENDIAN)
 635             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 636           if (BYTES_BIG_ENDIAN)
 637             byte_offset += difference % UNITS_PER_WORD;
 638         }
 639       else
 640         byte_offset = SUBREG_BYTE (op0);
 641
 642       bitnum += byte_offset * BITS_PER_UNIT;
 643       op0 = SUBREG_REG (op0);
 644     }
 645
 646   /* No action is needed if the target is a register and if the field
 647      lies completely outside that register.  This can occur if the source
 648      code contains an out-of-bounds access to a small array.  */
 649   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 650     return true;
 651
 652   /* Use vec_set patterns for inserting parts of vectors whenever
 653      available.  */
 654   if (VECTOR_MODE_P (GET_MODE (op0))
 655       && !MEM_P (op0)
 656       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 657       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 658       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 659       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 660     {
 661       struct expand_operand ops[3];
 662       enum machine_mode outermode = GET_MODE (op0);
 663       enum machine_mode innermode = GET_MODE_INNER (outermode);
 664       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 665       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 666
 667       create_fixed_operand (&ops[0], op0);
 668       create_input_operand (&ops[1], value, innermode);
 669       create_integer_operand (&ops[2], pos);
 670       if (maybe_expand_insn (icode, 3, ops))
 671         return true;
 672     }
 673
 674   /* If the target is a register, overwriting the entire object, or storing
 675      a full-word or multi-word field can be done with just a SUBREG.  */
 676   if (!MEM_P (op0)
 677       && bitsize == GET_MODE_BITSIZE (fieldmode)
 678       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 679           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 680     {
 681       /* Use the subreg machinery either to narrow OP0 to the required
 682          words or to cope with mode punning between equal-sized modes.  */
 683       rtx sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 684                                      bitnum / BITS_PER_UNIT);
 685       if (sub)
 686         {
 687           emit_move_insn (sub, value);
 688           return true;
 689         }
 690     }
 691
 692   /* If the target is memory, storing any naturally aligned field can be
 693      done with a simple store.  For targets that support fast unaligned
 694      memory, any naturally sized, unit aligned field can be done directly.  */
 695   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
 696     {
 697       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 698       emit_move_insn (op0, value);
 699       return true;
 700     }
 701
 702   /* Make sure we are playing with integral modes.  Pun with subregs
 703      if we aren't.  This must come after the entire register case above,
 704      since that case is valid for any mode.  The following cases are only
 705      valid for integral modes.  */
 706   {
 707     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 708     if (imode != GET_MODE (op0))
 709       {
 710         if (MEM_P (op0))
 711           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
 712         else
 713           {
 714             gcc_assert (imode != BLKmode);
 715             op0 = gen_lowpart (imode, op0);
 716           }
 717       }
 718   }
 719
 720   /* Storing an lsb-aligned field in a register
 721      can be done with a movstrict instruction.  */
 722
 723   if (!MEM_P (op0)
 724       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 725       && bitsize == GET_MODE_BITSIZE (fieldmode)
 726       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 727     {
 728       struct expand_operand ops[2];
 729       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 730       rtx arg0 = op0;
 731       unsigned HOST_WIDE_INT subreg_off;
 732
 733       if (GET_CODE (arg0) == SUBREG)
 734         {
 735           /* Else we've got some float mode source being extracted into
 736              a different float mode destination -- this combination of
 737              subregs results in Severe Tire Damage.  */
 738           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 739                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 740                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 741           arg0 = SUBREG_REG (arg0);
 742         }
 743
 744       subreg_off = bitnum / BITS_PER_UNIT;
 745       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 746         {
 747           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 748
 749           create_fixed_operand (&ops[0], arg0);
 750           /* Shrink the source operand to FIELDMODE.  */
 751           create_convert_operand_to (&ops[1], value, fieldmode, false);
 752           if (maybe_expand_insn (icode, 2, ops))
 753             return true;
 754         }
 755     }
 756
 757   /* Handle fields bigger than a word.  */
 758
 759   if (bitsize > BITS_PER_WORD)
 760     {
 761       /* Here we transfer the words of the field
 762          in the order least significant first.
 763          This is because the most significant word is the one which may
 764          be less than full.
 765          However, only do that if the value is not BLKmode.  */
 766
 767       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 768       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 769       unsigned int i;
 770       rtx last;
 771
 772       /* This is the mode we must force value to, so that there will be enough
 773          subwords to extract.  Note that fieldmode will often (always?) be
 774          VOIDmode, because that is what store_field uses to indicate that this
 775          is a bit field, but passing VOIDmode to operand_subword_force
 776          is not allowed.  */
 777       fieldmode = GET_MODE (value);
 778       if (fieldmode == VOIDmode)
 779         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 780
 781       last = get_last_insn ();
 782       for (i = 0; i < nwords; i++)
 783         {
 784           /* If I is 0, use the low-order word in both field and target;
 785              if I is 1, use the next to lowest word; and so on.  */
 786           unsigned int wordnum = (backwards
 787                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 788                                   - i - 1
 789                                   : i);
 790           unsigned int bit_offset = (backwards
 791                                      ? MAX ((int) bitsize - ((int) i + 1)
 792                                             * BITS_PER_WORD,
 793                                             0)
 794                                      : (int) i * BITS_PER_WORD);
 795           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 796           unsigned HOST_WIDE_INT new_bitsize =
 797             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 798
 799           /* If the remaining chunk doesn't have full wordsize we have
 800              to make sure that for big endian machines the higher order
 801              bits are used.  */
 802           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 803             value_word = simplify_expand_binop (word_mode, lshr_optab,
 804                                                 value_word,
 805                                                 GEN_INT (BITS_PER_WORD
 806                                                          - new_bitsize),
 807                                                 NULL_RTX, true,
 808                                                 OPTAB_LIB_WIDEN);
 809
 810           if (!store_bit_field_1 (op0, new_bitsize,
 811                                   bitnum + bit_offset,
 812                                   bitregion_start, bitregion_end,
 813                                   word_mode,
 814                                   value_word, fallback_p))
 815             {
 816               delete_insns_since (last);
 817               return false;
 818             }
 819         }
 820       return true;
 821     }
 822
 823   /* If VALUE has a floating-point or complex mode, access it as an
 824      integer of the corresponding size.  This can occur on a machine
 825      with 64 bit registers that uses SFmode for float.  It can also
 826      occur for unaligned float or complex fields.  */
 827   orig_value = value;
 828   if (GET_MODE (value) != VOIDmode
 829       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 830       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 831     {
 832       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 833       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 834     }
 835
 836   /* If OP0 is a multi-word register, narrow it to the affected word.
 837      If the region spans two words, defer to store_split_bit_field.  */
 838   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 839     {
 840       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
 841                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 842       gcc_assert (op0);
 843       bitnum %= BITS_PER_WORD;
 844       if (bitnum + bitsize > BITS_PER_WORD)
 845         {
 846           if (!fallback_p)
 847             return false;
 848
 849           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 850                                  bitregion_end, value);
 851           return true;
 852         }
 853     }
 854
 855   /* From here on we can assume that the field to be stored in fits
 856      within a word.  If the destination is a register, it too fits
 857      in a word.  */
 858
 859   extraction_insn insv;
 860   if (!MEM_P (op0)
 861       && get_best_reg_extraction_insn (&insv, EP_insv,
 862                                        GET_MODE_BITSIZE (GET_MODE (op0)),
 863                                        fieldmode)
 864       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 865     return true;
 866
 867   /* If OP0 is a memory, try copying it to a register and seeing if a
 868      cheap register alternative is available.  */
 869   if (MEM_P (op0))
 870     {
 871       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
 872                                         fieldmode)
 873           && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 874         return true;
 875
 876       rtx last = get_last_insn ();
 877
 878       /* Try loading part of OP0 into a register, inserting the bitfield
 879          into that, and then copying the result back to OP0.  */
 880       unsigned HOST_WIDE_INT bitpos;
 881       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
 882                                                bitregion_start, bitregion_end,
 883                                                fieldmode, &bitpos);
 884       if (xop0)
 885         {
 886           rtx tempreg = copy_to_reg (xop0);
 887           if (store_bit_field_1 (tempreg, bitsize, bitpos,
 888                                  bitregion_start, bitregion_end,
 889                                  fieldmode, orig_value, false))
 890             {
 891               emit_move_insn (xop0, tempreg);
 892               return true;
 893             }
 894           delete_insns_since (last);
 895         }
 896     }
 897
 898   if (!fallback_p)
 899     return false;
 900
 901   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
 902                          bitregion_end, value);
 903   return true;
 904 }
 905
 906 /* Generate code to store value from rtx VALUE
 907    into a bit-field within structure STR_RTX
 908    containing BITSIZE bits starting at bit BITNUM.
 909
 910    BITREGION_START is bitpos of the first bitfield in this region.
 911    BITREGION_END is the bitpos of the ending bitfield in this region.
 912    These two fields are 0, if the C++ memory model does not apply,
 913    or we are not interested in keeping track of bitfield regions.
 914
 915    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.

        Nothing is returned.  The store is either emitted directly here
        (the -fstrict-volatile-bitfields case) or delegated to
        store_bit_field_1, which is called with FALLBACK_P true; a false
        return from that call is treated as unreachable.  */
 916
 917 void
 918 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 919                  unsigned HOST_WIDE_INT bitnum,
 920                  unsigned HOST_WIDE_INT bitregion_start,
 921                  unsigned HOST_WIDE_INT bitregion_end,
 922                  enum machine_mode fieldmode,
 923                  rtx value)
 924 {
 925   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
 926   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
 927                                   bitregion_start, bitregion_end))
 928     {
 929       /* Storing any naturally aligned field can be done with a simple
 930          store.  For targets that support fast unaligned memory, any
 931          naturally sized, unit aligned field can be done directly.  */
 932       if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, fieldmode))
 933         {
               /* Re-address STR_RTX in FIELDMODE at byte offset
                  BITNUM / BITS_PER_UNIT and store VALUE with a plain move.  */
 934           str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
 935                                              bitnum / BITS_PER_UNIT);
 936           emit_move_insn (str_rtx, value);
 937         }
 938       else
 939         {
               /* BITNUM is passed both by value and by address, so the
                  callee can hand back an adjusted bit position.  */
 940           str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
 941                                           &bitnum);
 942           /* Explicitly override the C/C++ memory model; ignore the
 943              bit range so that we can do the access in the mode mandated
 944              by -fstrict-volatile-bitfields instead.  */
 945           store_fixed_bit_field_1 (str_rtx, bitsize, bitnum, value);
 946         }
 947
 948       return;
 949     }
 950
 951   /* Under the C++0x memory model, we must not touch bits outside the
 952      bit region.  Adjust the address to start at the beginning of the
 953      bit region.  */
 954   if (MEM_P (str_rtx) && bitregion_start > 0)
 955     {
 956       enum machine_mode bestmode;
 957       HOST_WIDE_INT offset, size;
 958
           /* The region is required to start on a byte boundary.  */
 959       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
 960
           /* Rebase on the start of the region: OFFSET is the region start
              in bytes, BITNUM and BITREGION_END become relative to it, and
              SIZE is the byte count, rounded up, from the region start to
              the end of the field.  */
 961       offset = bitregion_start / BITS_PER_UNIT;
 962       bitnum -= bitregion_start;
 963       size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
 964       bitregion_end -= bitregion_start;
 965       bitregion_start = 0;
 966       bestmode = get_best_mode (bitsize, bitnum,
 967                                 bitregion_start, bitregion_end,
 968                                 MEM_ALIGN (str_rtx), VOIDmode,
 969                                 MEM_VOLATILE_P (str_rtx));
 970       str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
 971     }
 972
       /* The last argument is FALLBACK_P; it is true here, so failure is
          not an expected outcome.  */
 973   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
 974                           bitregion_start, bitregion_end,
 975                           fieldmode, value, true))
 976     gcc_unreachable ();
 977 }
 978 \f
 979 /* Use shifts and boolean operations to store VALUE into a bit field of
 980    width BITSIZE in OP0, starting at bit BITNUM.

        BITREGION_START and BITREGION_END are handed to get_best_mode when
        an access mode is chosen for a MEM, and to store_split_bit_field
        when no single mode is found.  When OP0 is not a MEM it goes to
        store_fixed_bit_field_1 unchanged.  */
 981
 982 static void
 983 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 984                        unsigned HOST_WIDE_INT bitnum,
 985                        unsigned HOST_WIDE_INT bitregion_start,
 986                        unsigned HOST_WIDE_INT bitregion_end,
 987                        rtx value)
 988 {
 989   /* There is a case not handled here:
 990      a structure with a known alignment of just a halfword
 991      and a field split across two aligned halfwords within the structure.
 992      Or likewise a structure with a known alignment of just a byte
 993      and a field split across two bytes.
 994      Such cases are not supposed to be able to occur.  */
 995
 996   if (MEM_P (op0))
 997     {
           /* Start from OP0's own mode, replaced by word_mode when that
              mode has zero bit size or is wider than word_mode.  The
              result is the mode argument given to get_best_mode
              (presumably the widest mode it may choose -- confirm against
              get_best_mode).  */
 998       enum machine_mode mode = GET_MODE (op0);
 999       if (GET_MODE_BITSIZE (mode) == 0
1000           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1001         mode = word_mode;
1002       mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1003                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1004
1005       if (mode == VOIDmode)
1006         {
1007           /* The only way this should occur is if the field spans word
1008              boundaries.  */
1009           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1010                                  bitregion_end, value);
1011           return;
1012         }
1013
           /* Narrow OP0 to the chosen mode.  BITNUM is also passed by
              address so the callee can adjust it for the narrowed MEM.  */
1014       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1015     }
1016
       /* From here the store is done in the mode OP0 now has.  */
1017   store_fixed_bit_field_1 (op0, bitsize, bitnum, value);
1018 }
1019
1020 /* Helper function for store_fixed_bit_field, stores
1021    the bit field always using the MODE of OP0.

        OP0 must have a scalar integer mode (asserted below).  VALUE is
        reduced to BITSIZE bits where needed, shifted into position, and
        merged into a register copy of OP0 with AND and IOR; the result
        is then moved back into OP0.  */
1022
1023 static void
1024 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1025                          unsigned HOST_WIDE_INT bitnum,
1026                          rtx value)
1027 {
1028   enum machine_mode mode;
1029   rtx temp;
       /* Set only for a CONST_INT VALUE: ALL_ZERO when its low BITSIZE
          bits are all zero (the IOR below is skipped), ALL_ONE when they
          are all one (the AND below is skipped).  */
1030   int all_zero = 0;
1031   int all_one = 0;
1032
1033   mode = GET_MODE (op0);
1034   gcc_assert (SCALAR_INT_MODE_P (mode));
1035
1036   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1037      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1038
1039   if (BYTES_BIG_ENDIAN)
1040     /* BITNUM is the distance between our msb
1041        and that of the containing datum.
1042        Convert it to the distance from the lsb.  */
1043     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1044
1045   /* Now BITNUM is always the distance between our lsb
1046      and that of OP0.  */
1047
1048   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1049      we must first convert its mode to MODE.  */
1050
1051   if (CONST_INT_P (value))
1052     {
1053       HOST_WIDE_INT v = INTVAL (value);
1054
           /* Keep only the low BITSIZE bits.  The guard avoids shifting
              by the full width of HOST_WIDE_INT.  */
1055       if (bitsize < HOST_BITS_PER_WIDE_INT)
1056         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
1057
1058       if (v == 0)
1059         all_zero = 1;
1060       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1061                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
1062                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
1063         all_one = 1;
1064
1065       value = lshift_value (mode, v, bitnum);
1066     }
1067   else
1068     {
           /* Computed before the conversion below because it tests
              VALUE's original mode.  No masking is done when that mode is
              exactly BITSIZE bits wide, or when the field ends at the top
              bit of MODE (presumably because the left shift then discards
              the excess bits).  */
1069       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1070                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1071
1072       if (GET_MODE (value) != mode)
1073         value = convert_to_mode (mode, value, 1);
1074
           /* Mask VALUE down to its low BITSIZE bits.  */
1075       if (must_and)
1076         value = expand_binop (mode, and_optab, value,
1077                               mask_rtx (mode, 0, bitsize, 0),
1078                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1079       if (bitnum > 0)
1080         value = expand_shift (LSHIFT_EXPR, mode, value,
1081                               bitnum, NULL_RTX, 1);
1082     }
1083
1084   /* Now clear the chosen bits in OP0,
1085      except that if VALUE is -1 we need not bother.  */
1086   /* We keep the intermediates in registers to allow CSE to combine
1087      consecutive bitfield assignments.  */
1088
1089   temp = force_reg (mode, op0);
1090
1091   if (! all_one)
1092     {
           /* The mask is built for BITSIZE bits at BITNUM; the final
              argument 1 presumably requests the complemented mask so the
              AND clears the field -- confirm against mask_rtx.  */
1093       temp = expand_binop (mode, and_optab, temp,
1094                            mask_rtx (mode, bitnum, bitsize, 1),
1095                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1096       temp = force_reg (mode, temp);
1097     }
1098
1099   /* Now logical-or VALUE into OP0, unless it is zero.  */
1100
1101   if (! all_zero)
1102     {
1103       temp = expand_binop (mode, ior_optab, temp, value,
1104                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1105       temp = force_reg (mode, temp);
1106     }
1107
       /* Write the merged value back unless TEMP is OP0 itself.  The
          move targets a copy_rtx of OP0 rather than OP0 directly.  */
1108   if (op0 != temp)
1109     {
1110       op0 = copy_rtx (op0);
1111       emit_move_insn (op0, temp);
1112     }
1113 }
1114 \f
1115 /* Store a bit field that is split across multiple accessible memory objects.
1116
1117    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1118    BITSIZE is the field width; BITPOS the position of its first bit
1119    (within the word).
1120    VALUE is the value to store.
        BITREGION_START and BITREGION_END are passed through to
        store_fixed_bit_field; a nonzero BITREGION_END can also shrink
        UNIT when OP0 is not a register.
1121
1122    This does not yet handle fields wider than BITS_PER_WORD.  */
1123
1124 static void
1125 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1126                        unsigned HOST_WIDE_INT bitpos,
1127                        unsigned HOST_WIDE_INT bitregion_start,
1128                        unsigned HOST_WIDE_INT bitregion_end,
1129                        rtx value)
1130 {
       /* UNIT is the size in bits of the chunks the field is stored in;
          BITSDONE counts the bits of the field stored so far.  */
1131   unsigned int unit;
1132   unsigned int bitsdone = 0;
1133
1134   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1135      much at a time.  */
1136   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1137     unit = BITS_PER_WORD;
1138   else
1139     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1140
1141   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1142      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1143      again, and we will mutually recurse forever.  */
1144   if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1145     unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1146
1147   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1148      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1149      that VALUE might be a floating-point constant.  */
1150   if (CONSTANT_P (value) && !CONST_INT_P (value))
1151     {
1152       rtx word = gen_lowpart_common (word_mode, value);
1153
1154       if (word && (value != word))
1155         value = word;
1156       else
1157         value = gen_lowpart_common (word_mode,
1158                                     force_reg (GET_MODE (value) != VOIDmode
1159                                                ? GET_MODE (value)
1160                                                : word_mode, value));
1161     }
1162
       /* Each iteration stores one piece of at most THISSIZE bits.  */
1163   while (bitsdone < bitsize)
1164     {
1165       unsigned HOST_WIDE_INT thissize;
1166       rtx part, word;
1167       unsigned HOST_WIDE_INT thispos;
1168       unsigned HOST_WIDE_INT offset;
1169
           /* OFFSET is the index of the UNIT-sized chunk holding the next
              bit to store; THISPOS is that bit's position in the chunk.  */
1170       offset = (bitpos + bitsdone) / unit;
1171       thispos = (bitpos + bitsdone) % unit;
1172
1173       /* When region of bytes we can touch is restricted, decrease
1174          UNIT close to the end of the region as needed.  If op0 is a REG
1175          or SUBREG of REG, don't do this, as there can't be data races
1176          on a register and we can expand shorter code in some cases.  */
1177       if (bitregion_end
1178           && unit > BITS_PER_UNIT
1179           && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1180           && !REG_P (op0)
1181           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1182         {
               /* Halve UNIT and redo this iteration; BITSDONE is unchanged,
                  so OFFSET and THISPOS are recomputed for the same bit.  */
1183           unit = unit / 2;
1184           continue;
1185         }
1186
1187       /* THISSIZE must not overrun a word boundary.  Otherwise,
1188          store_fixed_bit_field will call us again, and we will mutually
1189          recurse forever.  */
1190       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1191       thissize = MIN (thissize, unit - thispos);
1192
1193       if (BYTES_BIG_ENDIAN)
1194         {
1195           /* Fetch successively less significant portions.  */
1196           if (CONST_INT_P (value))
1197             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1198                              >> (bitsize - bitsdone - thissize))
1199                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1200           else
1201             {
1202               int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1203               /* The args are chosen so that the last part includes the
1204                  lsb.  Give extract_bit_field the value it needs (with
1205                  endianness compensation) to fetch the piece we want.  */
1206               part = extract_fixed_bit_field (word_mode, value, thissize,
1207                                               total_bits - bitsize + bitsdone,
1208                                               NULL_RTX, 1);
1209             }
1210         }
1211       else
1212         {
1213           /* Fetch successively more significant portions.  */
1214           if (CONST_INT_P (value))
1215             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1216                              >> bitsdone)
1217                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1218           else
1219             part = extract_fixed_bit_field (word_mode, value, thissize,
1220                                             bitsdone, NULL_RTX, 1);
1221         }
1222
1223       /* If OP0 is a register, then handle OFFSET here.
1224
1225          When handling multiword bitfields, extract_bit_field may pass
1226          down a word_mode SUBREG of a larger REG for a bitfield that actually
1227          crosses a word boundary.  Thus, for a SUBREG, we must find
1228          the current word starting from the base register.  */
1229       if (GET_CODE (op0) == SUBREG)
1230         {
1231           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1232                             + (offset * unit / BITS_PER_WORD);
1233           enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
               /* An inner register narrower than a word has only word 0;
                  any other word index yields const0_rtx, which the store
                  below skips.  */
1234           if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1235             word = word_offset ? const0_rtx : op0;
1236           else
1237             word = operand_subword_force (SUBREG_REG (op0), word_offset,
1238                                           GET_MODE (SUBREG_REG (op0)));
               /* Reduce OFFSET to the chunk index within the selected word.
                  The mask form assumes BITS_PER_WORD / UNIT is a power of
                  two -- TODO confirm.  */
1239           offset &= BITS_PER_WORD / unit - 1;
1240         }
1241       else if (REG_P (op0))
1242         {
1243           enum machine_mode op0_mode = GET_MODE (op0);
               /* Same handling as the SUBREG case, applied to OP0 itself.  */
1244           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1245             word = offset ? const0_rtx : op0;
1246           else
1247             word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1248                                           GET_MODE (op0));
1249           offset &= BITS_PER_WORD / unit - 1;
1250         }
1251       else
1252         word = op0;
1253
1254       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1255          it is just an out-of-bounds access.  Ignore it.  */
1256       if (word != const0_rtx)
1257         store_fixed_bit_field (word, thissize, offset * unit + thispos,
1258                                bitregion_start, bitregion_end, part);
           /* Advance even when the piece was skipped, so the loop ends.  */
1259       bitsdone += thissize;
1260     }
1261 }
1262 \f
1263 /* A subroutine of extract_bit_field_1 that converts return value X
1264    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1265    to extract_bit_field.

        X is returned unchanged when its mode is already TMODE or MODE;
        in every other case the returned rtx has mode TMODE.  */
1266
1267 static rtx
1268 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1269                              enum machine_mode tmode, bool unsignedp)
1270 {
       /* Nothing to do if X already has one of the acceptable modes.  */
1271   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1272     return x;
1273
1274   /* If TMODE is not a scalar integral mode, first convert X to the
1275      integer mode of TMODE's size, force it into a register, and
1276      then access it in TMODE via gen_lowpart.  */
1277   if (!SCALAR_INT_MODE_P (tmode))
1278     {
1279       enum machine_mode smode;
1280
1281       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1282       x = convert_to_mode (smode, x, unsignedp);
1283       x = force_reg (smode, x);
1284       return gen_lowpart (tmode, x);
1285     }
1286
       /* TMODE is a scalar integer mode: convert directly, with the
          signedness given by UNSIGNEDP.  */
1287   return convert_to_mode (tmode, x, unsignedp);
1288 }
1289
1290 /* Try to use an ext(z)v pattern to extract a field from OP0.
1291    Return the extracted value on success, otherwise return null.
1292    EXT_MODE is the mode of the extraction and the other arguments
1293    are as for extract_bit_field.

        EXTV describes the pattern; its field_mode (used as EXT_MODE),
        struct_mode and icode members are read here.  Null is returned
        when BITSIZE is zero or wider than EXT_MODE, when OP0 is a SUBREG
        whose mode is not EXT_MODE, or when the insn cannot be expanded.  */
1294
1295 static rtx
1296 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1297                               unsigned HOST_WIDE_INT bitsize,
1298                               unsigned HOST_WIDE_INT bitnum,
1299                               int unsignedp, rtx target,
1300                               enum machine_mode mode, enum machine_mode tmode)
1301 {
1302   struct expand_operand ops[4];
       /* SPEC_TARGET is the target as requested (or the fresh TMODE
          register made below).  SPEC_TARGET_SUBREG is its EXT_MODE lowpart,
          recorded only when EXT_MODE has more precision than that target.
          Both are compared with the insn's actual output at the end.  */
1303   rtx spec_target = target;
1304   rtx spec_target_subreg = 0;
1305   enum machine_mode ext_mode = extv->field_mode;
1306   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1307
       /* Give up on empty fields and on fields wider than EXT_MODE.  */
1308   if (bitsize == 0 || unit < bitsize)
1309     return NULL_RTX;
1310
1311   if (MEM_P (op0))
1312     /* Get a reference to the first byte of the field.  */
1313     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1314                                 &bitnum);
1315   else
1316     {
1317       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1318       if (BYTES_BIG_ENDIAN)
1319         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1320
1321       /* If op0 is a register, we need it in EXT_MODE to make it
1322          acceptable to the format of ext(z)v.  */
1323       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1324         return NULL_RTX;
1325       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1326         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1327     }
1328
1329   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1330      "backwards" from the size of the unit we are extracting from.
1331      Otherwise, we count bits from the most significant on a
1332      BYTES/BITS_BIG_ENDIAN machine.  */
1333
1334   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1335     bitnum = unit - bitsize - bitnum;
1336
       /* With no target supplied, extract into a new TMODE register.  */
1337   if (target == 0)
1338     target = spec_target = gen_reg_rtx (tmode);
1339
1340   if (GET_MODE (target) != ext_mode)
1341     {
1342       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1343          between the mode of the extraction (word_mode) and the target
1344          mode.  Instead, create a temporary and use convert_move to set
1345          the target.  */
1346       if (REG_P (target)
1347           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1348         {
1349           target = gen_lowpart (ext_mode, target);
1350           if (GET_MODE_PRECISION (ext_mode)
1351               > GET_MODE_PRECISION (GET_MODE (spec_target)))
1352             spec_target_subreg = target;
1353         }
1354       else
1355         target = gen_reg_rtx (ext_mode);
1356     }
1357
       /* Operands: 0 = output in EXT_MODE, 1 = the structure, 2 = field
          width, 3 = field position.  */
1358   create_output_operand (&ops[0], target, ext_mode);
1359   create_fixed_operand (&ops[1], op0);
1360   create_integer_operand (&ops[2], bitsize);
1361   create_integer_operand (&ops[3], bitnum);
1362   if (maybe_expand_insn (extv->icode, 4, ops))
1363     {
           /* Use the output operand as left by the expander.  If that is
              the requested target, or the recorded lowpart of it, return
              the requested target; otherwise convert the result.  */
1364       target = ops[0].value;
1365       if (target == spec_target)
1366         return target;
1367       if (target == spec_target_subreg)
1368         return spec_target;
1369       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1370     }
1371   return NULL_RTX;
1372 }
1373
1374 /* A subroutine of extract_bit_field, with the same arguments.
1375    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1376    if we can find no other means of implementing the operation.
1377    if FALLBACK_P is false, return NULL instead.  */
1378
1379 static rtx
1380 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1381                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1382                      enum machine_mode mode, enum machine_mode tmode,
1383                      bool fallback_p)
1384 {
1385   rtx op0 = str_rtx;
1386   enum machine_mode int_mode;
1387   enum machine_mode mode1;
1388
1389   if (tmode == VOIDmode)
1390     tmode = mode;
1391
1392   while (GET_CODE (op0) == SUBREG)
1393     {
1394       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1395       op0 = SUBREG_REG (op0);
1396     }
1397
1398   /* If we have an out-of-bounds access to a register, just return an
1399      uninitialized register of the required mode.  This can occur if the
1400      source code contains an out-of-bounds access to a small array.  */
1401   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1402     return gen_reg_rtx (tmode);
1403
1404   if (REG_P (op0)
1405       && mode == GET_MODE (op0)
1406       && bitnum == 0
1407       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1408     {
1409       /* We're trying to extract a full register from itself.  */
1410       return op0;
1411     }
1412
1413   /* See if we can get a better vector mode before extracting.  */
1414   if (VECTOR_MODE_P (GET_MODE (op0))
1415       && !MEM_P (op0)
1416       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1417     {
1418       enum machine_mode new_mode;
1419
1420       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1421         new_mode = MIN_MODE_VECTOR_FLOAT;
1422       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1423         new_mode = MIN_MODE_VECTOR_FRACT;
1424       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1425         new_mode = MIN_MODE_VECTOR_UFRACT;
1426       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1427         new_mode = MIN_MODE_VECTOR_ACCUM;
1428       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1429         new_mode = MIN_MODE_VECTOR_UACCUM;
1430       else
1431         new_mode = MIN_MODE_VECTOR_INT;
1432
1433       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1434         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1435             && targetm.vector_mode_supported_p (new_mode))
1436           break;
1437       if (new_mode != VOIDmode)
1438         op0 = gen_lowpart (new_mode, op0);
1439     }
1440
1441   /* Use vec_extract patterns for extracting parts of vectors whenever
1442      available.  */
1443   if (VECTOR_MODE_P (GET_MODE (op0))
1444       && !MEM_P (op0)
1445       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1446       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1447           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1448     {
1449       struct expand_operand ops[3];
1450       enum machine_mode outermode = GET_MODE (op0);
1451       enum machine_mode innermode = GET_MODE_INNER (outermode);
1452       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1453       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1454
1455       create_output_operand (&ops[0], target, innermode);
1456       create_input_operand (&ops[1], op0, outermode);
1457       create_integer_operand (&ops[2], pos);
1458       if (maybe_expand_insn (icode, 3, ops))
1459         {
1460           target = ops[0].value;
1461           if (GET_MODE (target) != mode)
1462             return gen_lowpart (tmode, target);
1463           return target;
1464         }
1465     }
1466
1467   /* Make sure we are playing with integral modes.  Pun with subregs
1468      if we aren't.  */
1469   {
1470     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1471     if (imode != GET_MODE (op0))
1472       {
1473         if (MEM_P (op0))
1474           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1475         else if (imode != BLKmode)
1476           {
1477             op0 = gen_lowpart (imode, op0);
1478
1479             /* If we got a SUBREG, force it into a register since we
1480                aren't going to be able to do another SUBREG on it.  */
1481             if (GET_CODE (op0) == SUBREG)
1482               op0 = force_reg (imode, op0);
1483           }
1484         else if (REG_P (op0))
1485           {
1486             rtx reg, subreg;
1487             imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1488                                             MODE_INT);
1489             reg = gen_reg_rtx (imode);
1490             subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1491             emit_move_insn (subreg, op0);
1492             op0 = reg;
1493             bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1494           }
1495         else
1496           {
1497             HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1498             rtx mem = assign_stack_temp (GET_MODE (op0), size);
1499             emit_move_insn (mem, op0);
1500             op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1501           }
1502       }
1503   }
1504
1505   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1506      If that's wrong, the solution is to test for it and set TARGET to 0
1507      if needed.  */
1508
1509   /* Get the mode of the field to use for atomic access or subreg
1510      conversion.  */
1511   mode1 = mode;
1512   if (SCALAR_INT_MODE_P (tmode))
1513     {
1514       enum machine_mode try_mode = mode_for_size (bitsize,
1515                                                   GET_MODE_CLASS (tmode), 0);
1516       if (try_mode != BLKmode)
1517         mode1 = try_mode;
1518     }
1519   gcc_assert (mode1 != BLKmode);
1520
1521   /* Extraction of a full MODE1 value can be done with a subreg as long
1522      as the least significant bit of the value is the least significant
1523      bit of either OP0 or a word of OP0.  */
1524   if (!MEM_P (op0)
1525       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1526       && bitsize == GET_MODE_BITSIZE (mode1)
1527       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1528     {
1529       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1530                                      bitnum / BITS_PER_UNIT);
1531       if (sub)
1532         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1533     }
1534
1535   /* Extraction of a full MODE1 value can be done with a load as long as
1536      the field is on a byte boundary and is sufficiently aligned.  */
1537   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1538     {
1539       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1540       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1541     }
1542
1543   /* Handle fields bigger than a word.  */
1544
1545   if (bitsize > BITS_PER_WORD)
1546     {
1547       /* Here we transfer the words of the field
1548          in the order least significant first.
1549          This is because the most significant word is the one which may
1550          be less than full.  */
1551
1552       unsigned int backwards = WORDS_BIG_ENDIAN;
1553       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1554       unsigned int i;
1555       rtx last;
1556
1557       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1558         target = gen_reg_rtx (mode);
1559
1560       /* Indicate for flow that the entire target reg is being set.  */
1561       emit_clobber (target);
1562
1563       last = get_last_insn ();
1564       for (i = 0; i < nwords; i++)
1565         {
1566           /* If I is 0, use the low-order word in both field and target;
1567              if I is 1, use the next to lowest word; and so on.  */
1568           /* Word number in TARGET to use.  */
1569           unsigned int wordnum
1570             = (backwards
1571                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1572                : i);
1573           /* Offset from start of field in OP0.  */
1574           unsigned int bit_offset = (backwards
1575                                      ? MAX ((int) bitsize - ((int) i + 1)
1576                                             * BITS_PER_WORD,
1577                                             0)
1578                                      : (int) i * BITS_PER_WORD);
1579           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1580           rtx result_part
1581             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1582                                              bitsize - i * BITS_PER_WORD),
1583                                    bitnum + bit_offset, 1, target_part,
1584                                    mode, word_mode, fallback_p);
1585
1586           gcc_assert (target_part);
1587           if (!result_part)
1588             {
1589               delete_insns_since (last);
1590               return NULL;
1591             }
1592
1593           if (result_part != target_part)
1594             emit_move_insn (target_part, result_part);
1595         }
1596
1597       if (unsignedp)
1598         {
1599           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1600              need to be zero'd out.  */
1601           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1602             {
1603               unsigned int i, total_words;
1604
1605               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1606               for (i = nwords; i < total_words; i++)
1607                 emit_move_insn
1608                   (operand_subword (target,
1609                                     backwards ? total_words - i - 1 : i,
1610                                     1, VOIDmode),
1611                    const0_rtx);
1612             }
1613           return target;
1614         }
1615
1616       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1617       target = expand_shift (LSHIFT_EXPR, mode, target,
1618                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1619       return expand_shift (RSHIFT_EXPR, mode, target,
1620                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1621     }
1622
1623   /* If OP0 is a multi-word register, narrow it to the affected word.
1624      If the region spans two words, defer to extract_split_bit_field.  */
1625   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1626     {
1627       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1628                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1629       bitnum %= BITS_PER_WORD;
1630       if (bitnum + bitsize > BITS_PER_WORD)
1631         {
1632           if (!fallback_p)
1633             return NULL_RTX;
1634           target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1635           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1636         }
1637     }
1638
1639   /* From here on we know the desired field is smaller than a word.
1640      If OP0 is a register, it too fits within a word.  */
1641   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1642   extraction_insn extv;
1643   if (!MEM_P (op0)
1644       /* ??? We could limit the structure size to the part of OP0 that
1645          contains the field, with appropriate checks for endianness
1646          and TRULY_NOOP_TRUNCATION.  */
1647       && get_best_reg_extraction_insn (&extv, pattern,
1648                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1649                                        tmode))
1650     {
1651       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1652                                                  unsignedp, target, mode,
1653                                                  tmode);
1654       if (result)
1655         return result;
1656     }
1657
1658   /* If OP0 is a memory, try copying it to a register and seeing if a
1659      cheap register alternative is available.  */
1660   if (MEM_P (op0))
1661     {
1662       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1663                                         tmode))
1664         {
1665           rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1666                                                      bitnum, unsignedp,
1667                                                      target, mode,
1668                                                      tmode);
1669           if (result)
1670             return result;
1671         }
1672
1673       rtx last = get_last_insn ();
1674
1675       /* Try loading part of OP0 into a register and extracting the
1676          bitfield from that.  */
1677       unsigned HOST_WIDE_INT bitpos;
1678       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1679                                                0, 0, tmode, &bitpos);
1680       if (xop0)
1681         {
1682           xop0 = copy_to_reg (xop0);
1683           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1684                                             unsignedp, target,
1685                                             mode, tmode, false);
1686           if (result)
1687             return result;
1688           delete_insns_since (last);
1689         }
1690     }
1691
1692   if (!fallback_p)
1693     return NULL;
1694
1695   /* Find a correspondingly-sized integer field, so we can apply
1696      shifts and masks to it.  */
1697   int_mode = int_mode_for_mode (tmode);
1698   if (int_mode == BLKmode)
1699     int_mode = int_mode_for_mode (mode);
1700   /* Should probably push op0 out to memory and then do a load.  */
1701   gcc_assert (int_mode != BLKmode);
1702
1703   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1704                                     target, unsignedp);
1705   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1706 }
1707
1708 /* Generate code to extract a byte-field from STR_RTX
1709    containing BITSIZE bits, starting at BITNUM,
1710    and put it in TARGET if possible (if TARGET is nonzero).
1711    Regardless of TARGET, we return the rtx for where the value is placed.
1712
1713    STR_RTX is the structure containing the byte (a REG or MEM).
1714    UNSIGNEDP is nonzero if this is an unsigned bit field.
1715    MODE is the natural mode of the field value once extracted.
1716    TMODE is the mode the caller would like the value to have;
1717    but the value may be returned with type MODE instead.
1718
1719    If a TARGET is specified and we can store in it at no extra cost,
1720    we do so, and return TARGET.
1721    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1722    if they are equally easy.  */
1723
1724 rtx
1725 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1726                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1727                    enum machine_mode mode, enum machine_mode tmode)
1728 {
1729   enum machine_mode mode1;
1730
1731   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1732   if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1733     mode1 = GET_MODE (str_rtx);
1734   else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1735     mode1 = GET_MODE (target);
1736   else
1737     mode1 = tmode;
1738
1739   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1740     {
1741       rtx result;
1742
1743       /* Extraction of a full MODE1 value can be done with a load as long as
1744          the field is on a byte boundary and is sufficiently aligned.  */
1745       if (simple_mem_bitfield_p (str_rtx, bitsize, bitnum, mode1))
1746         result = adjust_bitfield_address (str_rtx, mode1,
1747                                           bitnum / BITS_PER_UNIT);
1748       else
1749         {
1750           str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1751                                           &bitnum);
1752           result = extract_fixed_bit_field_1 (mode, str_rtx, bitsize, bitnum,
1753                                               target, unsignedp);
1754         }
1755
1756       return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1757     }
1758
1759   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1760                               target, mode, tmode, true);
1761 }
1762 \f
1763 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1764    from bit BITNUM of OP0.
1765
1766    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1767    If TARGET is nonzero, attempts to store the value there
1768    and return TARGET, but this is not guaranteed.
1769    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1770
1771 static rtx
1772 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1773                          unsigned HOST_WIDE_INT bitsize,
1774                          unsigned HOST_WIDE_INT bitnum, rtx target,
1775                          int unsignedp)
1776 {
1777   if (MEM_P (op0))
1778     {
1779       enum machine_mode mode
1780         = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1781                          MEM_VOLATILE_P (op0));
1782
1783       if (mode == VOIDmode)
1784         /* The only way this should occur is if the field spans word
1785            boundaries.  */
1786         return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1787
1788       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1789     }
1790
1791   return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1792                                     target, unsignedp);
1793 }
1794
1795 /* Helper function for extract_fixed_bit_field, extracts
1796    the bit field always using the MODE of OP0.  */
1797
1798 static rtx
1799 extract_fixed_bit_field_1 (enum machine_mode tmode, rtx op0,
1800                            unsigned HOST_WIDE_INT bitsize,
1801                            unsigned HOST_WIDE_INT bitnum, rtx target,
1802                            int unsignedp)
1803 {
1804   enum machine_mode mode = GET_MODE (op0);
1805   gcc_assert (SCALAR_INT_MODE_P (mode));
1806
1807   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1808      for invalid input, such as extract equivalent of f5 from
1809      gcc.dg/pr48335-2.c.  */
1810
1811   if (BYTES_BIG_ENDIAN)
1812     /* BITNUM is the distance between our msb and that of OP0.
1813        Convert it to the distance from the lsb.  */
1814     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1815
1816   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1817      We have reduced the big-endian case to the little-endian case.  */
1818
1819   if (unsignedp)
1820     {
1821       if (bitnum)
1822         {
1823           /* If the field does not already start at the lsb,
1824              shift it so it does.  */
1825           /* Maybe propagate the target for the shift.  */
1826           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1827           if (tmode != mode)
1828             subtarget = 0;
1829           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1830         }
1831       /* Convert the value to the desired mode.  */
1832       if (mode != tmode)
1833         op0 = convert_to_mode (tmode, op0, 1);
1834
1835       /* Unless the msb of the field used to be the msb when we shifted,
1836          mask out the upper bits.  */
1837
1838       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1839         return expand_binop (GET_MODE (op0), and_optab, op0,
1840                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1841                              target, 1, OPTAB_LIB_WIDEN);
1842       return op0;
1843     }
1844
1845   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1846      then arithmetic-shift its lsb to the lsb of the word.  */
1847   op0 = force_reg (mode, op0);
1848
1849   /* Find the narrowest integer mode that contains the field.  */
1850
1851   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1852        mode = GET_MODE_WIDER_MODE (mode))
1853     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1854       {
1855         op0 = convert_to_mode (mode, op0, 0);
1856         break;
1857       }
1858
1859   if (mode != tmode)
1860     target = 0;
1861
1862   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1863     {
1864       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1865       /* Maybe propagate the target for the shift.  */
1866       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1867       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1868     }
1869
1870   return expand_shift (RSHIFT_EXPR, mode, op0,
1871                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1872 }
1873 \f
1874 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1875    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1876    complement of that if COMPLEMENT.  The mask is truncated if
1877    necessary to the width of mode MODE.  The mask is zero-extended if
1878    BITSIZE+BITPOS is too small for MODE.  */
1879
1880 static rtx
1881 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1882 {
1883   double_int mask;
1884
1885   mask = double_int::mask (bitsize);
1886   mask = mask.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1887
1888   if (complement)
1889     mask = ~mask;
1890
1891   return immed_double_int_const (mask, mode);
1892 }
1893
1894 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1895    VALUE << BITPOS.  */
1896
1897 static rtx
1898 lshift_value (enum machine_mode mode, unsigned HOST_WIDE_INT value,
1899               int bitpos)
1900 {
1901   double_int val;
1902
1903   val = double_int::from_uhwi (value);
1904   val = val.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1905
1906   return immed_double_int_const (val, mode);
1907 }
1908 \f
1909 /* Extract a bit field that is split across two words
1910    and return an RTX for the result.
1911
1912    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1913    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1914    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1915
1916 static rtx
1917 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1918                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1919 {
1920   unsigned int unit;
1921   unsigned int bitsdone = 0;
1922   rtx result = NULL_RTX;
1923   int first = 1;
1924
1925   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1926      much at a time.  */
1927   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1928     unit = BITS_PER_WORD;
1929   else
1930     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1931
1932   while (bitsdone < bitsize)
1933     {
1934       unsigned HOST_WIDE_INT thissize;
1935       rtx part, word;
1936       unsigned HOST_WIDE_INT thispos;
1937       unsigned HOST_WIDE_INT offset;
1938
1939       offset = (bitpos + bitsdone) / unit;
1940       thispos = (bitpos + bitsdone) % unit;
1941
1942       /* THISSIZE must not overrun a word boundary.  Otherwise,
1943          extract_fixed_bit_field will call us again, and we will mutually
1944          recurse forever.  */
1945       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1946       thissize = MIN (thissize, unit - thispos);
1947
1948       /* If OP0 is a register, then handle OFFSET here.
1949
1950          When handling multiword bitfields, extract_bit_field may pass
1951          down a word_mode SUBREG of a larger REG for a bitfield that actually
1952          crosses a word boundary.  Thus, for a SUBREG, we must find
1953          the current word starting from the base register.  */
1954       if (GET_CODE (op0) == SUBREG)
1955         {
1956           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1957           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1958                                         GET_MODE (SUBREG_REG (op0)));
1959           offset = 0;
1960         }
1961       else if (REG_P (op0))
1962         {
1963           word = operand_subword_force (op0, offset, GET_MODE (op0));
1964           offset = 0;
1965         }
1966       else
1967         word = op0;
1968
1969       /* Extract the parts in bit-counting order,
1970          whose meaning is determined by BYTES_PER_UNIT.
1971          OFFSET is in UNITs, and UNIT is in bits.  */
1972       part = extract_fixed_bit_field (word_mode, word, thissize,
1973                                       offset * unit + thispos, 0, 1);
1974       bitsdone += thissize;
1975
1976       /* Shift this part into place for the result.  */
1977       if (BYTES_BIG_ENDIAN)
1978         {
1979           if (bitsize != bitsdone)
1980             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1981                                  bitsize - bitsdone, 0, 1);
1982         }
1983       else
1984         {
1985           if (bitsdone != thissize)
1986             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1987                                  bitsdone - thissize, 0, 1);
1988         }
1989
1990       if (first)
1991         result = part;
1992       else
1993         /* Combine the parts with bitwise or.  This works
1994            because we extracted each part as an unsigned bit field.  */
1995         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1996                                OPTAB_LIB_WIDEN);
1997
1998       first = 0;
1999     }
2000
2001   /* Unsigned bit field: we are done.  */
2002   if (unsignedp)
2003     return result;
2004   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2005   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2006                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2007   return expand_shift (RSHIFT_EXPR, word_mode, result,
2008                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2009 }
2010 \f
2011 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2012    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2013    MODE, fill the upper bits with zeros.  Fail if the layout of either
2014    mode is unknown (as for CC modes) or if the extraction would involve
2015    unprofitable mode punning.  Return the value on success, otherwise
2016    return null.
2017
2018    This is different from gen_lowpart* in these respects:
2019
2020      - the returned value must always be considered an rvalue
2021
2022      - when MODE is wider than SRC_MODE, the extraction involves
2023        a zero extension
2024
2025      - when MODE is smaller than SRC_MODE, the extraction involves
2026        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2027
2028    In other words, this routine performs a computation, whereas the
2029    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2030    operations.  */
2031
2032 rtx
2033 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2034 {
2035   enum machine_mode int_mode, src_int_mode;
2036
2037   if (mode == src_mode)
2038     return src;
2039
2040   if (CONSTANT_P (src))
2041     {
2042       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2043          fails, it will happily create (subreg (symbol_ref)) or similar
2044          invalid SUBREGs.  */
2045       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2046       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2047       if (ret)
2048         return ret;
2049
2050       if (GET_MODE (src) == VOIDmode
2051           || !validate_subreg (mode, src_mode, src, byte))
2052         return NULL_RTX;
2053
2054       src = force_reg (GET_MODE (src), src);
2055       return gen_rtx_SUBREG (mode, src, byte);
2056     }
2057
2058   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2059     return NULL_RTX;
2060
2061   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2062       && MODES_TIEABLE_P (mode, src_mode))
2063     {
2064       rtx x = gen_lowpart_common (mode, src);
2065       if (x)
2066         return x;
2067     }
2068
2069   src_int_mode = int_mode_for_mode (src_mode);
2070   int_mode = int_mode_for_mode (mode);
2071   if (src_int_mode == BLKmode || int_mode == BLKmode)
2072     return NULL_RTX;
2073
2074   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2075     return NULL_RTX;
2076   if (!MODES_TIEABLE_P (int_mode, mode))
2077     return NULL_RTX;
2078
2079   src = gen_lowpart (src_int_mode, src);
2080   src = convert_modes (int_mode, src_int_mode, src, true);
2081   src = gen_lowpart (mode, src);
2082   return src;
2083 }
2084 \f
2085 /* Add INC into TARGET.  */
2086
2087 void
2088 expand_inc (rtx target, rtx inc)
2089 {
2090   rtx value = expand_binop (GET_MODE (target), add_optab,
2091                             target, inc,
2092                             target, 0, OPTAB_LIB_WIDEN);
2093   if (value != target)
2094     emit_move_insn (target, value);
2095 }
2096
2097 /* Subtract DEC from TARGET.  */
2098
2099 void
2100 expand_dec (rtx target, rtx dec)
2101 {
2102   rtx value = expand_binop (GET_MODE (target), sub_optab,
2103                             target, dec,
2104                             target, 0, OPTAB_LIB_WIDEN);
2105   if (value != target)
2106     emit_move_insn (target, value);
2107 }
2108 \f
/* Output a shift instruction for expression code CODE,
   with SHIFTED being the rtx for the value to shift,
   and AMOUNT the rtx for the amount to shift by.
   Store the result in the rtx TARGET, if that is convenient.
   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
   Return the rtx for where the value is.

   CODE is treated as a leftward operation when it is LSHIFT_EXPR or
   LROTATE_EXPR, and as a rotate when it is LROTATE_EXPR or RROTATE_EXPR.
   MODE is the mode the operation is performed in; when it is a vector
   mode, the range checks on a constant AMOUNT use the bit size of its
   element mode.  If AMOUNT is, or is reduced to, const0_rtx, SHIFTED
   is returned unchanged.  The function asserts that one of the
   expansion strategies produced a result.  */

static rtx
expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
                rtx amount, rtx target, int unsignedp)
{
  rtx op1, temp = 0;
  int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
  int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
  /* Optabs to use; these are the scalar-amount ones unless both MODE
     and the amount turn out to be vectors (see below).  */
  optab lshift_optab = ashl_optab;
  optab rshift_arith_optab = ashr_optab;
  optab rshift_uns_optab = lshr_optab;
  optab lrotate_optab = rotl_optab;
  optab rrotate_optab = rotr_optab;
  enum machine_mode op1_mode;
  /* Mode whose bit size bounds the shift count: MODE itself, or its
     element mode when MODE is a vector mode.  */
  enum machine_mode scalar_mode = mode;
  int attempt;
  bool speed = optimize_insn_for_speed_p ();

  if (VECTOR_MODE_P (mode))
    scalar_mode = GET_MODE_INNER (mode);
  op1 = amount;
  op1_mode = GET_MODE (op1);

  /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
     shift amount is a vector, use the vector/vector shift patterns.  */
  if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
    {
      lshift_optab = vashl_optab;
      rshift_arith_optab = vashr_optab;
      rshift_uns_optab = vlshr_optab;
      lrotate_optab = vrotl_optab;
      rrotate_optab = vrotr_optab;
    }

  /* Previously detected shift-counts computed by NEGATE_EXPR
     and shifted in the other direction; but that does not work
     on all machines.  */

  /* When SHIFT_COUNT_TRUNCATED holds, reduce an out-of-range constant
     count modulo the bit size, and replace a lowpart SUBREG between
     scalar integer modes by the register it wraps.  */
  if (SHIFT_COUNT_TRUNCATED)
    {
      if (CONST_INT_P (op1)
          && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
              (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
        op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
                       % GET_MODE_BITSIZE (scalar_mode));
      else if (GET_CODE (op1) == SUBREG
               && subreg_lowpart_p (op1)
               && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
               && SCALAR_INT_MODE_P (GET_MODE (op1)))
        op1 = SUBREG_REG (op1);
    }

  /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
     prefer left rotation, if op1 is from bitsize / 2 + 1 to
     bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
     amount instead.  */
  if (rotate
      && CONST_INT_P (op1)
      && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
                   GET_MODE_BITSIZE (scalar_mode) - 1))
    {
      op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
      left = !left;
      code = left ? LROTATE_EXPR : RROTATE_EXPR;
    }

  /* Shifting or rotating by zero leaves the value unchanged.  */
  if (op1 == const0_rtx)
    return shifted;

  /* Check whether it is cheaper to implement a left shift by a constant
     bit count by a sequence of additions.  */
  if (code == LSHIFT_EXPR
      && CONST_INT_P (op1)
      && INTVAL (op1) > 0
      && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
      && INTVAL (op1) < MAX_BITS_PER_WORD
      && (shift_cost (speed, mode, INTVAL (op1))
          > INTVAL (op1) * add_cost (speed, mode))
      && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
    {
      int i;
      /* Each iteration adds the value to itself, i.e. shifts it left
         by one bit.  */
      for (i = 0; i < INTVAL (op1); i++)
        {
          temp = force_reg (mode, shifted);
          shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
                                  unsignedp, OPTAB_LIB_WIDEN);
        }
      return shifted;
    }

  /* Try OPTAB_DIRECT, then OPTAB_WIDEN, then OPTAB_LIB_WIDEN, stopping
     as soon as one of them yields a result.  */
  for (attempt = 0; temp == 0 && attempt < 3; attempt++)
    {
      enum optab_methods methods;

      if (attempt == 0)
        methods = OPTAB_DIRECT;
      else if (attempt == 1)
        methods = OPTAB_WIDEN;
      else
        methods = OPTAB_LIB_WIDEN;

      if (rotate)
        {
          /* Widening does not work for rotation.  */
          if (methods == OPTAB_WIDEN)
            continue;
          else if (methods == OPTAB_LIB_WIDEN)
            {
              /* If we have been unable to open-code this by a rotation,
                 do it as the IOR of two shifts.  I.e., to rotate A
                 by N bits, compute
                 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
                 where C is the bitsize of A.

                 It is theoretically possible that the target machine might
                 not be able to perform either shift and hence we would
                 be making two libcalls rather than just the one for the
                 shift (similarly if IOR could not be done).  We will allow
                 this extremely unlikely lossage to avoid complicating the
                 code below.  */

              /* TARGET is not offered to the second shift when it is
                 the very rtx being shifted.  */
              rtx subtarget = target == shifted ? 0 : target;
              rtx new_amount, other_amount;
              rtx temp1;

              new_amount = op1;
              if (op1 == const0_rtx)
                return shifted;
              else if (CONST_INT_P (op1))
                other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
                                        - INTVAL (op1));
              else
                {
                  /* Variable count: the opposite shift uses
                     (-N) & (precision - 1).  */
                  other_amount
                    = simplify_gen_unary (NEG, GET_MODE (op1),
                                          op1, GET_MODE (op1));
                  HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
                  other_amount
                    = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
                                           gen_int_mode (mask, GET_MODE (op1)));
                }

              shifted = force_reg (mode, shifted);

              /* Both halves are logical shifts (UNSIGNEDP is 1), in
                 opposite directions.  */
              temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
                                     mode, shifted, new_amount, 0, 1);
              temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
                                      mode, shifted, other_amount,
                                      subtarget, 1);
              return expand_binop (mode, ior_optab, temp, temp1, target,
                                   unsignedp, methods);
            }

          temp = expand_binop (mode,
                               left ? lrotate_optab : rrotate_optab,
                               shifted, op1, target, unsignedp, methods);
        }
      else if (unsignedp)
        /* Logical shift.  */
        temp = expand_binop (mode,
                             left ? lshift_optab : rshift_uns_optab,
                             shifted, op1, target, unsignedp, methods);

      /* Do arithmetic shifts.
         Also, if we are going to widen the operand, we can just as well
         use an arithmetic right-shift instead of a logical one.  */
      if (temp == 0 && ! rotate
          && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
        {
          enum optab_methods methods1 = methods;

          /* If trying to widen a log shift to an arithmetic shift,
             don't accept an arithmetic shift of the same size.  */
          if (unsignedp)
            methods1 = OPTAB_MUST_WIDEN;

          /* Arithmetic shift */

          temp = expand_binop (mode,
                               left ? lshift_optab : rshift_arith_optab,
                               shifted, op1, target, unsignedp, methods1);
        }

      /* We used to try extzv here for logical right shifts, but that was
         only useful for one machine, the VAX, and caused poor code
         generation there for lshrdi3, so the code was deleted and a
         define_expand for lshrsi3 was added to vax.md.  */
    }

  /* One of the attempts above must have produced a value.  */
  gcc_assert (temp);
  return temp;
}
2306
2307 /* Output a shift instruction for expression code CODE,
2308    with SHIFTED being the rtx for the value to shift,
2309    and AMOUNT the amount to shift by.
2310    Store the result in the rtx TARGET, if that is convenient.
2311    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2312    Return the rtx for where the value is.  */
2313
2314 rtx
2315 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2316               int amount, rtx target, int unsignedp)
2317 {
2318   return expand_shift_1 (code, mode,
2319                          shifted, GEN_INT (amount), target, unsignedp);
2320 }
2321
2322 /* Output a shift instruction for expression code CODE,
2323    with SHIFTED being the rtx for the value to shift,
2324    and AMOUNT the tree for the amount to shift by.
2325    Store the result in the rtx TARGET, if that is convenient.
2326    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2327    Return the rtx for where the value is.  */
2328
2329 rtx
2330 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2331                        tree amount, rtx target, int unsignedp)
2332 {
2333   return expand_shift_1 (code, mode,
2334                          shifted, expand_normal (amount), target, unsignedp);
2335 }
2336
2337 \f
2338 /* Indicates the type of fixup needed after a constant multiplication.
2339    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2340    the result should be negated, and ADD_VARIANT means that the
2341    multiplicand should be added to the result.  */
2342 enum mult_variant {basic_variant, negate_variant, add_variant};
2343
2344 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2345                         const struct mult_cost *, enum machine_mode mode);
2346 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2347                                  struct algorithm *, enum mult_variant *, int);
2348 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2349                               const struct algorithm *, enum mult_variant);
2350 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2351 static rtx extract_high_half (enum machine_mode, rtx);
2352 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2353 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2354                                        int, int);
2355 /* Compute and return the best algorithm for multiplying by T.
2356    The algorithm must cost less than cost_limit
2357    If retval.cost >= COST_LIMIT, no algorithm was found and all
2358    other field of the returned struct are undefined.
2359    MODE is the machine mode of the multiplication.  */
2360
2361 static void
2362 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2363             const struct mult_cost *cost_limit, enum machine_mode mode)
2364 {
2365   int m;
2366   struct algorithm *alg_in, *best_alg;
2367   struct mult_cost best_cost;
2368   struct mult_cost new_limit;
2369   int op_cost, op_latency;
2370   unsigned HOST_WIDE_INT orig_t = t;
2371   unsigned HOST_WIDE_INT q;
2372   int maxm, hash_index;
2373   bool cache_hit = false;
2374   enum alg_code cache_alg = alg_zero;
2375   bool speed = optimize_insn_for_speed_p ();
2376   enum machine_mode imode;
2377   struct alg_hash_entry *entry_ptr;
2378
2379   /* Indicate that no algorithm is yet found.  If no algorithm
2380      is found, this value will be returned and indicate failure.  */
2381   alg_out->cost.cost = cost_limit->cost + 1;
2382   alg_out->cost.latency = cost_limit->latency + 1;
2383
2384   if (cost_limit->cost < 0
2385       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2386     return;
2387
2388   /* Be prepared for vector modes.  */
2389   imode = GET_MODE_INNER (mode);
2390   if (imode == VOIDmode)
2391     imode = mode;
2392
2393   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2394
2395   /* Restrict the bits of "t" to the multiplication's mode.  */
2396   t &= GET_MODE_MASK (imode);
2397
2398   /* t == 1 can be done in zero cost.  */
2399   if (t == 1)
2400     {
2401       alg_out->ops = 1;
2402       alg_out->cost.cost = 0;
2403       alg_out->cost.latency = 0;
2404       alg_out->op[0] = alg_m;
2405       return;
2406     }
2407
2408   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2409      fail now.  */
2410   if (t == 0)
2411     {
2412       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2413         return;
2414       else
2415         {
2416           alg_out->ops = 1;
2417           alg_out->cost.cost = zero_cost (speed);
2418           alg_out->cost.latency = zero_cost (speed);
2419           alg_out->op[0] = alg_zero;
2420           return;
2421         }
2422     }
2423
2424   /* We'll be needing a couple extra algorithm structures now.  */
2425
2426   alg_in = XALLOCA (struct algorithm);
2427   best_alg = XALLOCA (struct algorithm);
2428   best_cost = *cost_limit;
2429
2430   /* Compute the hash index.  */
2431   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2432
2433   /* See if we already know what to do for T.  */
2434   entry_ptr = alg_hash_entry_ptr (hash_index);
2435   if (entry_ptr->t == t
2436       && entry_ptr->mode == mode
2437       && entry_ptr->mode == mode
2438       && entry_ptr->speed == speed
2439       && entry_ptr->alg != alg_unknown)
2440     {
2441       cache_alg = entry_ptr->alg;
2442
2443       if (cache_alg == alg_impossible)
2444         {
2445           /* The cache tells us that it's impossible to synthesize
2446              multiplication by T within entry_ptr->cost.  */
2447           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2448             /* COST_LIMIT is at least as restrictive as the one
2449                recorded in the hash table, in which case we have no
2450                hope of synthesizing a multiplication.  Just
2451                return.  */
2452             return;
2453
2454           /* If we get here, COST_LIMIT is less restrictive than the
2455              one recorded in the hash table, so we may be able to
2456              synthesize a multiplication.  Proceed as if we didn't
2457              have the cache entry.  */
2458         }
2459       else
2460         {
2461           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2462             /* The cached algorithm shows that this multiplication
2463                requires more cost than COST_LIMIT.  Just return.  This
2464                way, we don't clobber this cache entry with
2465                alg_impossible but retain useful information.  */
2466             return;
2467
2468           cache_hit = true;
2469
2470           switch (cache_alg)
2471             {
2472             case alg_shift:
2473               goto do_alg_shift;
2474
2475             case alg_add_t_m2:
2476             case alg_sub_t_m2:
2477               goto do_alg_addsub_t_m2;
2478
2479             case alg_add_factor:
2480             case alg_sub_factor:
2481               goto do_alg_addsub_factor;
2482
2483             case alg_add_t2_m:
2484               goto do_alg_add_t2_m;
2485
2486             case alg_sub_t2_m:
2487               goto do_alg_sub_t2_m;
2488
2489             default:
2490               gcc_unreachable ();
2491             }
2492         }
2493     }
2494
2495   /* If we have a group of zero bits at the low-order part of T, try
2496      multiplying by the remaining bits and then doing a shift.  */
2497
2498   if ((t & 1) == 0)
2499     {
2500     do_alg_shift:
2501       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2502       if (m < maxm)
2503         {
2504           q = t >> m;
2505           /* The function expand_shift will choose between a shift and
2506              a sequence of additions, so the observed cost is given as
2507              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2508           op_cost = m * add_cost (speed, mode);
2509           if (shift_cost (speed, mode, m) < op_cost)
2510             op_cost = shift_cost (speed, mode, m);
2511           new_limit.cost = best_cost.cost - op_cost;
2512           new_limit.latency = best_cost.latency - op_cost;
2513           synth_mult (alg_in, q, &new_limit, mode);
2514
2515           alg_in->cost.cost += op_cost;
2516           alg_in->cost.latency += op_cost;
2517           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2518             {
2519               struct algorithm *x;
2520               best_cost = alg_in->cost;
2521               x = alg_in, alg_in = best_alg, best_alg = x;
2522               best_alg->log[best_alg->ops] = m;
2523               best_alg->op[best_alg->ops] = alg_shift;
2524             }
2525
2526           /* See if treating ORIG_T as a signed number yields a better
2527              sequence.  Try this sequence only for a negative ORIG_T
2528              as it would be useless for a non-negative ORIG_T.  */
2529           if ((HOST_WIDE_INT) orig_t < 0)
2530             {
2531               /* Shift ORIG_T as follows because a right shift of a
2532                  negative-valued signed type is implementation
2533                  defined.  */
2534               q = ~(~orig_t >> m);
2535               /* The function expand_shift will choose between a shift
2536                  and a sequence of additions, so the observed cost is
2537                  given as MIN (m * add_cost(speed, mode),
2538                  shift_cost(speed, mode, m)).  */
2539               op_cost = m * add_cost (speed, mode);
2540               if (shift_cost (speed, mode, m) < op_cost)
2541                 op_cost = shift_cost (speed, mode, m);
2542               new_limit.cost = best_cost.cost - op_cost;
2543               new_limit.latency = best_cost.latency - op_cost;
2544               synth_mult (alg_in, q, &new_limit, mode);
2545
2546               alg_in->cost.cost += op_cost;
2547               alg_in->cost.latency += op_cost;
2548               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2549                 {
2550                   struct algorithm *x;
2551                   best_cost = alg_in->cost;
2552                   x = alg_in, alg_in = best_alg, best_alg = x;
2553                   best_alg->log[best_alg->ops] = m;
2554                   best_alg->op[best_alg->ops] = alg_shift;
2555                 }
2556             }
2557         }
2558       if (cache_hit)
2559         goto done;
2560     }
2561
2562   /* If we have an odd number, add or subtract one.  */
2563   if ((t & 1) != 0)
2564     {
2565       unsigned HOST_WIDE_INT w;
2566
2567     do_alg_addsub_t_m2:
2568       for (w = 1; (w & t) != 0; w <<= 1)
2569         ;
2570       /* If T was -1, then W will be zero after the loop.  This is another
2571          case where T ends with ...111.  Handling this with (T + 1) and
2572          subtract 1 produces slightly better code and results in algorithm
2573          selection much faster than treating it like the ...0111 case
2574          below.  */
2575       if (w == 0
2576           || (w > 2
2577               /* Reject the case where t is 3.
2578                  Thus we prefer addition in that case.  */
2579               && t != 3))
2580         {
2581           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2582
2583           op_cost = add_cost (speed, mode);
2584           new_limit.cost = best_cost.cost - op_cost;
2585           new_limit.latency = best_cost.latency - op_cost;
2586           synth_mult (alg_in, t + 1, &new_limit, mode);
2587
2588           alg_in->cost.cost += op_cost;
2589           alg_in->cost.latency += op_cost;
2590           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2591             {
2592               struct algorithm *x;
2593               best_cost = alg_in->cost;
2594               x = alg_in, alg_in = best_alg, best_alg = x;
2595               best_alg->log[best_alg->ops] = 0;
2596               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2597             }
2598         }
2599       else
2600         {
2601           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2602
2603           op_cost = add_cost (speed, mode);
2604           new_limit.cost = best_cost.cost - op_cost;
2605           new_limit.latency = best_cost.latency - op_cost;
2606           synth_mult (alg_in, t - 1, &new_limit, mode);
2607
2608           alg_in->cost.cost += op_cost;
2609           alg_in->cost.latency += op_cost;
2610           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2611             {
2612               struct algorithm *x;
2613               best_cost = alg_in->cost;
2614               x = alg_in, alg_in = best_alg, best_alg = x;
2615               best_alg->log[best_alg->ops] = 0;
2616               best_alg->op[best_alg->ops] = alg_add_t_m2;
2617             }
2618         }
2619
2620       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2621          quickly with a - a * n for some appropriate constant n.  */
2622       m = exact_log2 (-orig_t + 1);
2623       if (m >= 0 && m < maxm)
2624         {
2625           op_cost = shiftsub1_cost (speed, mode, m);
2626           new_limit.cost = best_cost.cost - op_cost;
2627           new_limit.latency = best_cost.latency - op_cost;
2628           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2629                       &new_limit, mode);
2630
2631           alg_in->cost.cost += op_cost;
2632           alg_in->cost.latency += op_cost;
2633           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2634             {
2635               struct algorithm *x;
2636               best_cost = alg_in->cost;
2637               x = alg_in, alg_in = best_alg, best_alg = x;
2638               best_alg->log[best_alg->ops] = m;
2639               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2640             }
2641         }
2642
2643       if (cache_hit)
2644         goto done;
2645     }
2646
2647   /* Look for factors of t of the form
2648      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2649      If we find such a factor, we can multiply by t using an algorithm that
2650      multiplies by q, shift the result by m and add/subtract it to itself.
2651
2652      We search for large factors first and loop down, even if large factors
2653      are less probable than small; if we find a large factor we will find a
2654      good sequence quickly, and therefore be able to prune (by decreasing
2655      COST_LIMIT) the search.  */
2656
2657  do_alg_addsub_factor:
2658   for (m = floor_log2 (t - 1); m >= 2; m--)
2659     {
2660       unsigned HOST_WIDE_INT d;
2661
2662       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2663       if (t % d == 0 && t > d && m < maxm
2664           && (!cache_hit || cache_alg == alg_add_factor))
2665         {
2666           /* If the target has a cheap shift-and-add instruction use
2667              that in preference to a shift insn followed by an add insn.
2668              Assume that the shift-and-add is "atomic" with a latency
2669              equal to its cost, otherwise assume that on superscalar
2670              hardware the shift may be executed concurrently with the
2671              earlier steps in the algorithm.  */
2672           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2673           if (shiftadd_cost (speed, mode, m) < op_cost)
2674             {
2675               op_cost = shiftadd_cost (speed, mode, m);
2676               op_latency = op_cost;
2677             }
2678           else
2679             op_latency = add_cost (speed, mode);
2680
2681           new_limit.cost = best_cost.cost - op_cost;
2682           new_limit.latency = best_cost.latency - op_latency;
2683           synth_mult (alg_in, t / d, &new_limit, mode);
2684
2685           alg_in->cost.cost += op_cost;
2686           alg_in->cost.latency += op_latency;
2687           if (alg_in->cost.latency < op_cost)
2688             alg_in->cost.latency = op_cost;
2689           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2690             {
2691               struct algorithm *x;
2692               best_cost = alg_in->cost;
2693               x = alg_in, alg_in = best_alg, best_alg = x;
2694               best_alg->log[best_alg->ops] = m;
2695               best_alg->op[best_alg->ops] = alg_add_factor;
2696             }
2697           /* Other factors will have been taken care of in the recursion.  */
2698           break;
2699         }
2700
2701       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2702       if (t % d == 0 && t > d && m < maxm
2703           && (!cache_hit || cache_alg == alg_sub_factor))
2704         {
2705           /* If the target has a cheap shift-and-subtract insn use
2706              that in preference to a shift insn followed by a sub insn.
2707              Assume that the shift-and-sub is "atomic" with a latency
2708              equal to it's cost, otherwise assume that on superscalar
2709              hardware the shift may be executed concurrently with the
2710              earlier steps in the algorithm.  */
2711           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2712           if (shiftsub0_cost (speed, mode, m) < op_cost)
2713             {
2714               op_cost = shiftsub0_cost (speed, mode, m);
2715               op_latency = op_cost;
2716             }
2717           else
2718             op_latency = add_cost (speed, mode);
2719
2720           new_limit.cost = best_cost.cost - op_cost;
2721           new_limit.latency = best_cost.latency - op_latency;
2722           synth_mult (alg_in, t / d, &new_limit, mode);
2723
2724           alg_in->cost.cost += op_cost;
2725           alg_in->cost.latency += op_latency;
2726           if (alg_in->cost.latency < op_cost)
2727             alg_in->cost.latency = op_cost;
2728           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2729             {
2730               struct algorithm *x;
2731               best_cost = alg_in->cost;
2732               x = alg_in, alg_in = best_alg, best_alg = x;
2733               best_alg->log[best_alg->ops] = m;
2734               best_alg->op[best_alg->ops] = alg_sub_factor;
2735             }
2736           break;
2737         }
2738     }
2739   if (cache_hit)
2740     goto done;
2741
2742   /* Try shift-and-add (load effective address) instructions,
2743      i.e. do a*3, a*5, a*9.  */
2744   if ((t & 1) != 0)
2745     {
2746     do_alg_add_t2_m:
2747       q = t - 1;
2748       q = q & -q;
2749       m = exact_log2 (q);
2750       if (m >= 0 && m < maxm)
2751         {
2752           op_cost = shiftadd_cost (speed, mode, m);
2753           new_limit.cost = best_cost.cost - op_cost;
2754           new_limit.latency = best_cost.latency - op_cost;
2755           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2756
2757           alg_in->cost.cost += op_cost;
2758           alg_in->cost.latency += op_cost;
2759           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2760             {
2761               struct algorithm *x;
2762               best_cost = alg_in->cost;
2763               x = alg_in, alg_in = best_alg, best_alg = x;
2764               best_alg->log[best_alg->ops] = m;
2765               best_alg->op[best_alg->ops] = alg_add_t2_m;
2766             }
2767         }
2768       if (cache_hit)
2769         goto done;
2770
2771     do_alg_sub_t2_m:
2772       q = t + 1;
2773       q = q & -q;
2774       m = exact_log2 (q);
2775       if (m >= 0 && m < maxm)
2776         {
2777           op_cost = shiftsub0_cost (speed, mode, m);
2778           new_limit.cost = best_cost.cost - op_cost;
2779           new_limit.latency = best_cost.latency - op_cost;
2780           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2781
2782           alg_in->cost.cost += op_cost;
2783           alg_in->cost.latency += op_cost;
2784           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2785             {
2786               struct algorithm *x;
2787               best_cost = alg_in->cost;
2788               x = alg_in, alg_in = best_alg, best_alg = x;
2789               best_alg->log[best_alg->ops] = m;
2790               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2791             }
2792         }
2793       if (cache_hit)
2794         goto done;
2795     }
2796
2797  done:
2798   /* If best_cost has not decreased, we have not found any algorithm.  */
2799   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2800     {
2801       /* We failed to find an algorithm.  Record alg_impossible for
2802          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2803          we are asked to find an algorithm for T within the same or
2804          lower COST_LIMIT, we can immediately return to the
2805          caller.  */
2806       entry_ptr->t = t;
2807       entry_ptr->mode = mode;
2808       entry_ptr->speed = speed;
2809       entry_ptr->alg = alg_impossible;
2810       entry_ptr->cost = *cost_limit;
2811       return;
2812     }
2813
2814   /* Cache the result.  */
2815   if (!cache_hit)
2816     {
2817       entry_ptr->t = t;
2818       entry_ptr->mode = mode;
2819       entry_ptr->speed = speed;
2820       entry_ptr->alg = best_alg->op[best_alg->ops];
2821       entry_ptr->cost.cost = best_cost.cost;
2822       entry_ptr->cost.latency = best_cost.latency;
2823     }
2824
2825   /* If we are getting a too long sequence for `struct algorithm'
2826      to record, make this search fail.  */
2827   if (best_alg->ops == MAX_BITS_PER_WORD)
2828     return;
2829
2830   /* Copy the algorithm from temporary space to the space at alg_out.
2831      We avoid using structure assignment because the majority of
2832      best_alg is normally undefined, and this is a critical function.  */
2833   alg_out->ops = best_alg->ops + 1;
2834   alg_out->cost = best_cost;
2835   memcpy (alg_out->op, best_alg->op,
2836           alg_out->ops * sizeof *alg_out->op);
2837   memcpy (alg_out->log, best_alg->log,
2838           alg_out->ops * sizeof *alg_out->log);
2839 }
2840 \f
2841 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2842    Try three variations:
2843
2844        - a shift/add sequence based on VAL itself
2845        - a shift/add sequence based on -VAL, followed by a negation
2846        - a shift/add sequence based on VAL - 1, followed by an addition.
2847
2848    Return true if the cheapest of these cost less than MULT_COST,
2849    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2850
2851 static bool
2852 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2853                      struct algorithm *alg, enum mult_variant *variant,
2854                      int mult_cost)
2855 {
2856   struct algorithm alg2;
2857   struct mult_cost limit;
2858   int op_cost;
2859   bool speed = optimize_insn_for_speed_p ();
2860
2861   /* Fail quickly for impossible bounds.  */
2862   if (mult_cost < 0)
2863     return false;
2864
2865   /* Ensure that mult_cost provides a reasonable upper bound.
2866      Any constant multiplication can be performed with less
2867      than 2 * bits additions.  */
2868   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2869   if (mult_cost > op_cost)
2870     mult_cost = op_cost;
2871
2872   *variant = basic_variant;
2873   limit.cost = mult_cost;
2874   limit.latency = mult_cost;
2875   synth_mult (alg, val, &limit, mode);
2876
2877   /* This works only if the inverted value actually fits in an
2878      `unsigned int' */
2879   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2880     {
2881       op_cost = neg_cost (speed, mode);
2882       if (MULT_COST_LESS (&alg->cost, mult_cost))
2883         {
2884           limit.cost = alg->cost.cost - op_cost;
2885           limit.latency = alg->cost.latency - op_cost;
2886         }
2887       else
2888         {
2889           limit.cost = mult_cost - op_cost;
2890           limit.latency = mult_cost - op_cost;
2891         }
2892
2893       synth_mult (&alg2, -val, &limit, mode);
2894       alg2.cost.cost += op_cost;
2895       alg2.cost.latency += op_cost;
2896       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2897         *alg = alg2, *variant = negate_variant;
2898     }
2899
2900   /* This proves very useful for division-by-constant.  */
2901   op_cost = add_cost (speed, mode);
2902   if (MULT_COST_LESS (&alg->cost, mult_cost))
2903     {
2904       limit.cost = alg->cost.cost - op_cost;
2905       limit.latency = alg->cost.latency - op_cost;
2906     }
2907   else
2908     {
2909       limit.cost = mult_cost - op_cost;
2910       limit.latency = mult_cost - op_cost;
2911     }
2912
2913   synth_mult (&alg2, val - 1, &limit, mode);
2914   alg2.cost.cost += op_cost;
2915   alg2.cost.latency += op_cost;
2916   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2917     *alg = alg2, *variant = add_variant;
2918
2919   return MULT_COST_LESS (&alg->cost, mult_cost);
2920 }
2921
2922 /* A subroutine of expand_mult, used for constant multiplications.
2923    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2924    convenient.  Use the shift/add sequence described by ALG and apply
2925    the final fixup specified by VARIANT.  */
2926
2927 static rtx
2928 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2929                    rtx target, const struct algorithm *alg,
2930                    enum mult_variant variant)
2931 {
2932   HOST_WIDE_INT val_so_far;
2933   rtx insn, accum, tem;
2934   int opno;
2935   enum machine_mode nmode;
2936
2937   /* Avoid referencing memory over and over and invalid sharing
2938      on SUBREGs.  */
2939   op0 = force_reg (mode, op0);
2940
2941   /* ACCUM starts out either as OP0 or as a zero, depending on
2942      the first operation.  */
2943
2944   if (alg->op[0] == alg_zero)
2945     {
2946       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2947       val_so_far = 0;
2948     }
2949   else if (alg->op[0] == alg_m)
2950     {
2951       accum = copy_to_mode_reg (mode, op0);
2952       val_so_far = 1;
2953     }
2954   else
2955     gcc_unreachable ();
2956
2957   for (opno = 1; opno < alg->ops; opno++)
2958     {
2959       int log = alg->log[opno];
2960       rtx shift_subtarget = optimize ? 0 : accum;
2961       rtx add_target
2962         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2963            && !optimize)
2964           ? target : 0;
2965       rtx accum_target = optimize ? 0 : accum;
2966       rtx accum_inner;
2967
2968       switch (alg->op[opno])
2969         {
2970         case alg_shift:
2971           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2972           /* REG_EQUAL note will be attached to the following insn.  */
2973           emit_move_insn (accum, tem);
2974           val_so_far <<= log;
2975           break;
2976
2977         case alg_add_t_m2:
2978           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2979           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2980                                  add_target ? add_target : accum_target);
2981           val_so_far += (HOST_WIDE_INT) 1 << log;
2982           break;
2983
2984         case alg_sub_t_m2:
2985           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2986           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2987                                  add_target ? add_target : accum_target);
2988           val_so_far -= (HOST_WIDE_INT) 1 << log;
2989           break;
2990
2991         case alg_add_t2_m:
2992           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2993                                 log, shift_subtarget, 0);
2994           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2995                                  add_target ? add_target : accum_target);
2996           val_so_far = (val_so_far << log) + 1;
2997           break;
2998
2999         case alg_sub_t2_m:
3000           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3001                                 log, shift_subtarget, 0);
3002           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3003                                  add_target ? add_target : accum_target);
3004           val_so_far = (val_so_far << log) - 1;
3005           break;
3006
3007         case alg_add_factor:
3008           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3009           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3010                                  add_target ? add_target : accum_target);
3011           val_so_far += val_so_far << log;
3012           break;
3013
3014         case alg_sub_factor:
3015           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3016           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3017                                  (add_target
3018                                   ? add_target : (optimize ? 0 : tem)));
3019           val_so_far = (val_so_far << log) - val_so_far;
3020           break;
3021
3022         default:
3023           gcc_unreachable ();
3024         }
3025
3026       if (SCALAR_INT_MODE_P (mode))
3027         {
3028           /* Write a REG_EQUAL note on the last insn so that we can cse
3029              multiplication sequences.  Note that if ACCUM is a SUBREG,
3030              we've set the inner register and must properly indicate that.  */
3031           tem = op0, nmode = mode;
3032           accum_inner = accum;
3033           if (GET_CODE (accum) == SUBREG)
3034             {
3035               accum_inner = SUBREG_REG (accum);
3036               nmode = GET_MODE (accum_inner);
3037               tem = gen_lowpart (nmode, op0);
3038             }
3039
3040           insn = get_last_insn ();
3041           set_dst_reg_note (insn, REG_EQUAL,
3042                             gen_rtx_MULT (nmode, tem,
3043                                           gen_int_mode (val_so_far, nmode)),
3044                             accum_inner);
3045         }
3046     }
3047
3048   if (variant == negate_variant)
3049     {
3050       val_so_far = -val_so_far;
3051       accum = expand_unop (mode, neg_optab, accum, target, 0);
3052     }
3053   else if (variant == add_variant)
3054     {
3055       val_so_far = val_so_far + 1;
3056       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3057     }
3058
3059   /* Compare only the bits of val and val_so_far that are significant
3060      in the result mode, to avoid sign-/zero-extension confusion.  */
3061   nmode = GET_MODE_INNER (mode);
3062   if (nmode == VOIDmode)
3063     nmode = mode;
3064   val &= GET_MODE_MASK (nmode);
3065   val_so_far &= GET_MODE_MASK (nmode);
3066   gcc_assert (val == val_so_far);
3067
3068   return accum;
3069 }
3070
/* Perform a multiplication and return an rtx for the result.
   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
   TARGET is a suggestion for where to store the result (an rtx).
   UNSIGNEDP is passed through to the shift and multiply expanders; it
   also disables the trapping variants selected when flag_trapv is set.

   We check specially for a constant integer as OP1.  If OP0 satisfies
   CONSTANT_P the two operands are exchanged on entry, so the constant
   ends up in OP1 regardless of the order the caller used.

   For integral modes the strategies tried, in order, are: the trivial
   multipliers 0, 1 and -1; a single left shift for powers of two; a
   shift/add sequence chosen by choose_mult_variant when it is cheaper
   than a multiply insn; and finally the multiply optab.  For a floating
   constant equal to 2.0 an addition x+x is used instead.  */

rtx
expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
             int unsignedp)
{
  enum mult_variant variant;
  struct algorithm algorithm;
  rtx scalar_op1;
  int max_cost;
  bool speed = optimize_insn_for_speed_p ();
  /* Use the overflow-trapping optabs only for signed scalar integer
     multiplication when flag_trapv is set.  */
  bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;

  /* Canonicalize so that a constant operand, if any, is OP1.  */
  if (CONSTANT_P (op0))
    {
      rtx temp = op0;
      op0 = op1;
      op1 = temp;
    }

  /* For vectors, there are several simplifications that can be made if
     all elements of the vector constant are identical.  In that case
     SCALAR_OP1 is the common element; if the elements differ, none of
     the constant special cases apply and we go straight to the optab.  */
  scalar_op1 = op1;
  if (GET_CODE (op1) == CONST_VECTOR)
    {
      int i, n = CONST_VECTOR_NUNITS (op1);
      scalar_op1 = CONST_VECTOR_ELT (op1, 0);
      for (i = 1; i < n; ++i)
        if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
          goto skip_scalar;
    }

  if (INTEGRAL_MODE_P (mode))
    {
      rtx fake_reg;
      HOST_WIDE_INT coeff;
      bool is_neg;
      int mode_bitsize;

      /* Trivial multipliers: x*0 is 0, x*1 is x, x*-1 is -x.  */
      if (op1 == CONST0_RTX (mode))
        return op1;
      if (op1 == CONST1_RTX (mode))
        return op0;
      if (op1 == CONSTM1_RTX (mode))
        return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
                            op0, target, 0);

      /* Shift/add synthesis is not attempted when trapping arithmetic
         was requested; use the trapping multiply optab instead.  */
      if (do_trapv)
        goto skip_synth;

      /* If mode is integer vector mode, check if the backend supports
         vector lshift (by scalar or vector) at all.  If not, we can't use
         synthesized multiply.  */
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
          && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
          && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
        goto skip_synth;

      /* These are the operations that are potentially turned into
         a sequence of shifts and additions.  */
      mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

      /* synth_mult does an `unsigned int' multiply.  As long as the mode is
         less than or equal in size to `unsigned int' this doesn't matter.
         If the mode is larger than `unsigned int', then synth_mult works
         only if the constant value exactly fits in an `unsigned int' without
         any truncation.  This means that multiplying by negative values does
         not work; results are off by 2^32 on a 32 bit machine.  */

      /* Extract the multiplier into COEFF and record in IS_NEG whether it
         has to be treated as a negative value.  */
      if (CONST_INT_P (scalar_op1))
        {
          coeff = INTVAL (scalar_op1);
          is_neg = coeff < 0;
        }
      else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
        {
          /* If we are multiplying in DImode, it may still be a win
             to try to work with shifts and adds.  This applies when the
             high word is zero and the low word is either positive or is
             negative but passes EXACT_POWER_OF_2_OR_ZERO_P; the low word
             is then used as an unsigned coefficient.  */
          if (CONST_DOUBLE_HIGH (scalar_op1) == 0
              && (CONST_DOUBLE_LOW (scalar_op1) > 0
                  || (CONST_DOUBLE_LOW (scalar_op1) < 0
                      && EXACT_POWER_OF_2_OR_ZERO_P
                           (CONST_DOUBLE_LOW (scalar_op1)))))
            {
              coeff = CONST_DOUBLE_LOW (scalar_op1);
              is_neg = false;
            }
          else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
            {
              /* Only the high word is nonzero.  If it is a power of two
                 the multiplication is a single shift whose count is
                 offset by the width of the low word; anything else is
                 left to the multiply optab.  */
              coeff = CONST_DOUBLE_HIGH (scalar_op1);
              if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
                {
                  int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
                  if (shift < HOST_BITS_PER_DOUBLE_INT - 1
                      || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
                    return expand_shift (LSHIFT_EXPR, mode, op0,
                                         shift, target, unsignedp);
                }
              goto skip_synth;
            }
          else
            goto skip_synth;
        }
      else
        goto skip_synth;

      /* We used to test optimize here, on the grounds that it's better to
         produce a smaller program when -O is not used.  But this causes
         such a terrible slowdown sometimes that it seems better to always
         use synth_mult.  */

      /* Special case powers of two.  A negative coefficient in a mode
         wider than HOST_WIDE_INT is excluded here and handled by the
         negate-the-result path below.  */
      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
          && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
        return expand_shift (LSHIFT_EXPR, mode, op0,
                             floor_log2 (coeff), target, unsignedp);

      /* A placeholder register used only to build a MULT rtx for costing,
         so that the cost of computing OP0 itself is not included.  */
      fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);

      /* Attempt to handle multiplication of DImode values by negative
         coefficients, by performing the multiplication by a positive
         multiplier and then inverting the result.  */
      if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
        {
          /* It's safe to use -coeff even for INT_MIN, as the
             result is interpreted as an unsigned coefficient.
             Exclude cost of op0 from max_cost to match the cost
             calculation of the synth_mult.  The cost of the final
             negation is subtracted from the budget up front.  */
          coeff = -(unsigned HOST_WIDE_INT) coeff;
          max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
                      - neg_cost (speed, mode));
          if (max_cost <= 0)
            goto skip_synth;

          /* Special case powers of two.  */
          if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
            {
              rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
                                       floor_log2 (coeff), target, unsignedp);
              return expand_unop (mode, neg_optab, temp, target, 0);
            }

          if (choose_mult_variant (mode, coeff, &algorithm, &variant,
                                   max_cost))
            {
              rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
                                            &algorithm, variant);
              return expand_unop (mode, neg_optab, temp, target, 0);
            }
          goto skip_synth;
        }

      /* Exclude cost of op0 from max_cost to match the cost
         calculation of the synth_mult.  */
      max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
      if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
        return expand_mult_const (mode, op0, coeff, target,
                                  &algorithm, variant);
    }
 skip_synth:

  /* Expand x*2.0 as x+x.  */
  if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
    {
      REAL_VALUE_TYPE d;
      REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);

      if (REAL_VALUES_EQUAL (d, dconst2))
        {
          op0 = force_reg (GET_MODE (op0), op0);
          return expand_binop (mode, add_optab, op0, op0,
                               target, unsignedp, OPTAB_LIB_WIDEN);
        }
    }
 skip_scalar:

  /* This used to use umul_optab if unsigned, but for non-widening multiply
     there is no difference between signed and unsigned.  The result is
     asserted to be non-null.  */
  op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
                      op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
  gcc_assert (op0);
  return op0;
}
3260
3261 /* Return a cost estimate for multiplying a register by the given
3262    COEFFicient in the given MODE and SPEED.  */
3263
3264 int
3265 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3266 {
3267   int max_cost;
3268   struct algorithm algorithm;
3269   enum mult_variant variant;
3270
3271   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3272   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3273   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3274     return algorithm.cost.cost;
3275   else
3276     return max_cost;
3277 }
3278
3279 /* Perform a widening multiplication and return an rtx for the result.
3280    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3281    TARGET is a suggestion for where to store the result (an rtx).
3282    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3283    or smul_widen_optab.
3284
3285    We check specially for a constant integer as OP1, comparing the
3286    cost of a widening multiply against the cost of a sequence of shifts
3287    and adds.  */
3288
3289 rtx
3290 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3291                       int unsignedp, optab this_optab)
3292 {
3293   bool speed = optimize_insn_for_speed_p ();
3294   rtx cop1;
3295
3296   if (CONST_INT_P (op1)
3297       && GET_MODE (op0) != VOIDmode
3298       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3299                                 this_optab == umul_widen_optab))
3300       && CONST_INT_P (cop1)
3301       && (INTVAL (cop1) >= 0
3302           || HWI_COMPUTABLE_MODE_P (mode)))
3303     {
3304       HOST_WIDE_INT coeff = INTVAL (cop1);
3305       int max_cost;
3306       enum mult_variant variant;
3307       struct algorithm algorithm;
3308
3309       if (coeff == 0)
3310         return CONST0_RTX (mode);
3311
3312       /* Special case powers of two.  */
3313       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3314         {
3315           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3316           return expand_shift (LSHIFT_EXPR, mode, op0,
3317                                floor_log2 (coeff), target, unsignedp);
3318         }
3319
3320       /* Exclude cost of op0 from max_cost to match the cost
3321          calculation of the synth_mult.  */
3322       max_cost = mul_widen_cost (speed, mode);
3323       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3324                                max_cost))
3325         {
3326           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3327           return expand_mult_const (mode, op0, coeff, target,
3328                                     &algorithm, variant);
3329         }
3330     }
3331   return expand_binop (mode, this_optab, op0, op1, target,
3332                        unsignedp, OPTAB_LIB_WIDEN);
3333 }
3334 \f
3335 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3336    replace division by D, and put the least significant N bits of the result
3337    in *MULTIPLIER_PTR and return the most significant bit.
3338
3339    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3340    needed precision is in PRECISION (should be <= N).
3341
3342    PRECISION should be as small as possible so this function can choose
3343    multiplier more freely.
3344
3345    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3346    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3347
3348    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3349    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3350
3351 unsigned HOST_WIDE_INT
3352 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3353                    unsigned HOST_WIDE_INT *multiplier_ptr,
3354                    int *post_shift_ptr, int *lgup_ptr)
3355 {
3356   double_int mhigh, mlow;
3357   int lgup, post_shift;
3358   int pow, pow2;
3359
3360   /* lgup = ceil(log2(divisor)); */
3361   lgup = ceil_log2 (d);
3362
3363   gcc_assert (lgup <= n);
3364
3365   pow = n + lgup;
3366   pow2 = n + lgup - precision;
3367
3368   /* We could handle this with some effort, but this case is much
3369      better handled directly with a scc insn, so rely on caller using
3370      that.  */
3371   gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3372
3373   /* mlow = 2^(N + lgup)/d */
3374   double_int val = double_int_zero.set_bit (pow);
3375   mlow = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3376
3377   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3378   val |= double_int_zero.set_bit (pow2);
3379   mhigh = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3380
3381   gcc_assert (!mhigh.high || val.high - d < d);
3382   gcc_assert (mhigh.high <= 1 && mlow.high <= 1);
3383   /* Assert that mlow < mhigh.  */
3384   gcc_assert (mlow.ult (mhigh));
3385
3386   /* If precision == N, then mlow, mhigh exceed 2^N
3387      (but they do not exceed 2^(N+1)).  */
3388
3389   /* Reduce to lowest terms.  */
3390   for (post_shift = lgup; post_shift > 0; post_shift--)
3391     {
3392       int shft = HOST_BITS_PER_WIDE_INT - 1;
3393       unsigned HOST_WIDE_INT ml_lo = (mlow.high << shft) | (mlow.low >> 1);
3394       unsigned HOST_WIDE_INT mh_lo = (mhigh.high << shft) | (mhigh.low >> 1);
3395       if (ml_lo >= mh_lo)
3396         break;
3397
3398       mlow = double_int::from_uhwi (ml_lo);
3399       mhigh = double_int::from_uhwi (mh_lo);
3400     }
3401
3402   *post_shift_ptr = post_shift;
3403   *lgup_ptr = lgup;
3404   if (n < HOST_BITS_PER_WIDE_INT)
3405     {
3406       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3407       *multiplier_ptr = mhigh.low & mask;
3408       return mhigh.low >= mask;
3409     }
3410   else
3411     {
3412       *multiplier_ptr = mhigh.low;
3413       return mhigh.high;
3414     }
3415 }
3416
3417 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3418    congruent to 1 (mod 2**N).  */
3419
3420 static unsigned HOST_WIDE_INT
3421 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3422 {
3423   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3424
3425   /* The algorithm notes that the choice y = x satisfies
3426      x*y == 1 mod 2^3, since x is assumed odd.
3427      Each iteration doubles the number of bits of significance in y.  */
3428
3429   unsigned HOST_WIDE_INT mask;
3430   unsigned HOST_WIDE_INT y = x;
3431   int nbit = 3;
3432
3433   mask = (n == HOST_BITS_PER_WIDE_INT
3434           ? ~(unsigned HOST_WIDE_INT) 0
3435           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3436
3437   while (nbit < n)
3438     {
3439       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3440       nbit *= 2;
3441     }
3442   return y;
3443 }
3444
3445 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3446    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3447    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3448    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3449    become signed.
3450
3451    The result is put in TARGET if that is convenient.
3452
3453    MODE is the mode of operation.  */
3454
3455 rtx
3456 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3457                              rtx op1, rtx target, int unsignedp)
3458 {
3459   rtx tem;
3460   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3461
3462   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3463                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3464   tem = expand_and (mode, tem, op1, NULL_RTX);
3465   adj_operand
3466     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3467                      adj_operand);
3468
3469   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3470                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3471   tem = expand_and (mode, tem, op0, NULL_RTX);
3472   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3473                           target);
3474
3475   return target;
3476 }
3477
3478 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3479
3480 static rtx
3481 extract_high_half (enum machine_mode mode, rtx op)
3482 {
3483   enum machine_mode wider_mode;
3484
3485   if (mode == word_mode)
3486     return gen_highpart (mode, op);
3487
3488   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3489
3490   wider_mode = GET_MODE_WIDER_MODE (mode);
3491   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3492                      GET_MODE_BITSIZE (mode), 0, 1);
3493   return convert_modes (mode, wider_mode, op, 0);
3494 }
3495
/* Like expmed_mult_highpart, but only consider using a multiplication
   optab.  OP1 is an rtx for the constant operand.

   MODE is the mode of the operation and result, TARGET a suggestion for
   where to put the result, UNSIGNEDP nonzero for an unsigned multiply.
   Each strategy below is attempted only if its estimated cost is less
   than MAX_COST.  Returns the rtx holding the high part, or 0 if no
   strategy was both cheap enough and supported.  */

static rtx
expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
                            rtx target, int unsignedp, int max_cost)
{
  /* The constant truncated to MODE, for use as a MODE operand.  */
  rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
  enum machine_mode wider_mode;
  optab moptab;
  rtx tem;
  int size;
  bool speed = optimize_insn_for_speed_p ();

  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));

  wider_mode = GET_MODE_WIDER_MODE (mode);
  size = GET_MODE_BITSIZE (mode);

  /* Firstly, try using a multiplication insn that only generates the needed
     high part of the product, and in the sign flavor of unsignedp.  */
  if (mul_highpart_cost (speed, mode) < max_cost)
    {
      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
                          unsignedp, OPTAB_DIRECT);
      if (tem)
        return tem;
    }

  /* Secondly, same as above, but use sign flavor opposite of unsignedp.
     Need to adjust the result after the multiplication.  The adjustment
     is costed as two shifts and four additions.  The size check comes
     first so that shift_cost is only consulted for a shift count below
     BITS_PER_WORD.  */
  if (size - 1 < BITS_PER_WORD
      && (mul_highpart_cost (speed, mode)
          + 2 * shift_cost (speed, mode, size-1)
          + 4 * add_cost (speed, mode) < max_cost))
    {
      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
                          unsignedp, OPTAB_DIRECT);
      if (tem)
        /* We used the wrong signedness.  Adjust the result.  */
        return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
                                            tem, unsignedp);
    }

  /* Try widening multiplication.  The full product is computed in
     WIDER_MODE and its high half extracted.  */
  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
  if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && mul_widen_cost (speed, wider_mode) < max_cost)
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
                          unsignedp, OPTAB_WIDEN);
      if (tem)
        return extract_high_half (mode, tem);
    }

  /* Try widening the mode and perform a non-widening multiplication.  */
  if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
          < max_cost))
    {
      rtx insns, wop0, wop1;

      /* We need to widen the operands, for example to ensure the
         constant multiplier is correctly sign or zero extended.
         Use a sequence to clean-up any instructions emitted by
         the conversions if things don't work out.  Note that the
         original OP1, not NARROW_OP1, is what gets converted.  */
      start_sequence ();
      wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
      wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
      tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
                          unsignedp, OPTAB_WIDEN);
      insns = get_insns ();
      end_sequence ();

      /* Only emit the collected insns if the multiply was expanded.  */
      if (tem)
        {
          emit_insn (insns);
          return extract_high_half (mode, tem);
        }
    }

  /* Try widening multiplication of opposite signedness, and adjust.  */
  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
  if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_widen_cost (speed, wider_mode)
          + 2 * shift_cost (speed, mode, size-1)
          + 4 * add_cost (speed, mode) < max_cost))
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
                          NULL_RTX, ! unsignedp, OPTAB_WIDEN);
      if (tem != 0)
        {
          tem = extract_high_half (mode, tem);
          /* We used the wrong signedness.  Adjust the result.  */
          return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
                                              target, unsignedp);
        }
    }

  /* Nothing was both affordable and available.  */
  return 0;
}
3601
3602 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3603    putting the high half of the result in TARGET if that is convenient,
3604    and return where the result is.  If the operation can not be performed,
3605    0 is returned.
3606
3607    MODE is the mode of operation and result.
3608
3609    UNSIGNEDP nonzero means unsigned multiply.
3610
3611    MAX_COST is the total allowed cost for the expanded RTL.  */
3612
3613 static rtx
3614 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3615                       rtx target, int unsignedp, int max_cost)
3616 {
3617   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3618   unsigned HOST_WIDE_INT cnst1;
3619   int extra_cost;
3620   bool sign_adjust = false;
3621   enum mult_variant variant;
3622   struct algorithm alg;
3623   rtx tem;
3624   bool speed = optimize_insn_for_speed_p ();
3625
3626   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3627   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3628   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3629
3630   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3631
3632   /* We can't optimize modes wider than BITS_PER_WORD.
3633      ??? We might be able to perform double-word arithmetic if
3634      mode == word_mode, however all the cost calculations in
3635      synth_mult etc. assume single-word operations.  */
3636   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3637     return expmed_mult_highpart_optab (mode, op0, op1, target,
3638                                        unsignedp, max_cost);
3639
3640   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3641
3642   /* Check whether we try to multiply by a negative constant.  */
3643   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3644     {
3645       sign_adjust = true;
3646       extra_cost += add_cost (speed, mode);
3647     }
3648
3649   /* See whether shift/add multiplication is cheap enough.  */
3650   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3651                            max_cost - extra_cost))
3652     {
3653       /* See whether the specialized multiplication optabs are
3654          cheaper than the shift/add version.  */
3655       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3656                                         alg.cost.cost + extra_cost);
3657       if (tem)
3658         return tem;
3659
3660       tem = convert_to_mode (wider_mode, op0, unsignedp);
3661       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3662       tem = extract_high_half (mode, tem);
3663
3664       /* Adjust result for signedness.  */
3665       if (sign_adjust)
3666         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3667
3668       return tem;
3669     }
3670   return expmed_mult_highpart_optab (mode, op0, op1, target,
3671                                      unsignedp, max_cost);
3672 }
3673
3674
/* Expand signed modulus of OP0 by a power of two D in mode MODE.
   Returns an rtx for the remainder.  A branch-free sequence is used
   when branches are costly and we are optimizing for speed; otherwise
   the remainder is computed with a mask, a conditional jump and a
   fix-up for negative values.  */

static rtx
expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
{
  unsigned HOST_WIDE_INT masklow, maskhigh;
  rtx result, temp, shift, label;
  int logd;

  logd = floor_log2 (d);
  result = gen_reg_rtx (mode);

  /* Avoid conditional branches when they're expensive.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
      && optimize_insn_for_speed_p ())
    {
      /* SIGNMASK is requested as the store-flag value of OP0 < 0 with a
         true value of -1.  The store-flag expansion may fail, in which
         case we fall through to the branching sequence.  */
      rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
                                      mode, 0, -1);
      if (signmask)
        {
          signmask = force_reg (mode, signmask);
          /* The low LOGD bits select the remainder.  */
          masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
          shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);

          /* Use the rtx_cost of a LSHIFTRT instruction to determine
             which instruction sequence to use.  If logical right shifts
             are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
             use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */

          /* This rtx is built only so that its cost can be queried.  */
          temp = gen_rtx_LSHIFTRT (mode, result, shift);
          if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
              || (set_src_cost (temp, optimize_insn_for_speed_p ())
                  > COSTS_N_INSNS (2)))
            {
              /* ((((op0 ^ s) - s) & mask) ^ s) - s, with s = SIGNMASK.  */
              temp = expand_binop (mode, xor_optab, op0, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, sub_optab, temp, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, and_optab, temp,
                                   gen_int_mode (masklow, mode),
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, xor_optab, temp, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, sub_optab, temp, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
            }
          else
            {
              /* Shift SIGNMASK right logically to get a bias b, then
                 compute ((op0 + b) & mask) - b.  */
              signmask = expand_binop (mode, lshr_optab, signmask, shift,
                                       NULL_RTX, 1, OPTAB_LIB_WIDEN);
              signmask = force_reg (mode, signmask);

              temp = expand_binop (mode, add_optab, op0, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, and_optab, temp,
                                   gen_int_mode (masklow, mode),
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
              temp = expand_binop (mode, sub_optab, temp, signmask,
                                   NULL_RTX, 1, OPTAB_LIB_WIDEN);
            }
          return temp;
        }
    }

  /* Mask contains the mode's signbit and the significant bits of the
     modulus.  By including the signbit in the operation, many targets
     can avoid an explicit compare operation in the following comparison
     against zero.  */

  masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
  if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
    {
      /* The sign bit lives in the low word of the constant.  */
      masklow |= HOST_WIDE_INT_M1U << (GET_MODE_BITSIZE (mode) - 1);
      maskhigh = -1;
    }
  else
    /* The sign bit lives in the high word of the constant.  */
    maskhigh = HOST_WIDE_INT_M1U
                 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);

  temp = expand_binop (mode, and_optab, op0,
                       immed_double_const (masklow, maskhigh, mode),
                       result, 1, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);

  /* If the masked value is non-negative it already is the remainder;
     jump past the fix-up.  */
  label = gen_label_rtx ();
  do_cmp_and_jump (result, const0_rtx, GE, mode, label);

  /* Negative case: compute ((result - 1) | ~mask) + 1, where ~mask has
     all bits from LOGD upward set.  */
  temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
                       0, OPTAB_LIB_WIDEN);
  masklow = HOST_WIDE_INT_M1U << logd;
  maskhigh = -1;
  temp = expand_binop (mode, ior_optab, temp,
                       immed_double_const (masklow, maskhigh, mode),
                       result, 1, OPTAB_LIB_WIDEN);
  temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
                       0, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);
  emit_label (label);
  return result;
}
3777
3778 /* Expand signed division of OP0 by a power of two D in mode MODE.
3779    This routine is only called for positive values of D.  */
3780
3781 static rtx
3782 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3783 {
3784   rtx temp, label;
3785   int logd;
3786
3787   logd = floor_log2 (d);
3788
3789   if (d == 2
3790       && BRANCH_COST (optimize_insn_for_speed_p (),
3791                       false) >= 1)
3792     {
3793       temp = gen_reg_rtx (mode);
3794       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3795       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3796                            0, OPTAB_LIB_WIDEN);
3797       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3798     }
3799
3800 #ifdef HAVE_conditional_move
3801   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3802       >= 2)
3803     {
3804       rtx temp2;
3805
3806       start_sequence ();
3807       temp2 = copy_to_mode_reg (mode, op0);
3808       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3809                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3810       temp = force_reg (mode, temp);
3811
3812       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3813       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3814                                      mode, temp, temp2, mode, 0);
3815       if (temp2)
3816         {
3817           rtx seq = get_insns ();
3818           end_sequence ();
3819           emit_insn (seq);
3820           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3821         }
3822       end_sequence ();
3823     }
3824 #endif
3825
3826   if (BRANCH_COST (optimize_insn_for_speed_p (),
3827                    false) >= 2)
3828     {
3829       int ushift = GET_MODE_BITSIZE (mode) - logd;
3830
3831       temp = gen_reg_rtx (mode);
3832       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3833       if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3834           > COSTS_N_INSNS (1))
3835         temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3836                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3837       else
3838         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3839                              ushift, NULL_RTX, 1);
3840       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3841                            0, OPTAB_LIB_WIDEN);
3842       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3843     }
3844
3845   label = gen_label_rtx ();
3846   temp = copy_to_mode_reg (mode, op0);
3847   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3848   expand_inc (temp, gen_int_mode (d - 1, mode));
3849   emit_label (label);
3850   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3851 }
3852 \f
3853 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3854    if that is convenient, and returning where the result is.
3855    You may request either the quotient or the remainder as the result;
3856    specify REM_FLAG nonzero to get the remainder.
3857
3858    CODE is the expression code for which kind of division this is;
3859    it controls how rounding is done.  MODE is the machine mode to use.
3860    UNSIGNEDP nonzero means do unsigned division.  */
3861
3862 /* ??? For CEIL_MOD_EXPR, we can compute an uncorrected remainder with ANDI
3863    and then correct it by or'ing in the missing high bits
3864    if the result of ANDI is nonzero.
3865    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3866    This could optimize to a bfexts instruction.
3867    But C doesn't use these operations, so their optimizations are
3868    left for later.  */
3869 /* ??? For modulo, we don't actually need the highpart of the first product,
3870    the low part will do nicely.  And for small divisors, the second multiply
3871    can also be a low-part only multiply or even be completely left out.
3872    E.g. to calculate the remainder of a division by 3 with a 32-bit
3873    multiply, multiply with 0x55555556 and extract the upper two bits;
3874    the result is exact for inputs up to 0x1fffffff.
3875    The input range can be reduced by using cross-sum rules.
3876    For odd divisors >= 3, the following table gives right shift counts
3877    so that if a number is shifted by an integer multiple of the given
3878    amount, the remainder stays the same:
3879    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3880    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3881    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3882    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3883    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3884
3885    Cross-sum rules for even numbers can be derived by leaving as many bits
3886    to the right alone as the divisor has zeros to the right.
3887    E.g. if x is an unsigned 32-bit number:
3888    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3889    */
3890
3891 rtx
3892 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3893                rtx op0, rtx op1, rtx target, int unsignedp)
3894 {
3895   enum machine_mode compute_mode;
3896   rtx tquotient;
3897   rtx quotient = 0, remainder = 0;
3898   rtx last;
3899   int size;
3900   rtx insn;
3901   optab optab1, optab2;
3902   int op1_is_constant, op1_is_pow2 = 0;
3903   int max_cost, extra_cost;
3904   static HOST_WIDE_INT last_div_const = 0;
3905   bool speed = optimize_insn_for_speed_p ();
3906
3907   op1_is_constant = CONST_INT_P (op1);
3908   if (op1_is_constant)
3909     {
3910       unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1);
3911       if (unsignedp)
3912         ext_op1 &= GET_MODE_MASK (mode);
3913       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3914                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3915     }
3916
3917   /*
3918      This is the structure of expand_divmod:
3919
3920      First comes code to fix up the operands so we can perform the operations
3921      correctly and efficiently.
3922
3923      Second comes a switch statement with code specific for each rounding mode.
3924      For some special operands this code emits all RTL for the desired
3925      operation, for other cases, it generates only a quotient and stores it in
3926      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3927      to indicate that it has not done anything.
3928
3929      Last comes code that finishes the operation.  If QUOTIENT is set and
3930      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3931      QUOTIENT is not set, it is computed using trunc rounding.
3932
3933      We try to generate special code for division and remainder when OP1 is a
3934      constant.  If |OP1| = 2**n we can use shifts and some other fast
3935      operations.  For other values of OP1, we compute a carefully selected
3936      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3937      by m.
3938
3939      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3940      half of the product.  Different strategies for generating the product are
3941      implemented in expmed_mult_highpart.
3942
3943      If what we actually want is the remainder, we generate that by another
3944      by-constant multiplication and a subtraction.  */
3945
3946   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3947      code below will malfunction if we are, so check here and handle
3948      the special case if so.  */
3949   if (op1 == const1_rtx)
3950     return rem_flag ? const0_rtx : op0;
3951
3952     /* When dividing by -1, we could get an overflow.
3953      negv_optab can handle overflows.  */
3954   if (! unsignedp && op1 == constm1_rtx)
3955     {
3956       if (rem_flag)
3957         return const0_rtx;
3958       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
3959                           ? negv_optab : neg_optab, op0, target, 0);
3960     }
3961
3962   if (target
3963       /* Don't use the function value register as a target
3964          since we have to read it as well as write it,
3965          and function-inlining gets confused by this.  */
3966       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3967           /* Don't clobber an operand while doing a multi-step calculation.  */
3968           || ((rem_flag || op1_is_constant)
3969               && (reg_mentioned_p (target, op0)
3970                   || (MEM_P (op0) && MEM_P (target))))
3971           || reg_mentioned_p (target, op1)
3972           || (MEM_P (op1) && MEM_P (target))))
3973     target = 0;
3974
3975   /* Get the mode in which to perform this computation.  Normally it will
3976      be MODE, but sometimes we can't do the desired operation in MODE.
3977      If so, pick a wider mode in which we can do the operation.  Convert
3978      to that mode at the start to avoid repeated conversions.
3979
3980      First see what operations we need.  These depend on the expression
3981      we are evaluating.  (We assume that divxx3 insns exist under the
3982      same conditions that modxx3 insns and that these insns don't normally
3983      fail.  If these assumptions are not correct, we may generate less
3984      efficient code in some cases.)
3985
3986      Then see if we find a mode in which we can open-code that operation
3987      (either a division, modulus, or shift).  Finally, check for the smallest
3988      mode for which we can do the operation with a library call.  */
3989
3990   /* We might want to refine this now that we have division-by-constant
3991      optimization.  Since expmed_mult_highpart tries so many variants, it is
3992      not straightforward to generalize this.  Maybe we should make an array
3993      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3994
3995   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3996             ? (unsignedp ? lshr_optab : ashr_optab)
3997             : (unsignedp ? udiv_optab : sdiv_optab));
3998   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3999             ? optab1
4000             : (unsignedp ? udivmod_optab : sdivmod_optab));
4001
4002   for (compute_mode = mode; compute_mode != VOIDmode;
4003        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4004     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4005         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4006       break;
4007
4008   if (compute_mode == VOIDmode)
4009     for (compute_mode = mode; compute_mode != VOIDmode;
4010          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4011       if (optab_libfunc (optab1, compute_mode)
4012           || optab_libfunc (optab2, compute_mode))
4013         break;
4014
4015   /* If we still couldn't find a mode, use MODE, but expand_binop will
4016      probably die.  */
4017   if (compute_mode == VOIDmode)
4018     compute_mode = mode;
4019
4020   if (target && GET_MODE (target) == compute_mode)
4021     tquotient = target;
4022   else
4023     tquotient = gen_reg_rtx (compute_mode);
4024
4025   size = GET_MODE_BITSIZE (compute_mode);
4026 #if 0
4027   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4028      (mode), and thereby get better code when OP1 is a constant.  Do that
4029      later.  It will require going over all usages of SIZE below.  */
4030   size = GET_MODE_BITSIZE (mode);
4031 #endif
4032
4033   /* Only deduct something for a REM if the last divide done was
4034      for a different constant.   Then set the constant of the last
4035      divide.  */
4036   max_cost = (unsignedp
4037               ? udiv_cost (speed, compute_mode)
4038               : sdiv_cost (speed, compute_mode));
4039   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4040                      && INTVAL (op1) == last_div_const))
4041     max_cost -= (mul_cost (speed, compute_mode)
4042                  + add_cost (speed, compute_mode));
4043
4044   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4045
4046   /* Now convert to the best mode to use.  */
4047   if (compute_mode != mode)
4048     {
4049       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4050       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4051
4052       /* convert_modes may have placed op1 into a register, so we
4053          must recompute the following.  */
4054       op1_is_constant = CONST_INT_P (op1);
4055       op1_is_pow2 = (op1_is_constant
4056                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4057                           || (! unsignedp
4058                               && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1))))));
4059     }
4060
4061   /* If one of the operands is a volatile MEM, copy it into a register.  */
4062
4063   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4064     op0 = force_reg (compute_mode, op0);
4065   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4066     op1 = force_reg (compute_mode, op1);
4067
4068   /* If we need the remainder or if OP1 is constant, we need to
4069      put OP0 in a register in case it has any queued subexpressions.  */
4070   if (rem_flag || op1_is_constant)
4071     op0 = force_reg (compute_mode, op0);
4072
4073   last = get_last_insn ();
4074
4075   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4076   if (unsignedp)
4077     {
4078       if (code == FLOOR_DIV_EXPR)
4079         code = TRUNC_DIV_EXPR;
4080       if (code == FLOOR_MOD_EXPR)
4081         code = TRUNC_MOD_EXPR;
4082       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4083         code = TRUNC_DIV_EXPR;
4084     }
4085
4086   if (op1 != const0_rtx)
4087     switch (code)
4088       {
4089       case TRUNC_MOD_EXPR:
4090       case TRUNC_DIV_EXPR:
4091         if (op1_is_constant)
4092           {
4093             if (unsignedp)
4094               {
4095                 unsigned HOST_WIDE_INT mh, ml;
4096                 int pre_shift, post_shift;
4097                 int dummy;
4098                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4099                                             & GET_MODE_MASK (compute_mode));
4100
4101                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4102                   {
4103                     pre_shift = floor_log2 (d);
4104                     if (rem_flag)
4105                       {
4106                         unsigned HOST_WIDE_INT mask
4107                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4108                         remainder
4109                           = expand_binop (compute_mode, and_optab, op0,
4110                                           gen_int_mode (mask, compute_mode),
4111                                           remainder, 1,
4112                                           OPTAB_LIB_WIDEN);
4113                         if (remainder)
4114                           return gen_lowpart (mode, remainder);
4115                       }
4116                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4117                                              pre_shift, tquotient, 1);
4118                   }
4119                 else if (size <= HOST_BITS_PER_WIDE_INT)
4120                   {
4121                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4122                       {
4123                         /* Most significant bit of divisor is set; emit an scc
4124                            insn.  */
4125                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4126                                                           compute_mode, 1, 1);
4127                       }
4128                     else
4129                       {
4130                         /* Find a suitable multiplier and right shift count
4131                            instead of multiplying with D.  */
4132
4133                         mh = choose_multiplier (d, size, size,
4134                                                 &ml, &post_shift, &dummy);
4135
4136                         /* If the suggested multiplier is more than SIZE bits,
4137                            we can do better for even divisors, using an
4138                            initial right shift.  */
4139                         if (mh != 0 && (d & 1) == 0)
4140                           {
4141                             pre_shift = floor_log2 (d & -d);
4142                             mh = choose_multiplier (d >> pre_shift, size,
4143                                                     size - pre_shift,
4144                                                     &ml, &post_shift, &dummy);
4145                             gcc_assert (!mh);
4146                           }
4147                         else
4148                           pre_shift = 0;
4149
4150                         if (mh != 0)
4151                           {
4152                             rtx t1, t2, t3, t4;
4153
4154                             if (post_shift - 1 >= BITS_PER_WORD)
4155                               goto fail1;
4156
4157                             extra_cost
4158                               = (shift_cost (speed, compute_mode, post_shift - 1)
4159                                  + shift_cost (speed, compute_mode, 1)
4160                                  + 2 * add_cost (speed, compute_mode));
4161                             t1 = expmed_mult_highpart
4162                               (compute_mode, op0,
4163                                gen_int_mode (ml, compute_mode),
4164                                NULL_RTX, 1, max_cost - extra_cost);
4165                             if (t1 == 0)
4166                               goto fail1;
4167                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4168                                                                op0, t1),
4169                                                 NULL_RTX);
4170                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4171                                                t2, 1, NULL_RTX, 1);
4172                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4173                                                               t1, t3),
4174                                                 NULL_RTX);
4175                             quotient = expand_shift
4176                               (RSHIFT_EXPR, compute_mode, t4,
4177                                post_shift - 1, tquotient, 1);
4178                           }
4179                         else
4180                           {
4181                             rtx t1, t2;
4182
4183                             if (pre_shift >= BITS_PER_WORD
4184                                 || post_shift >= BITS_PER_WORD)
4185                               goto fail1;
4186
4187                             t1 = expand_shift
4188                               (RSHIFT_EXPR, compute_mode, op0,
4189                                pre_shift, NULL_RTX, 1);
4190                             extra_cost
4191                               = (shift_cost (speed, compute_mode, pre_shift)
4192                                  + shift_cost (speed, compute_mode, post_shift));
4193                             t2 = expmed_mult_highpart
4194                               (compute_mode, t1,
4195                                gen_int_mode (ml, compute_mode),
4196                                NULL_RTX, 1, max_cost - extra_cost);
4197                             if (t2 == 0)
4198                               goto fail1;
4199                             quotient = expand_shift
4200                               (RSHIFT_EXPR, compute_mode, t2,
4201                                post_shift, tquotient, 1);
4202                           }
4203                       }
4204                   }
4205                 else            /* Too wide mode to use tricky code */
4206                   break;
4207
4208                 insn = get_last_insn ();
4209                 if (insn != last)
4210                   set_dst_reg_note (insn, REG_EQUAL,
4211                                     gen_rtx_UDIV (compute_mode, op0, op1),
4212                                     quotient);
4213               }
4214             else                /* TRUNC_DIV, signed */
4215               {
4216                 unsigned HOST_WIDE_INT ml;
4217                 int lgup, post_shift;
4218                 rtx mlr;
4219                 HOST_WIDE_INT d = INTVAL (op1);
4220                 unsigned HOST_WIDE_INT abs_d;
4221
4222                 /* Since d might be INT_MIN, we have to cast to
4223                    unsigned HOST_WIDE_INT before negating to avoid
4224                    undefined signed overflow.  */
4225                 abs_d = (d >= 0
4226                          ? (unsigned HOST_WIDE_INT) d
4227                          : - (unsigned HOST_WIDE_INT) d);
4228
4229                 /* n rem d = n rem -d */
4230                 if (rem_flag && d < 0)
4231                   {
4232                     d = abs_d;
4233                     op1 = gen_int_mode (abs_d, compute_mode);
4234                   }
4235
4236                 if (d == 1)
4237                   quotient = op0;
4238                 else if (d == -1)
4239                   quotient = expand_unop (compute_mode, neg_optab, op0,
4240                                           tquotient, 0);
4241                 else if (HOST_BITS_PER_WIDE_INT >= size
4242                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4243                   {
4244                     /* This case is not handled correctly below.  */
4245                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4246                                                 compute_mode, 1, 1);
4247                     if (quotient == 0)
4248                       goto fail1;
4249                   }
4250                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4251                          && (rem_flag
4252                              ? smod_pow2_cheap (speed, compute_mode)
4253                              : sdiv_pow2_cheap (speed, compute_mode))
4254                          /* We assume that cheap metric is true if the
4255                             optab has an expander for this mode.  */
4256                          && ((optab_handler ((rem_flag ? smod_optab
4257                                               : sdiv_optab),
4258                                              compute_mode)
4259                               != CODE_FOR_nothing)
4260                              || (optab_handler (sdivmod_optab,
4261                                                 compute_mode)
4262                                  != CODE_FOR_nothing)))
4263                   ;
4264                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4265                   {
4266                     if (rem_flag)
4267                       {
4268                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4269                         if (remainder)
4270                           return gen_lowpart (mode, remainder);
4271                       }
4272
4273                     if (sdiv_pow2_cheap (speed, compute_mode)
4274                         && ((optab_handler (sdiv_optab, compute_mode)
4275                              != CODE_FOR_nothing)
4276                             || (optab_handler (sdivmod_optab, compute_mode)
4277                                 != CODE_FOR_nothing)))
4278                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4279                                                 compute_mode, op0,
4280                                                 gen_int_mode (abs_d,
4281                                                               compute_mode),
4282                                                 NULL_RTX, 0);
4283                     else
4284                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4285
4286                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4287                        negate the quotient.  */
4288                     if (d < 0)
4289                       {
4290                         insn = get_last_insn ();
4291                         if (insn != last
4292                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4293                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4294                           set_dst_reg_note (insn, REG_EQUAL,
4295                                             gen_rtx_DIV (compute_mode, op0,
4296                                                          gen_int_mode
4297                                                            (abs_d,
4298                                                             compute_mode)),
4299                                             quotient);
4300
4301                         quotient = expand_unop (compute_mode, neg_optab,
4302                                                 quotient, quotient, 0);
4303                       }
4304                   }
4305                 else if (size <= HOST_BITS_PER_WIDE_INT)
4306                   {
4307                     choose_multiplier (abs_d, size, size - 1,
4308                                        &ml, &post_shift, &lgup);
4309                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4310                       {
4311                         rtx t1, t2, t3;
4312
4313                         if (post_shift >= BITS_PER_WORD
4314                             || size - 1 >= BITS_PER_WORD)
4315                           goto fail1;
4316
4317                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4318                                       + shift_cost (speed, compute_mode, size - 1)
4319                                       + add_cost (speed, compute_mode));
4320                         t1 = expmed_mult_highpart
4321                           (compute_mode, op0, gen_int_mode (ml, compute_mode),
4322                            NULL_RTX, 0, max_cost - extra_cost);
4323                         if (t1 == 0)
4324                           goto fail1;
4325                         t2 = expand_shift
4326                           (RSHIFT_EXPR, compute_mode, t1,
4327                            post_shift, NULL_RTX, 0);
4328                         t3 = expand_shift
4329                           (RSHIFT_EXPR, compute_mode, op0,
4330                            size - 1, NULL_RTX, 0);
4331                         if (d < 0)
4332                           quotient
4333                             = force_operand (gen_rtx_MINUS (compute_mode,
4334                                                             t3, t2),
4335                                              tquotient);
4336                         else
4337                           quotient
4338                             = force_operand (gen_rtx_MINUS (compute_mode,
4339                                                             t2, t3),
4340                                              tquotient);
4341                       }
4342                     else
4343                       {
4344                         rtx t1, t2, t3, t4;
4345
4346                         if (post_shift >= BITS_PER_WORD
4347                             || size - 1 >= BITS_PER_WORD)
4348                           goto fail1;
4349
4350                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4351                         mlr = gen_int_mode (ml, compute_mode);
4352                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4353                                       + shift_cost (speed, compute_mode, size - 1)
4354                                       + 2 * add_cost (speed, compute_mode));
4355                         t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4356                                                    NULL_RTX, 0,
4357                                                    max_cost - extra_cost);
4358                         if (t1 == 0)
4359                           goto fail1;
4360                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4361                                                           t1, op0),
4362                                             NULL_RTX);
4363                         t3 = expand_shift
4364                           (RSHIFT_EXPR, compute_mode, t2,
4365                            post_shift, NULL_RTX, 0);
4366                         t4 = expand_shift
4367                           (RSHIFT_EXPR, compute_mode, op0,
4368                            size - 1, NULL_RTX, 0);
4369                         if (d < 0)
4370                           quotient
4371                             = force_operand (gen_rtx_MINUS (compute_mode,
4372                                                             t4, t3),
4373                                              tquotient);
4374                         else
4375                           quotient
4376                             = force_operand (gen_rtx_MINUS (compute_mode,
4377                                                             t3, t4),
4378                                              tquotient);
4379                       }
4380                   }
4381                 else            /* Too wide mode to use tricky code */
4382                   break;
4383
4384                 insn = get_last_insn ();
4385                 if (insn != last)
4386                   set_dst_reg_note (insn, REG_EQUAL,
4387                                     gen_rtx_DIV (compute_mode, op0, op1),
4388                                     quotient);
4389               }
4390             break;
4391           }
4392       fail1:
4393         delete_insns_since (last);
4394         break;
4395
4396       case FLOOR_DIV_EXPR:
4397       case FLOOR_MOD_EXPR:
4398       /* We will come here only for signed operations.  */
4399         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4400           {
4401             unsigned HOST_WIDE_INT mh, ml;
4402             int pre_shift, lgup, post_shift;
4403             HOST_WIDE_INT d = INTVAL (op1);
4404
4405             if (d > 0)
4406               {
4407                 /* We could just as easily deal with negative constants here,
4408                    but it does not seem worth the trouble for GCC 2.6.  */
4409                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4410                   {
4411                     pre_shift = floor_log2 (d);
4412                     if (rem_flag)
4413                       {
4414                         unsigned HOST_WIDE_INT mask
4415                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4416                         remainder = expand_binop
4417                           (compute_mode, and_optab, op0,
4418                            gen_int_mode (mask, compute_mode),
4419                            remainder, 0, OPTAB_LIB_WIDEN);
4420                         if (remainder)
4421                           return gen_lowpart (mode, remainder);
4422                       }
4423                     quotient = expand_shift
4424                       (RSHIFT_EXPR, compute_mode, op0,
4425                        pre_shift, tquotient, 0);
4426                   }
4427                 else
4428                   {
4429                     rtx t1, t2, t3, t4;
4430
4431                     mh = choose_multiplier (d, size, size - 1,
4432                                             &ml, &post_shift, &lgup);
4433                     gcc_assert (!mh);
4434
4435                     if (post_shift < BITS_PER_WORD
4436                         && size - 1 < BITS_PER_WORD)
4437                       {
4438                         t1 = expand_shift
4439                           (RSHIFT_EXPR, compute_mode, op0,
4440                            size - 1, NULL_RTX, 0);
4441                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4442                                            NULL_RTX, 0, OPTAB_WIDEN);
4443                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4444                                       + shift_cost (speed, compute_mode, size - 1)
4445                                       + 2 * add_cost (speed, compute_mode));
4446                         t3 = expmed_mult_highpart
4447                           (compute_mode, t2, gen_int_mode (ml, compute_mode),
4448                            NULL_RTX, 1, max_cost - extra_cost);
4449                         if (t3 != 0)
4450                           {
4451                             t4 = expand_shift
4452                               (RSHIFT_EXPR, compute_mode, t3,
4453                                post_shift, NULL_RTX, 1);
4454                             quotient = expand_binop (compute_mode, xor_optab,
4455                                                      t4, t1, tquotient, 0,
4456                                                      OPTAB_WIDEN);
4457                           }
4458                       }
4459                   }
4460               }
4461             else
4462               {
4463                 rtx nsign, t1, t2, t3, t4;
4464                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4465                                                   op0, constm1_rtx), NULL_RTX);
4466                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4467                                    0, OPTAB_WIDEN);
4468                 nsign = expand_shift
4469                   (RSHIFT_EXPR, compute_mode, t2,
4470                    size - 1, NULL_RTX, 0);
4471                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4472                                     NULL_RTX);
4473                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4474                                     NULL_RTX, 0);
4475                 if (t4)
4476                   {
4477                     rtx t5;
4478                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4479                                       NULL_RTX, 0);
4480                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4481                                                             t4, t5),
4482                                               tquotient);
4483                   }
4484               }
4485           }
4486
4487         if (quotient != 0)
4488           break;
4489         delete_insns_since (last);
4490
4491         /* Try using an instruction that produces both the quotient and
4492            remainder, using truncation.  We can easily compensate the quotient
4493            or remainder to get floor rounding, once we have the remainder.
4494            Notice that we compute also the final remainder value here,
4495            and return the result right away.  */
4496         if (target == 0 || GET_MODE (target) != compute_mode)
4497           target = gen_reg_rtx (compute_mode);
4498
4499         if (rem_flag)
4500           {
4501             remainder
4502               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4503             quotient = gen_reg_rtx (compute_mode);
4504           }
4505         else
4506           {
4507             quotient
4508               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4509             remainder = gen_reg_rtx (compute_mode);
4510           }
4511
4512         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4513                                  quotient, remainder, 0))
4514           {
4515             /* This could be computed with a branch-less sequence.
4516                Save that for later.  */
4517             rtx tem;
4518             rtx label = gen_label_rtx ();
4519             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4520             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4521                                 NULL_RTX, 0, OPTAB_WIDEN);
4522             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4523             expand_dec (quotient, const1_rtx);
4524             expand_inc (remainder, op1);
4525             emit_label (label);
4526             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4527           }
4528
4529         /* No luck with division elimination or divmod.  Have to do it
4530            by conditionally adjusting op0 *and* the result.  */
4531         {
4532           rtx label1, label2, label3, label4, label5;
4533           rtx adjusted_op0;
4534           rtx tem;
4535
4536           quotient = gen_reg_rtx (compute_mode);
4537           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4538           label1 = gen_label_rtx ();
4539           label2 = gen_label_rtx ();
4540           label3 = gen_label_rtx ();
4541           label4 = gen_label_rtx ();
4542           label5 = gen_label_rtx ();
4543           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4544           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4545           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4546                               quotient, 0, OPTAB_LIB_WIDEN);
4547           if (tem != quotient)
4548             emit_move_insn (quotient, tem);
4549           emit_jump_insn (gen_jump (label5));
4550           emit_barrier ();
4551           emit_label (label1);
4552           expand_inc (adjusted_op0, const1_rtx);
4553           emit_jump_insn (gen_jump (label4));
4554           emit_barrier ();
4555           emit_label (label2);
4556           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4557           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4558                               quotient, 0, OPTAB_LIB_WIDEN);
4559           if (tem != quotient)
4560             emit_move_insn (quotient, tem);
4561           emit_jump_insn (gen_jump (label5));
4562           emit_barrier ();
4563           emit_label (label3);
4564           expand_dec (adjusted_op0, const1_rtx);
4565           emit_label (label4);
4566           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4567                               quotient, 0, OPTAB_LIB_WIDEN);
4568           if (tem != quotient)
4569             emit_move_insn (quotient, tem);
4570           expand_dec (quotient, const1_rtx);
4571           emit_label (label5);
4572         }
4573         break;
4574
4575       case CEIL_DIV_EXPR:
4576       case CEIL_MOD_EXPR:
4577         if (unsignedp)
4578           {
4579             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4580               {
4581                 rtx t1, t2, t3;
4582                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4583                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4584                                    floor_log2 (d), tquotient, 1);
4585                 t2 = expand_binop (compute_mode, and_optab, op0,
4586                                    gen_int_mode (d - 1, compute_mode),
4587                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4588                 t3 = gen_reg_rtx (compute_mode);
4589                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4590                                       compute_mode, 1, 1);
4591                 if (t3 == 0)
4592                   {
4593                     rtx lab;
4594                     lab = gen_label_rtx ();
4595                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4596                     expand_inc (t1, const1_rtx);
4597                     emit_label (lab);
4598                     quotient = t1;
4599                   }
4600                 else
4601                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4602                                                           t1, t3),
4603                                             tquotient);
4604                 break;
4605               }
4606
4607             /* Try using an instruction that produces both the quotient and
4608                remainder, using truncation.  We can easily compensate the
4609                quotient or remainder to get ceiling rounding, once we have the
4610                remainder.  Notice that we compute also the final remainder
4611                value here, and return the result right away.  */
4612             if (target == 0 || GET_MODE (target) != compute_mode)
4613               target = gen_reg_rtx (compute_mode);
4614
4615             if (rem_flag)
4616               {
4617                 remainder = (REG_P (target)
4618                              ? target : gen_reg_rtx (compute_mode));
4619                 quotient = gen_reg_rtx (compute_mode);
4620               }
4621             else
4622               {
4623                 quotient = (REG_P (target)
4624                             ? target : gen_reg_rtx (compute_mode));
4625                 remainder = gen_reg_rtx (compute_mode);
4626               }
4627
4628             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4629                                      remainder, 1))
4630               {
4631                 /* This could be computed with a branch-less sequence.
4632                    Save that for later.  */
4633                 rtx label = gen_label_rtx ();
4634                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4635                                  compute_mode, label);
4636                 expand_inc (quotient, const1_rtx);
4637                 expand_dec (remainder, op1);
4638                 emit_label (label);
4639                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4640               }
4641
4642             /* No luck with division elimination or divmod.  Have to do it
4643                by conditionally adjusting op0 *and* the result.  */
4644             {
4645               rtx label1, label2;
4646               rtx adjusted_op0, tem;
4647
4648               quotient = gen_reg_rtx (compute_mode);
4649               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4650               label1 = gen_label_rtx ();
4651               label2 = gen_label_rtx ();
4652               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4653                                compute_mode, label1);
4654               emit_move_insn  (quotient, const0_rtx);
4655               emit_jump_insn (gen_jump (label2));
4656               emit_barrier ();
4657               emit_label (label1);
4658               expand_dec (adjusted_op0, const1_rtx);
4659               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4660                                   quotient, 1, OPTAB_LIB_WIDEN);
4661               if (tem != quotient)
4662                 emit_move_insn (quotient, tem);
4663               expand_inc (quotient, const1_rtx);
4664               emit_label (label2);
4665             }
4666           }
4667         else /* signed */
4668           {
4669             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4670                 && INTVAL (op1) >= 0)
4671               {
4672                 /* This is extremely similar to the code for the unsigned case
4673                    above.  For 2.7 we should merge these variants, but for
4674                    2.6.1 I don't want to touch the code for unsigned since that
4675                    get used in C.  The signed case will only be used by other
4676                    languages (Ada).  */
4677
4678                 rtx t1, t2, t3;
4679                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4680                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4681                                    floor_log2 (d), tquotient, 0);
4682                 t2 = expand_binop (compute_mode, and_optab, op0,
4683                                    gen_int_mode (d - 1, compute_mode),
4684                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4685                 t3 = gen_reg_rtx (compute_mode);
4686                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4687                                       compute_mode, 1, 1);
4688                 if (t3 == 0)
4689                   {
4690                     rtx lab;
4691                     lab = gen_label_rtx ();
4692                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4693                     expand_inc (t1, const1_rtx);
4694                     emit_label (lab);
4695                     quotient = t1;
4696                   }
4697                 else
4698                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4699                                                           t1, t3),
4700                                             tquotient);
4701                 break;
4702               }
4703
4704             /* Try using an instruction that produces both the quotient and
4705                remainder, using truncation.  We can easily compensate the
4706                quotient or remainder to get ceiling rounding, once we have the
4707                remainder.  Notice that we compute also the final remainder
4708                value here, and return the result right away.  */
4709             if (target == 0 || GET_MODE (target) != compute_mode)
4710               target = gen_reg_rtx (compute_mode);
4711             if (rem_flag)
4712               {
4713                 remainder= (REG_P (target)
4714                             ? target : gen_reg_rtx (compute_mode));
4715                 quotient = gen_reg_rtx (compute_mode);
4716               }
4717             else
4718               {
4719                 quotient = (REG_P (target)
4720                             ? target : gen_reg_rtx (compute_mode));
4721                 remainder = gen_reg_rtx (compute_mode);
4722               }
4723
4724             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4725                                      remainder, 0))
4726               {
4727                 /* This could be computed with a branch-less sequence.
4728                    Save that for later.  */
4729                 rtx tem;
4730                 rtx label = gen_label_rtx ();
4731                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4732                                  compute_mode, label);
4733                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4734                                     NULL_RTX, 0, OPTAB_WIDEN);
4735                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4736                 expand_inc (quotient, const1_rtx);
4737                 expand_dec (remainder, op1);
4738                 emit_label (label);
4739                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4740               }
4741
4742             /* No luck with division elimination or divmod.  Have to do it
4743                by conditionally adjusting op0 *and* the result.  */
4744             {
4745               rtx label1, label2, label3, label4, label5;
4746               rtx adjusted_op0;
4747               rtx tem;
4748
4749               quotient = gen_reg_rtx (compute_mode);
4750               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4751               label1 = gen_label_rtx ();
4752               label2 = gen_label_rtx ();
4753               label3 = gen_label_rtx ();
4754               label4 = gen_label_rtx ();
4755               label5 = gen_label_rtx ();
4756               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4757               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4758                                compute_mode, label1);
4759               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4760                                   quotient, 0, OPTAB_LIB_WIDEN);
4761               if (tem != quotient)
4762                 emit_move_insn (quotient, tem);
4763               emit_jump_insn (gen_jump (label5));
4764               emit_barrier ();
4765               emit_label (label1);
4766               expand_dec (adjusted_op0, const1_rtx);
4767               emit_jump_insn (gen_jump (label4));
4768               emit_barrier ();
4769               emit_label (label2);
4770               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4771                                compute_mode, label3);
4772               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4773                                   quotient, 0, OPTAB_LIB_WIDEN);
4774               if (tem != quotient)
4775                 emit_move_insn (quotient, tem);
4776               emit_jump_insn (gen_jump (label5));
4777               emit_barrier ();
4778               emit_label (label3);
4779               expand_inc (adjusted_op0, const1_rtx);
4780               emit_label (label4);
4781               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4782                                   quotient, 0, OPTAB_LIB_WIDEN);
4783               if (tem != quotient)
4784                 emit_move_insn (quotient, tem);
4785               expand_inc (quotient, const1_rtx);
4786               emit_label (label5);
4787             }
4788           }
4789         break;
4790
4791       case EXACT_DIV_EXPR:
4792         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4793           {
4794             HOST_WIDE_INT d = INTVAL (op1);
4795             unsigned HOST_WIDE_INT ml;
4796             int pre_shift;
4797             rtx t1;
4798
4799             pre_shift = floor_log2 (d & -d);
4800             ml = invert_mod2n (d >> pre_shift, size);
4801             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4802                                pre_shift, NULL_RTX, unsignedp);
4803             quotient = expand_mult (compute_mode, t1,
4804                                     gen_int_mode (ml, compute_mode),
4805                                     NULL_RTX, 1);
4806
4807             insn = get_last_insn ();
4808             set_dst_reg_note (insn, REG_EQUAL,
4809                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4810                                               compute_mode, op0, op1),
4811                               quotient);
4812           }
4813         break;
4814
4815       case ROUND_DIV_EXPR:
4816       case ROUND_MOD_EXPR:
4817         if (unsignedp)
4818           {
4819             rtx tem;
4820             rtx label;
4821             label = gen_label_rtx ();
4822             quotient = gen_reg_rtx (compute_mode);
4823             remainder = gen_reg_rtx (compute_mode);
4824             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4825               {
4826                 rtx tem;
4827                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4828                                          quotient, 1, OPTAB_LIB_WIDEN);
4829                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4830                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4831                                           remainder, 1, OPTAB_LIB_WIDEN);
4832               }
4833             tem = plus_constant (compute_mode, op1, -1);
4834             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4835             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4836             expand_inc (quotient, const1_rtx);
4837             expand_dec (remainder, op1);
4838             emit_label (label);
4839           }
4840         else
4841           {
4842             rtx abs_rem, abs_op1, tem, mask;
4843             rtx label;
4844             label = gen_label_rtx ();
4845             quotient = gen_reg_rtx (compute_mode);
4846             remainder = gen_reg_rtx (compute_mode);
4847             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4848               {
4849                 rtx tem;
4850                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4851                                          quotient, 0, OPTAB_LIB_WIDEN);
4852                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4853                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4854                                           remainder, 0, OPTAB_LIB_WIDEN);
4855               }
4856             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4857             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4858             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4859                                 1, NULL_RTX, 1);
4860             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4861             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4862                                 NULL_RTX, 0, OPTAB_WIDEN);
4863             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4864                                  size - 1, NULL_RTX, 0);
4865             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4866                                 NULL_RTX, 0, OPTAB_WIDEN);
4867             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4868                                 NULL_RTX, 0, OPTAB_WIDEN);
4869             expand_inc (quotient, tem);
4870             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4871                                 NULL_RTX, 0, OPTAB_WIDEN);
4872             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4873                                 NULL_RTX, 0, OPTAB_WIDEN);
4874             expand_dec (remainder, tem);
4875             emit_label (label);
4876           }
4877         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4878
4879       default:
4880         gcc_unreachable ();
4881       }
4882
4883   if (quotient == 0)
4884     {
4885       if (target && GET_MODE (target) != compute_mode)
4886         target = 0;
4887
4888       if (rem_flag)
4889         {
4890           /* Try to produce the remainder without producing the quotient.
4891              If we seem to have a divmod pattern that does not require widening,
4892              don't try widening here.  We should really have a WIDEN argument
4893              to expand_twoval_binop, since what we'd really like to do here is
4894              1) try a mod insn in compute_mode
4895              2) try a divmod insn in compute_mode
4896              3) try a div insn in compute_mode and multiply-subtract to get
4897                 remainder
4898              4) try the same things with widening allowed.  */
4899           remainder
4900             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4901                                  op0, op1, target,
4902                                  unsignedp,
4903                                  ((optab_handler (optab2, compute_mode)
4904                                    != CODE_FOR_nothing)
4905                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4906           if (remainder == 0)
4907             {
4908               /* No luck there.  Can we do remainder and divide at once
4909                  without a library call?  */
4910               remainder = gen_reg_rtx (compute_mode);
4911               if (! expand_twoval_binop ((unsignedp
4912                                           ? udivmod_optab
4913                                           : sdivmod_optab),
4914                                          op0, op1,
4915                                          NULL_RTX, remainder, unsignedp))
4916                 remainder = 0;
4917             }
4918
4919           if (remainder)
4920             return gen_lowpart (mode, remainder);
4921         }
4922
4923       /* Produce the quotient.  Try a quotient insn, but not a library call.
4924          If we have a divmod in this mode, use it in preference to widening
4925          the div (for this test we assume it will not fail). Note that optab2
4926          is set to the one of the two optabs that the call below will use.  */
4927       quotient
4928         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4929                              op0, op1, rem_flag ? NULL_RTX : target,
4930                              unsignedp,
4931                              ((optab_handler (optab2, compute_mode)
4932                                != CODE_FOR_nothing)
4933                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4934
4935       if (quotient == 0)
4936         {
4937           /* No luck there.  Try a quotient-and-remainder insn,
4938              keeping the quotient alone.  */
4939           quotient = gen_reg_rtx (compute_mode);
4940           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4941                                      op0, op1,
4942                                      quotient, NULL_RTX, unsignedp))
4943             {
4944               quotient = 0;
4945               if (! rem_flag)
4946                 /* Still no luck.  If we are not computing the remainder,
4947                    use a library call for the quotient.  */
4948                 quotient = sign_expand_binop (compute_mode,
4949                                               udiv_optab, sdiv_optab,
4950                                               op0, op1, target,
4951                                               unsignedp, OPTAB_LIB_WIDEN);
4952             }
4953         }
4954     }
4955
4956   if (rem_flag)
4957     {
4958       if (target && GET_MODE (target) != compute_mode)
4959         target = 0;
4960
4961       if (quotient == 0)
4962         {
4963           /* No divide instruction either.  Use library for remainder.  */
4964           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4965                                          op0, op1, target,
4966                                          unsignedp, OPTAB_LIB_WIDEN);
4967           /* No remainder function.  Try a quotient-and-remainder
4968              function, keeping the remainder.  */
4969           if (!remainder)
4970             {
4971               remainder = gen_reg_rtx (compute_mode);
4972               if (!expand_twoval_binop_libfunc
4973                   (unsignedp ? udivmod_optab : sdivmod_optab,
4974                    op0, op1,
4975                    NULL_RTX, remainder,
4976                    unsignedp ? UMOD : MOD))
4977                 remainder = NULL_RTX;
4978             }
4979         }
4980       else
4981         {
4982           /* We divided.  Now finish doing X - Y * (X / Y).  */
4983           remainder = expand_mult (compute_mode, quotient, op1,
4984                                    NULL_RTX, unsignedp);
4985           remainder = expand_binop (compute_mode, sub_optab, op0,
4986                                     remainder, target, unsignedp,
4987                                     OPTAB_LIB_WIDEN);
4988         }
4989     }
4990
4991   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4992 }
4993 \f
4994 /* Return a tree node with data type TYPE, describing the value of X.
4995    Usually this is an VAR_DECL, if there is no obvious better choice.
4996    X may be an expression, however we only support those expressions
4997    generated by loop.c.  */
4998
4999 tree
5000 make_tree (tree type, rtx x)
5001 {
5002   tree t;
5003
5004   switch (GET_CODE (x))
5005     {
5006     case CONST_INT:
5007       {
5008         HOST_WIDE_INT hi = 0;
5009
5010         if (INTVAL (x) < 0
5011             && !(TYPE_UNSIGNED (type)
5012                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
5013                      < HOST_BITS_PER_WIDE_INT)))
5014           hi = -1;
5015
5016         t = build_int_cst_wide (type, INTVAL (x), hi);
5017
5018         return t;
5019       }
5020
5021     case CONST_DOUBLE:
5022       if (GET_MODE (x) == VOIDmode)
5023         t = build_int_cst_wide (type,
5024                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
5025       else
5026         {
5027           REAL_VALUE_TYPE d;
5028
5029           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5030           t = build_real (type, d);
5031         }
5032
5033       return t;
5034
5035     case CONST_VECTOR:
5036       {
5037         int units = CONST_VECTOR_NUNITS (x);
5038         tree itype = TREE_TYPE (type);
5039         tree *elts;
5040         int i;
5041
5042         /* Build a tree with vector elements.  */
5043         elts = XALLOCAVEC (tree, units);
5044         for (i = units - 1; i >= 0; --i)
5045           {
5046             rtx elt = CONST_VECTOR_ELT (x, i);
5047             elts[i] = make_tree (itype, elt);
5048           }
5049
5050         return build_vector (type, elts);
5051       }
5052
5053     case PLUS:
5054       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5055                           make_tree (type, XEXP (x, 1)));
5056
5057     case MINUS:
5058       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5059                           make_tree (type, XEXP (x, 1)));
5060
5061     case NEG:
5062       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5063
5064     case MULT:
5065       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5066                           make_tree (type, XEXP (x, 1)));
5067
5068     case ASHIFT:
5069       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5070                           make_tree (type, XEXP (x, 1)));
5071
5072     case LSHIFTRT:
5073       t = unsigned_type_for (type);
5074       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5075                                          make_tree (t, XEXP (x, 0)),
5076                                          make_tree (type, XEXP (x, 1))));
5077
5078     case ASHIFTRT:
5079       t = signed_type_for (type);
5080       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5081                                          make_tree (t, XEXP (x, 0)),
5082                                          make_tree (type, XEXP (x, 1))));
5083
5084     case DIV:
5085       if (TREE_CODE (type) != REAL_TYPE)
5086         t = signed_type_for (type);
5087       else
5088         t = type;
5089
5090       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5091                                          make_tree (t, XEXP (x, 0)),
5092                                          make_tree (t, XEXP (x, 1))));
5093     case UDIV:
5094       t = unsigned_type_for (type);
5095       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5096                                          make_tree (t, XEXP (x, 0)),
5097                                          make_tree (t, XEXP (x, 1))));
5098
5099     case SIGN_EXTEND:
5100     case ZERO_EXTEND:
5101       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5102                                           GET_CODE (x) == ZERO_EXTEND);
5103       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5104
5105     case CONST:
5106       return make_tree (type, XEXP (x, 0));
5107
5108     case SYMBOL_REF:
5109       t = SYMBOL_REF_DECL (x);
5110       if (t)
5111         return fold_convert (type, build_fold_addr_expr (t));
5112       /* else fall through.  */
5113
5114     default:
5115       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5116
5117       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5118          address mode to pointer mode.  */
5119       if (POINTER_TYPE_P (type))
5120         x = convert_memory_address_addr_space
5121               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5122
5123       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5124          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5125       t->decl_with_rtl.rtl = x;
5126
5127       return t;
5128     }
5129 }
5130 \f
5131 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5132    and returning TARGET.
5133
5134    If TARGET is 0, a pseudo-register or constant is returned.  */
5135
5136 rtx
5137 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5138 {
5139   rtx tem = 0;
5140
5141   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5142     tem = simplify_binary_operation (AND, mode, op0, op1);
5143   if (tem == 0)
5144     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5145
5146   if (target == 0)
5147     target = tem;
5148   else if (tem != target)
5149     emit_move_insn (target, tem);
5150   return target;
5151 }
5152
5153 /* Helper function for emit_store_flag.  */
5154 static rtx
5155 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5156              enum machine_mode mode, enum machine_mode compare_mode,
5157              int unsignedp, rtx x, rtx y, int normalizep,
5158              enum machine_mode target_mode)
5159 {
5160   struct expand_operand ops[4];
5161   rtx op0, last, comparison, subtarget;
5162   enum machine_mode result_mode = targetm.cstore_mode (icode);
5163
5164   last = get_last_insn ();
5165   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5166   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5167   if (!x || !y)
5168     {
5169       delete_insns_since (last);
5170       return NULL_RTX;
5171     }
5172
5173   if (target_mode == VOIDmode)
5174     target_mode = result_mode;
5175   if (!target)
5176     target = gen_reg_rtx (target_mode);
5177
5178   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5179
5180   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5181   create_fixed_operand (&ops[1], comparison);
5182   create_fixed_operand (&ops[2], x);
5183   create_fixed_operand (&ops[3], y);
5184   if (!maybe_expand_insn (icode, 4, ops))
5185     {
5186       delete_insns_since (last);
5187       return NULL_RTX;
5188     }
5189   subtarget = ops[0].value;
5190
5191   /* If we are converting to a wider mode, first convert to
5192      TARGET_MODE, then normalize.  This produces better combining
5193      opportunities on machines that have a SIGN_EXTRACT when we are
5194      testing a single bit.  This mostly benefits the 68k.
5195
5196      If STORE_FLAG_VALUE does not have the sign bit set when
5197      interpreted in MODE, we can do this conversion as unsigned, which
5198      is usually more efficient.  */
5199   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5200     {
5201       convert_move (target, subtarget,
5202                     val_signbit_known_clear_p (result_mode,
5203                                                STORE_FLAG_VALUE));
5204       op0 = target;
5205       result_mode = target_mode;
5206     }
5207   else
5208     op0 = subtarget;
5209
5210   /* If we want to keep subexpressions around, don't reuse our last
5211      target.  */
5212   if (optimize)
5213     subtarget = 0;
5214
5215   /* Now normalize to the proper value in MODE.  Sometimes we don't
5216      have to do anything.  */
5217   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5218     ;
5219   /* STORE_FLAG_VALUE might be the most negative number, so write
5220      the comparison this way to avoid a compiler-time warning.  */
5221   else if (- normalizep == STORE_FLAG_VALUE)
5222     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5223
5224   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5225      it hard to use a value of just the sign bit due to ANSI integer
5226      constant typing rules.  */
5227   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5228     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5229                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5230                         normalizep == 1);
5231   else
5232     {
5233       gcc_assert (STORE_FLAG_VALUE & 1);
5234
5235       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5236       if (normalizep == -1)
5237         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5238     }
5239
5240   /* If we were converting to a smaller mode, do the conversion now.  */
5241   if (target_mode != result_mode)
5242     {
5243       convert_move (target, op0, 0);
5244       return target;
5245     }
5246   else
5247     return op0;
5248 }
5249
5250
5251 /* A subroutine of emit_store_flag only including "tricks" that do not
5252    need a recursive call.  These are kept separate to avoid infinite
5253    loops.  (The double-word case below does call emit_store_flag, but
        only on a word_mode value derived from OP0.)

        TARGET, CODE, OP0, OP1, MODE, UNSIGNEDP and NORMALIZEP are as for
        emit_store_flag.  TARGET_MODE is the mode wanted for the result;
        emit_store_flag passes VOIDmode when it has no TARGET.

        Return an rtx holding the result, or 0 if none of the strategies
        tried here produced one.  */
5254
5255 static rtx
5256 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5257                    enum machine_mode mode, int unsignedp, int normalizep,
5258                    enum machine_mode target_mode)
5259 {
5260   rtx subtarget;
5261   enum insn_code icode;
5262   enum machine_mode compare_mode;
5263   enum mode_class mclass;
5264   enum rtx_code scode;
5265   rtx tem;
5266
5267   if (unsignedp)
5268     code = unsigned_condition (code);
       /* SCODE is CODE with the operand order swapped, computed before any
          of the rewrites below.  It is used only for the swapped-operand
          retry on floating-point modes at the end of this function.  */
5269   scode = swap_condition (code);
5270
5271   /* If one operand is constant, make it the second one.  Only do this
5272      if the other operand is not constant as well.  */
5273
5274   if (swap_commutative_operands_p (op0, op1))
5275     {
5276       tem = op0;
5277       op0 = op1;
5278       op1 = tem;
5279       code = swap_condition (code);
5280     }
5281
5282   if (mode == VOIDmode)
5283     mode = GET_MODE (op0);
5284
5285   /* For some comparisons with 1 and -1, we can convert this to
5286      comparisons with zero.  This will often produce more opportunities for
5287      store-flag insns.  */
5288
5289   switch (code)
5290     {
5291     case LT:
5292       if (op1 == const1_rtx)
5293         op1 = const0_rtx, code = LE;
5294       break;
5295     case LE:
5296       if (op1 == constm1_rtx)
5297         op1 = const0_rtx, code = LT;
5298       break;
5299     case GE:
5300       if (op1 == const1_rtx)
5301         op1 = const0_rtx, code = GT;
5302       break;
5303     case GT:
5304       if (op1 == constm1_rtx)
5305         op1 = const0_rtx, code = GE;
5306       break;
5307     case GEU:
5308       if (op1 == const1_rtx)
5309         op1 = const0_rtx, code = NE;
5310       break;
5311     case LTU:
5312       if (op1 == const1_rtx)
5313         op1 = const0_rtx, code = EQ;
5314       break;
5315     default:
5316       break;
5317     }
5318
5319   /* If we are comparing a double-word integer with zero or -1, we can
5320      convert the comparison into one involving a single word.  */
5321   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5322       && GET_MODE_CLASS (mode) == MODE_INT
5323       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5324     {
5325       if ((code == EQ || code == NE)
5326           && (op1 == const0_rtx || op1 == constm1_rtx))
5327         {
5328           rtx op00, op01;
5329
5330           /* Do a logical OR or AND of the two words and compare the
5331              result.  */
5332           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5333           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5334           tem = expand_binop (word_mode,
5335                               op1 == const0_rtx ? ior_optab : and_optab,
5336                               op00, op01, NULL_RTX, unsignedp,
5337                               OPTAB_DIRECT);
5338
5339           if (tem != 0)
5340             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5341                                    unsignedp, normalizep);
5342         }
5343       else if ((code == LT || code == GE) && op1 == const0_rtx)
5344         {
5345           rtx op0h;
5346
5347           /* If testing the sign bit, can just test on high word.  */
5348           op0h = simplify_gen_subreg (word_mode, op0, mode,
5349                                       subreg_highpart_offset (word_mode,
5350                                                               mode));
5351           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5352                                  unsignedp, normalizep);
5353         }
5354       else
5355         tem = NULL_RTX;
5356
           /* A word_mode result was produced.  Return it directly if its mode
              is acceptable; otherwise convert it into TARGET, extending as
              unsigned unless the true value has the word_mode sign bit set.  */
5357       if (tem)
5358         {
5359           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5360             return tem;
5361           if (!target)
5362             target = gen_reg_rtx (target_mode);
5363
5364           convert_move (target, tem,
5365                         !val_signbit_known_set_p (word_mode,
5366                                                   (normalizep ? normalizep
5367                                                    : STORE_FLAG_VALUE)));
5368           return target;
5369         }
5370     }
5371
5372   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5373      complement of A (for GE) and shifting the sign bit to the low bit.  */
5374   if (op1 == const0_rtx && (code == LT || code == GE)
5375       && GET_MODE_CLASS (mode) == MODE_INT
5376       && (normalizep || STORE_FLAG_VALUE == 1
5377           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5378     {
5379       subtarget = target;
5380
           /* With no TARGET, produce the result in MODE itself.  */
5381       if (!target)
5382         target_mode = mode;
5383
5384       /* If the result is to be wider than OP0, it is best to convert it
5385          first.  If it is to be narrower, it is *incorrect* to convert it
5386          first.  */
5387       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5388         {
5389           op0 = convert_modes (target_mode, mode, op0, 0);
5390           mode = target_mode;
5391         }
5392
           /* TARGET cannot serve as the scratch destination when its mode
              differs from the mode the computation is done in.  */
5393       if (target_mode != mode)
5394         subtarget = 0;
5395
           /* For GE, complement first so that the sign bit is set exactly
              when OP0 >= 0.  */
5396       if (code == GE)
5397         op0 = expand_unop (mode, one_cmpl_optab, op0,
5398                            ((STORE_FLAG_VALUE == 1 || normalizep)
5399                             ? 0 : subtarget), 0);
5400
5401       if (STORE_FLAG_VALUE == 1 || normalizep)
5402         /* If we are supposed to produce a 0/1 value, we want to do
5403            a logical shift from the sign bit to the low-order bit; for
5404            a -1/0 value, we do an arithmetic shift.  */
5405         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5406                             GET_MODE_BITSIZE (mode) - 1,
5407                             subtarget, normalizep != -1);
5408
           /* Convert to TARGET_MODE if the computation was done in a
              different mode.  */
5409       if (mode != target_mode)
5410         op0 = convert_modes (target_mode, mode, op0, 0);
5411
5412       return op0;
5413     }
5414
       /* Look for a cstore pattern, starting with MODE and moving on to
          successively wider modes; for MODE_CC the optab is always queried
          in CCmode.  Only the first mode that provides a pattern is tried
          (note the break at the end of the loop body).  */
5415   mclass = GET_MODE_CLASS (mode);
5416   for (compare_mode = mode; compare_mode != VOIDmode;
5417        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5418     {
5419      enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5420      icode = optab_handler (cstore_optab, optab_mode);
5421      if (icode != CODE_FOR_nothing)
5422         {
5423           do_pending_stack_adjust ();
5424           tem = emit_cstore (target, icode, code, mode, compare_mode,
5425                              unsignedp, op0, op1, normalizep, target_mode);
5426           if (tem)
5427             return tem;
5428
               /* For floating-point modes, retry with the operands exchanged
                  and the correspondingly swapped condition.  */
5429           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5430             {
5431               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5432                                  unsignedp, op1, op0, normalizep, target_mode);
5433               if (tem)
5434                 return tem;
5435             }
5436           break;
5437         }
5438     }
5439
5440   return 0;
5441 }
5442
5443 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5444    and storing in TARGET.  Normally return TARGET.
5445    Return 0 if that cannot be done.
5446
5447    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5448    it is VOIDmode, they cannot both be CONST_INT.
5449
5450    UNSIGNEDP is for the case where we have to widen the operands
5451    to perform the operation.  It says to use zero-extension.
5452
5453    NORMALIZEP is 1 if we should convert the result to be either zero
5454    or one.  NORMALIZEP is -1 if we should convert the result to be
5455    either zero or -1.  If NORMALIZEP is zero, the result will be left
5456    "raw" out of the scc insn.

        TARGET may be 0, in which case the rtx returned is chosen here.

        emit_store_flag_1 is tried first.  If it fails, and branches are
        not free according to BRANCH_COST, a number of branch-free
        arithmetic sequences are attempted; insns emitted by an attempt
        that fails are removed again with delete_insns_since.  */
5457
5458 rtx
5459 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5460                  enum machine_mode mode, int unsignedp, int normalizep)
5461 {
       /* TARGET_MODE is VOIDmode when the caller supplied no TARGET.  */
5462   enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5463   enum rtx_code rcode;
5464   rtx subtarget;
5465   rtx tem, last, trueval;
5466
5467   /* If we compare constants, we shouldn't use a store-flag operation,
5468      but a constant load.  We can get there via the vanilla route that
5469      usually generates a compare-branch sequence, but will in this case
5470      fold the comparison to a constant, and thus elide the branch.  */
5471   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5472     return NULL_RTX;
5473
5474   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5475                            target_mode);
5476   if (tem)
5477     return tem;
5478
5479   /* If we reached here, we can't do this with a scc insn, however there
5480      are some comparisons that can be done in other ways.  Don't do any
5481      of these cases if branches are very cheap.  */
5482   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5483     return 0;
5484
5485   /* See what we need to return.  We can only return a 1, -1, or the
5486      sign bit.  */
5487
5488   if (normalizep == 0)
5489     {
5490       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5491         normalizep = STORE_FLAG_VALUE;
5492
5493       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5494         ;
5495       else
5496         return 0;
5497     }
5498
       /* Remember where we are so that failed attempts can be undone.  */
5499   last = get_last_insn ();
5500
5501   /* If optimizing, use different pseudo registers for each insn, instead
5502      of reusing the same pseudo.  This leads to better CSE, but slows
5503      down the compiler, since there are more pseudos.  */
5504   subtarget = (!optimize
5505                && (target_mode == mode)) ? target : NULL_RTX;
       /* TRUEVAL is the constant the result holds when the comparison is
          true.  */
5506   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5507
5508   /* For floating-point comparisons, try the reverse comparison or try
5509      changing the "orderedness" of the comparison.  */
5510   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5511     {
5512       enum rtx_code first_code;
5513       bool and_them;
5514
5515       rcode = reverse_condition_maybe_unordered (code);
5516       if (can_compare_p (rcode, mode, ccp_store_flag)
5517           && (code == ORDERED || code == UNORDERED
5518               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5519               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5520         {
               /* WANT_ADD: the raw flag value and the requested normalization
                  are 1 and -1 in some order.  Adding NORMALIZEP to the raw
                  result of the reversed comparison then gives 0 where the
                  reversed comparison holds and NORMALIZEP where it does not.
                  Otherwise XORing the reversed result with TRUEVAL has the
                  same effect.  Either form is used only when rtx_cost
                  reports a cost of zero for the constant operand.  */
5521           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5522                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5523
5524           /* For the reverse comparison, use either an addition or a XOR.  */
5525           if (want_add
5526               && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5527                            optimize_insn_for_speed_p ()) == 0)
5528             {
5529               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5530                                        STORE_FLAG_VALUE, target_mode);
5531               if (tem)
5532                 return expand_binop (target_mode, add_optab, tem,
5533                                      gen_int_mode (normalizep, target_mode),
5534                                      target, 0, OPTAB_WIDEN);
5535             }
5536           else if (!want_add
5537                    && rtx_cost (trueval, XOR, 1,
5538                                 optimize_insn_for_speed_p ()) == 0)
5539             {
5540               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5541                                        normalizep, target_mode);
5542               if (tem)
5543                 return expand_binop (target_mode, xor_optab, tem, trueval,
5544                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5545             }
5546         }
5547
5548       delete_insns_since (last);
5549
5550       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5551       if (code == ORDERED || code == UNORDERED)
5552         return 0;
5553
           /* Split CODE into FIRST_CODE and a new CODE.  AND_THEM says
              whether the two partial results are to be combined with AND
              (true) or with OR (false).  */
5554       and_them = split_comparison (code, mode, &first_code, &code);
5555
5556       /* If there are no NaNs, the first comparison should always fall through.
5557          Effectively change the comparison to the other one.  */
5558       if (!HONOR_NANS (mode))
5559         {
5560           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5561           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5562                                     target_mode);
5563         }
5564
5565 #ifdef HAVE_conditional_move
5566       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5567          conditional move.  */
5568       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5569                                normalizep, target_mode);
5570       if (tem == 0)
5571         return 0;
5572
           /* AND case: the result is TEM where CODE holds and 0 elsewhere.
              OR case: the result is TRUEVAL where CODE holds and TEM
              elsewhere.  */
5573       if (and_them)
5574         tem = emit_conditional_move (target, code, op0, op1, mode,
5575                                      tem, const0_rtx, GET_MODE (tem), 0);
5576       else
5577         tem = emit_conditional_move (target, code, op0, op1, mode,
5578                                      trueval, tem, GET_MODE (tem), 0);
5579
5580       if (tem == 0)
5581         delete_insns_since (last);
5582       return tem;
5583 #else
5584       return 0;
5585 #endif
5586     }
5587
5588   /* The remaining tricks only apply to integer comparisons.  */
5589
5590   if (GET_MODE_CLASS (mode) != MODE_INT)
5591     return 0;
5592
5593   /* If this is an equality comparison of integers, we can try to exclusive-or
5594      (or subtract) the two operands and use a recursive call to try the
5595      comparison with zero.  Don't do any of these cases if branches are
5596      very cheap.  */
5597
5598   if ((code == EQ || code == NE) && op1 != const0_rtx)
5599     {
5600       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5601                           OPTAB_WIDEN);
5602
5603       if (tem == 0)
5604         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5605                             OPTAB_WIDEN);
5606       if (tem != 0)
5607         tem = emit_store_flag (target, code, tem, const0_rtx,
5608                                mode, unsignedp, normalizep);
5609       if (tem != 0)
5610         return tem;
5611
5612       delete_insns_since (last);
5613     }
5614
5615   /* For integer comparisons, try the reverse comparison.  However, for
5616      small X and if we'd have anyway to extend, implementing "X != 0"
5617      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5618   rcode = reverse_condition (code);
5619   if (can_compare_p (rcode, mode, ccp_store_flag)
5620       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5621             && code == NE
5622             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5623             && op1 == const0_rtx))
5624     {
5625       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5626                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5627
5628       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5629       if (want_add
5630           && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5631                        optimize_insn_for_speed_p ()) == 0)
5632         {
5633           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5634                                    STORE_FLAG_VALUE, target_mode);
5635           if (tem != 0)
5636             tem = expand_binop (target_mode, add_optab, tem,
5637                                 gen_int_mode (normalizep, target_mode),
5638                                 target, 0, OPTAB_WIDEN);
5639         }
5640       else if (!want_add
5641                && rtx_cost (trueval, XOR, 1,
5642                             optimize_insn_for_speed_p ()) == 0)
5643         {
5644           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5645                                    normalizep, target_mode);
5646           if (tem != 0)
5647             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5648                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5649         }
5650
5651       if (tem != 0)
5652         return tem;
5653       delete_insns_since (last);
5654     }
5655
5656   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5657      the constant zero.  Reject all other comparisons at this point.  Only
5658      do LE and GT if branches are expensive since they are expensive on
5659      2-operand machines.  */
5660
5661   if (op1 != const0_rtx
5662       || (code != EQ && code != NE
5663           && (BRANCH_COST (optimize_insn_for_speed_p (),
5664                            false) <= 1 || (code != LE && code != GT))))
5665     return 0;
5666
5667   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5668      do the necessary operation below.  */
5669
5670   tem = 0;
5671
5672   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5673      the sign bit set.  */
5674
5675   if (code == LE)
5676     {
5677       /* This is destructive, so SUBTARGET can't be OP0.  */
5678       if (rtx_equal_p (subtarget, op0))
5679         subtarget = 0;
5680
5681       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5682                           OPTAB_WIDEN);
5683       if (tem)
5684         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5685                             OPTAB_WIDEN);
5686     }
5687
5688   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5689      number of bits in the mode of OP0, minus one.  The shift is an
        arithmetic right shift, so A > 0 iff the result has the sign bit
        set.  */
5690
5691   if (code == GT)
5692     {
5693       if (rtx_equal_p (subtarget, op0))
5694         subtarget = 0;
5695
5696       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5697                           GET_MODE_BITSIZE (mode) - 1,
5698                           subtarget, 0);
5699       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5700                           OPTAB_WIDEN);
5701     }
5702
5703   if (code == EQ || code == NE)
5704     {
5705       /* For EQ or NE, one way to do the comparison is to apply an operation
5706          that converts the operand into a positive number if it is nonzero
5707          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5708          for NE we negate.  This puts the result in the sign bit.  Then we
5709          normalize with a shift, if needed.
5710
5711          Two operations that can do the above actions are ABS and FFS, so try
5712          them.  If that doesn't work, and MODE is smaller than a full word,
5713          we can use zero-extension to the wider mode (an unsigned conversion)
5714          as the operation.  */
5715
5716       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5717          that is compensated by the subsequent overflow when subtracting
5718          one / negating.  */
5719
5720       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5721         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5722       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5723         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5724       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5725         {
               /* Zero-extend to word_mode; the remaining steps are then done
                  in word_mode.  */
5726           tem = convert_modes (word_mode, mode, op0, 1);
5727           mode = word_mode;
5728         }
5729
5730       if (tem != 0)
5731         {
5732           if (code == EQ)
5733             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5734                                 0, OPTAB_WIDEN);
5735           else
5736             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5737         }
5738
5739       /* If we couldn't do it that way, for NE we can "or" the two's complement
5740          of the value with itself.  For EQ, we take the one's complement of
5741          that "or", which is an extra insn, so we only handle EQ if branches
5742          are expensive.  */
5743
5744       if (tem == 0
5745           && (code == NE
5746               || BRANCH_COST (optimize_insn_for_speed_p (),
5747                               false) > 1))
5748         {
5749           if (rtx_equal_p (subtarget, op0))
5750             subtarget = 0;
5751
               /* NOTE(review): the result of expand_unop is handed to
                  expand_binop without a null check -- confirm that the
                  negation cannot fail here.  */
5752           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5753           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5754                               OPTAB_WIDEN);
5755
5756           if (tem && code == EQ)
5757             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5758         }
5759     }
5760
       /* TEM, if set, has the answer in its sign bit.  When a 0/1 or 0/-1
          value was requested, shift the sign bit down: logically for 1,
          arithmetically for -1.  */
5761   if (tem && normalizep)
5762     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5763                         GET_MODE_BITSIZE (mode) - 1,
5764                         subtarget, normalizep == 1);
5765
       /* On success, hand the result back in TARGET if the caller supplied
          one: convert on a mode mismatch, or copy when no SUBTARGET was in
          use.  On failure, remove whatever insns were emitted.  */
5766   if (tem)
5767     {
5768       if (!target)
5769         ;
5770       else if (GET_MODE (tem) != target_mode)
5771         {
5772           convert_move (target, tem, 0);
5773           tem = target;
5774         }
5775       else if (!subtarget)
5776         {
5777           emit_move_insn (target, tem);
5778           tem = target;
5779         }
5780     }
5781   else
5782     delete_insns_since (last);
5783
5784   return tem;
5785 }
5786
5787 /* Like emit_store_flag, but always succeeds.  */
5788
5789 rtx
5790 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5791                        enum machine_mode mode, int unsignedp, int normalizep)
5792 {
5793   rtx tem, label;
5794   rtx trueval, falseval;
5795
5796   /* First see if emit_store_flag can do the job.  */
5797   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5798   if (tem != 0)
5799     return tem;
5800
5801   if (!target)
5802     target = gen_reg_rtx (word_mode);
5803
5804   /* If this failed, we have to do this with set/compare/jump/set code.
5805      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5806   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5807   if (code == NE
5808       && GET_MODE_CLASS (mode) == MODE_INT
5809       && REG_P (target)
5810       && op0 == target
5811       && op1 == const0_rtx)
5812     {
5813       label = gen_label_rtx ();
5814       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5815                                mode, NULL_RTX, NULL_RTX, label, -1);
5816       emit_move_insn (target, trueval);
5817       emit_label (label);
5818       return target;
5819     }
5820
5821   if (!REG_P (target)
5822       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5823     target = gen_reg_rtx (GET_MODE (target));
5824
5825   /* Jump in the right direction if the target cannot implement CODE
5826      but can jump on its reverse condition.  */
5827   falseval = const0_rtx;
5828   if (! can_compare_p (code, mode, ccp_jump)
5829       && (! FLOAT_MODE_P (mode)
5830           || code == ORDERED || code == UNORDERED
5831           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5832           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5833     {
5834       enum rtx_code rcode;
5835       if (FLOAT_MODE_P (mode))
5836         rcode = reverse_condition_maybe_unordered (code);
5837       else
5838         rcode = reverse_condition (code);
5839
5840       /* Canonicalize to UNORDERED for the libcall.  */
5841       if (can_compare_p (rcode, mode, ccp_jump)
5842           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5843         {
5844           falseval = trueval;
5845           trueval = const0_rtx;
5846           code = rcode;
5847         }
5848     }
5849
5850   emit_move_insn (target, trueval);
5851   label = gen_label_rtx ();
5852   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5853                            NULL_RTX, label, -1);
5854
5855   emit_move_insn (target, falseval);
5856   emit_label (label);
5857
5858   return target;
5859 }
5860 \f
5861 /* Perform possibly multi-word comparison and conditional jump to LABEL
5862    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5863    now a thin wrapper around do_compare_rtx_and_jump.  */
5864
5865 static void
5866 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5867                  rtx label)
5868 {
5869   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5870   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5871                            NULL_RTX, NULL_RTX, label, -1);
5872 }