gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
   5    2011, 2012
   6    Free Software Foundation, Inc.
   7
   8 This file is part of GCC.
   9
  10 GCC is free software; you can redistribute it and/or modify it under
  11 the terms of the GNU General Public License as published by the Free
  12 Software Foundation; either version 3, or (at your option) any later
  13 version.
  14
  15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  17 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  18 for more details.
  19
  20 You should have received a copy of the GNU General Public License
  21 along with GCC; see the file COPYING3.  If not see
  22 <http://www.gnu.org/licenses/>.  */
  23
  24
  25 #include "config.h"
  26 #include "system.h"
  27 #include "coretypes.h"
  28 #include "tm.h"
  29 #include "diagnostic-core.h"
  30 #include "rtl.h"
  31 #include "tree.h"
  32 #include "tm_p.h"
  33 #include "flags.h"
  34 #include "insn-config.h"
  35 #include "expr.h"
  36 #include "optabs.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39 #include "df.h"
  40 #include "target.h"
  41 #include "expmed.h"
  42
     /* The default struct target_expmed object.  When SWITCHABLE_TARGET is
        nonzero, the pointer this_target_expmed is defined here as well and
        initially points at that default object.  */
  43 struct target_expmed default_target_expmed;
  44 #if SWITCHABLE_TARGET
  45 struct target_expmed *this_target_expmed = &default_target_expmed;
  46 #endif
  47
     /* Forward declarations of file-local (static) helper routines.  */
  48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  49                                    unsigned HOST_WIDE_INT,
  50                                    unsigned HOST_WIDE_INT,
  51                                    unsigned HOST_WIDE_INT,
  52                                    rtx);
  53 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  54                                    unsigned HOST_WIDE_INT,
  55                                    unsigned HOST_WIDE_INT,
  56                                    unsigned HOST_WIDE_INT,
  57                                    rtx);
  58 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  59                                     unsigned HOST_WIDE_INT,
  60                                     unsigned HOST_WIDE_INT, rtx, int, bool);
  61 static rtx mask_rtx (enum machine_mode, int, int, int);
  62 static rtx lshift_value (enum machine_mode, rtx, int, int);
  63 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  64                                     unsigned HOST_WIDE_INT, int);
  65 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  66 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  67 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  68
  69 /* Test whether a value is zero or a power of two.  The argument X is
        expanded twice, so it should be free of side effects.  */
  70 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
  71
     /* Scratch RTL expressions that init_expmed builds once and then
        re-tags mode by mode in order to query operation costs.

        Each struct rtx_def member is one expression node.  The members
        whose operand 1 gets assigned (plus, mult, sdiv, udiv, sdiv_32,
        smod_32, wide_mult, wide_lshr, shift, shift_mult, shift_add,
        shift_sub0, shift_sub1) are each directly followed by an rtunion
        member; the single-operand ones (neg, wide_trunc, zext, trunc) are
        not, and reg is followed by two rtunion slots.
        NOTE(review): presumably each trailing rtunion reserves the storage
        for operands beyond the one slot struct rtx_def provides itself --
        confirm against rtl.h.  If so, the member order is significant and
        must not be changed.

        pow2[M] and cint[M] cache the constants (1 << M) and M; init_expmed
        fills them in for M >= 1.  */
  72 struct init_expmed_rtl
  73 {
  74   struct rtx_def reg;           rtunion reg_fld[2];
  75   struct rtx_def plus;  rtunion plus_fld1;
  76   struct rtx_def neg;
  77   struct rtx_def mult;  rtunion mult_fld1;
  78   struct rtx_def sdiv;  rtunion sdiv_fld1;
  79   struct rtx_def udiv;  rtunion udiv_fld1;
  80   struct rtx_def sdiv_32;       rtunion sdiv_32_fld1;
  81   struct rtx_def smod_32;       rtunion smod_32_fld1;
  82   struct rtx_def wide_mult;     rtunion wide_mult_fld1;
  83   struct rtx_def wide_lshr;     rtunion wide_lshr_fld1;
  84   struct rtx_def wide_trunc;
  85   struct rtx_def shift; rtunion shift_fld1;
  86   struct rtx_def shift_mult;    rtunion shift_mult_fld1;
  87   struct rtx_def shift_add;     rtunion shift_add_fld1;
  88   struct rtx_def shift_sub0;    rtunion shift_sub0_fld1;
  89   struct rtx_def shift_sub1;    rtunion shift_sub1_fld1;
  90   struct rtx_def zext;
  91   struct rtx_def trunc;
  92
  93   rtx pow2[MAX_BITS_PER_WORD];
  94   rtx cint[MAX_BITS_PER_WORD];
  95 };
  96
     /* Record the cost of converting a value of mode FROM_MODE to mode
        TO_MODE.  ALL supplies the scratch expressions: the TRUNCATE one is
        costed when TO_MODE is narrower than FROM_MODE, otherwise the
        ZERO_EXTEND one, with the scratch register temporarily re-tagged as
        FROM_MODE.  SPEED is handed on to set_src_cost and set_convert_cost.

        The scratch register is shared by every expression in ALL, so its
        mode is put back before returning; the caller goes on to cost other
        expressions that expect the register in the mode it was given.  */
  97 static void
  98 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
  99                       enum machine_mode from_mode, bool speed)
 100 {
 101   int to_size, from_size;
 102   rtx which;
       enum machine_mode saved_reg_mode = GET_MODE (&all->reg);
 103
 104   /* We're given no information about the true size of a partial integer,
 105      only the size of the "full" integer it requires for storage.  For
 106      comparison purposes here, reduce the bit size by one in that case.  */
 107   to_size = (GET_MODE_BITSIZE (to_mode)
 108              - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
 109   from_size = (GET_MODE_BITSIZE (from_mode)
 110                - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
 111
 112   /* Assume cost of zero-extend and sign-extend is the same.  */
 113   which = (to_size < from_size ? &all->trunc : &all->zext);
 114
 115   PUT_MODE (&all->reg, from_mode);
 116   set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
     
       /* Restore the shared scratch register's mode, so that expressions
          costed after this call do not see it in FROM_MODE.  */
       PUT_MODE (&all->reg, saved_reg_mode);
 117 }
 118
     /* Fill in the cost tables for MODE, for the cost variant selected by
        SPEED, using the scratch expressions in ALL.

        The scratch expressions are first re-tagged with MODE, then
        set_src_cost is queried for: add, negate, multiply, signed and
        unsigned divide; signed divide and modulus by the constant 32 (used
        to decide whether the power-of-2 forms count as cheap); and shift,
        shift-and-add and shift-and-subtract for each shift count M with
        1 <= M < MIN (MAX_BITS_PER_WORD, unit bit size of MODE).  For
        scalar integer modes the conversion cost from every mode in
        MIN_MODE_INT..MAX_MODE_INT is recorded too, and for MODE_INT modes
        that have a wider mode, the widening-multiply and highpart-multiply
        costs.  */
 119 static void
 120 init_expmed_one_mode (struct init_expmed_rtl *all,
 121                       enum machine_mode mode, int speed)
 122 {
 123   int m, n, mode_bitsize;
 124   enum machine_mode mode_from;
 125
 126   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 127
       /* Re-tag the scratch expressions with MODE.  wide_mult and wide_lshr
          are not touched here; they are only given a mode further down.  */
 128   PUT_MODE (&all->reg, mode);
 129   PUT_MODE (&all->plus, mode);
 130   PUT_MODE (&all->neg, mode);
 131   PUT_MODE (&all->mult, mode);
 132   PUT_MODE (&all->sdiv, mode);
 133   PUT_MODE (&all->udiv, mode);
 134   PUT_MODE (&all->sdiv_32, mode);
 135   PUT_MODE (&all->smod_32, mode);
 136   PUT_MODE (&all->wide_trunc, mode);
 137   PUT_MODE (&all->shift, mode);
 138   PUT_MODE (&all->shift_mult, mode);
 139   PUT_MODE (&all->shift_add, mode);
 140   PUT_MODE (&all->shift_sub0, mode);
 141   PUT_MODE (&all->shift_sub1, mode);
 142   PUT_MODE (&all->zext, mode);
 143   PUT_MODE (&all->trunc, mode);
 144
 145   set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
 146   set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
 147   set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
 148   set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
 149   set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
 150
       /* "Cheap" here means costing no more than two additions for the
          division and no more than four additions for the modulus.  */
 151   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
 152                                      <= 2 * add_cost (speed, mode)));
 153   set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
 154                                      <= 4 * add_cost (speed, mode)));
 155
       /* A shift by zero is recorded as free; the shift-and-add/subtract
          forms with a zero shift count are recorded at the cost of one
          addition.  */
 156   set_shift_cost (speed, mode, 0, 0);
 157   {
 158     int cost = add_cost (speed, mode);
 159     set_shiftadd_cost (speed, mode, 0, cost);
 160     set_shiftsub0_cost (speed, mode, 0, cost);
 161     set_shiftsub1_cost (speed, mode, 0, cost);
 162   }
 163
 164   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 165   for (m = 1; m < n; m++)
 166     {
           /* Plug shift count M into the scratch shift, and 2**M into the
              multiply that the shift-and-add/subtract expressions use as
              an operand.  */
 167       XEXP (&all->shift, 1) = all->cint[m];
 168       XEXP (&all->shift_mult, 1) = all->pow2[m];
 169
 170       set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
 171       set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
 172       set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
 173       set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
 174     }
 175
 176   if (SCALAR_INT_MODE_P (mode))
 177     {
 178       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 179            mode_from = (enum machine_mode)(mode_from + 1))
 180         init_expmed_one_conv (all, mode, mode_from, speed);
 181     }
 182   if (GET_MODE_CLASS (mode) == MODE_INT)
 183     {
 184       enum machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
 185       if (wider_mode != VOIDmode)
 186         {
               /* The widening expressions work in WIDER_MODE; wide_trunc,
                  which wraps them, stays in MODE.  The right shift count is
                  the unit bit size of MODE.  */
 187           PUT_MODE (&all->zext, wider_mode);
 188           PUT_MODE (&all->wide_mult, wider_mode);
 189           PUT_MODE (&all->wide_lshr, wider_mode);
 190           XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
 191
 192           set_mul_widen_cost (speed, wider_mode,
 193                               set_src_cost (&all->wide_mult, speed));
 194           set_mul_highpart_cost (speed, mode,
 195                                  set_src_cost (&all->wide_trunc, speed));
 196         }
 197     }
 198 }
 199
     /* Initialize the per-mode operation cost tables.

        A set of scratch RTL expressions over one register (numbered just
        past LAST_VIRTUAL_REGISTER) is built on the stack.  Then, for SPEED
        equal to 0 and to 1, the cost of const0_rtx is recorded and
        init_expmed_one_mode is called for every mode in the
        MIN_MODE_INT..MAX_MODE_INT range and, when those classes exist, the
        partial-integer and vector-integer mode ranges.  Finally the alg
        hash table is zeroed if it had already been used, or else marked as
        used, and default_rtl_profile is called.  */
 200 void
 201 init_expmed (void)
 202 {
 203   struct init_expmed_rtl all;
 204   enum machine_mode mode;
 205   int m, speed;
 206
 207   memset (&all, 0, sizeof all);
       /* Cache the constants M and 2**M.  Entry 0 of both arrays is left
          null by the memset above.  */
 208   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 209     {
 210       all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 211       all.cint[m] = GEN_INT (m);
 212     }
 213
 214   PUT_CODE (&all.reg, REG);
 215   /* Avoid using hard regs in ways which may be unsupported.  */
 216   SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
 217
 218   PUT_CODE (&all.plus, PLUS);
 219   XEXP (&all.plus, 0) = &all.reg;
 220   XEXP (&all.plus, 1) = &all.reg;
 221
 222   PUT_CODE (&all.neg, NEG);
 223   XEXP (&all.neg, 0) = &all.reg;
 224
 225   PUT_CODE (&all.mult, MULT);
 226   XEXP (&all.mult, 0) = &all.reg;
 227   XEXP (&all.mult, 1) = &all.reg;
 228
 229   PUT_CODE (&all.sdiv, DIV);
 230   XEXP (&all.sdiv, 0) = &all.reg;
 231   XEXP (&all.sdiv, 1) = &all.reg;
 232
 233   PUT_CODE (&all.udiv, UDIV);
 234   XEXP (&all.udiv, 0) = &all.reg;
 235   XEXP (&all.udiv, 1) = &all.reg;
 236
       /* Division and modulus by the constant 32.  cint[] only has entries
          below MAX_BITS_PER_WORD, so a fresh constant is made when 32 is
          out of its range.  */
 237   PUT_CODE (&all.sdiv_32, DIV);
 238   XEXP (&all.sdiv_32, 0) = &all.reg;
 239   XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
 240
 241   PUT_CODE (&all.smod_32, MOD);
 242   XEXP (&all.smod_32, 0) = &all.reg;
 243   XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
 244
 245   PUT_CODE (&all.zext, ZERO_EXTEND);
 246   XEXP (&all.zext, 0) = &all.reg;
 247
       /* wide_trunc (wide_lshr (wide_mult (zext, zext), count)); the shift
          count is filled in per mode by init_expmed_one_mode.  */
 248   PUT_CODE (&all.wide_mult, MULT);
 249   XEXP (&all.wide_mult, 0) = &all.zext;
 250   XEXP (&all.wide_mult, 1) = &all.zext;
 251
 252   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 253   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 254
 255   PUT_CODE (&all.wide_trunc, TRUNCATE);
 256   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 257
       /* Operand 1 of shift and of shift_mult is filled in per shift count
          by init_expmed_one_mode.  */
 258   PUT_CODE (&all.shift, ASHIFT);
 259   XEXP (&all.shift, 0) = &all.reg;
 260
 261   PUT_CODE (&all.shift_mult, MULT);
 262   XEXP (&all.shift_mult, 0) = &all.reg;
 263
 264   PUT_CODE (&all.shift_add, PLUS);
 265   XEXP (&all.shift_add, 0) = &all.shift_mult;
 266   XEXP (&all.shift_add, 1) = &all.reg;
 267
 268   PUT_CODE (&all.shift_sub0, MINUS);
 269   XEXP (&all.shift_sub0, 0) = &all.shift_mult;
 270   XEXP (&all.shift_sub0, 1) = &all.reg;
 271
 272   PUT_CODE (&all.shift_sub1, MINUS);
 273   XEXP (&all.shift_sub1, 0) = &all.reg;
 274   XEXP (&all.shift_sub1, 1) = &all.shift_mult;
 275
 276   PUT_CODE (&all.trunc, TRUNCATE);
 277   XEXP (&all.trunc, 0) = &all.reg;
 278
 279   for (speed = 0; speed < 2; speed++)
 280     {
           /* NOTE(review): presumably this makes the cost queries below see
              the profile matching SPEED -- confirm.  */
 281       crtl->maybe_hot_insn_p = speed;
 282       set_zero_cost (speed, set_src_cost (const0_rtx, speed));
 283
 284       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 285            mode = (enum machine_mode)(mode + 1))
 286         init_expmed_one_mode (&all, mode, speed);
 287
 288       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 289         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 290              mode = (enum machine_mode)(mode + 1))
 291           init_expmed_one_mode (&all, mode, speed);
 292
 293       if (MIN_MODE_VECTOR_INT != VOIDmode)
 294         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 295              mode = (enum machine_mode)(mode + 1))
 296           init_expmed_one_mode (&all, mode, speed);
 297     }
 298
       /* If the alg hash table was already in use, zero all of its
          entries; otherwise just mark it as used from now on.  */
 299   if (alg_hash_used_p ())
 300     {
 301       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 302       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 303     }
 304   else
 305     set_alg_hash_used_p (true);
 306   default_rtl_profile ();
 307 }
 308
 309 /* Return an rtx for the negation of X.  MODE is the mode the result
 310    should have, which is useful when X is a CONST_INT.  Simplification
 311    is tried first; expand_unop is used only if that produces nothing.  */
 312
 313 rtx
 314 negate_rtx (enum machine_mode mode, rtx x)
 315 {
 316   rtx folded = simplify_unary_operation (NEG, mode, x, mode);
 317
 318   if (folded != 0)
 319     return folded;
 320
 321   return expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 322 }
 323
 324 /* Narrow bitfield memory reference MEM to the first unit of mode MODE
 325    that contains the BITSIZE-bit field starting at bit BITNUM, and store
 326    the field's bit position within the new memory in *NEW_BITNUM.
 327    For BLKmode, the result instead refers to every byte of the field.  */
 328
 329 static rtx
 330 narrow_bit_field_mem (rtx mem, enum machine_mode mode,
 331                       unsigned HOST_WIDE_INT bitsize,
 332                       unsigned HOST_WIDE_INT bitnum,
 333                       unsigned HOST_WIDE_INT *new_bitnum)
 334 {
 335   if (mode != BLKmode)
 336     {
 337       /* Step back to the start of the MODE-sized unit holding BITNUM.  */
 338       unsigned int mode_bits = GET_MODE_BITSIZE (mode);
 339       *new_bitnum = bitnum % mode_bits;
 340       HOST_WIDE_INT byte_off = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 341       return adjust_bitfield_address (mem, mode, byte_off);
 342     }
 343
 344   /* BLKmode: begin at the byte holding BITNUM and cover whole bytes.  */
 345   *new_bitnum = bitnum % BITS_PER_UNIT;
 346   HOST_WIDE_INT first_byte = bitnum / BITS_PER_UNIT;
 347   HOST_WIDE_INT nbytes = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 348                           / BITS_PER_UNIT);
 349   return adjust_bitfield_address_size (mem, mode, first_byte, nbytes);
 350 }
 351
 352 /* Choose a memory mode in which to perform insertion or extraction
 353    PATTERN on the BITSIZE-bit field that lies BITNUM bits into memory
 354    operand OP0.  BITREGION_START and BITREGION_END have the same meaning
 355    as for store_bit_field, and FIELDMODE is the field's natural mode.
 356
 357    The mode must be compatible with the memory access restrictions and,
 358    where applicable, with a register insertion or extraction as well.
 359    On success return the new memory and store the adjusted bit position
 360    in *NEW_BITNUM; return null if no suitable mode exists.  */
 361
 362 static rtx
 363 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 364                               rtx op0, HOST_WIDE_INT bitsize,
 365                               HOST_WIDE_INT bitnum,
 366                               unsigned HOST_WIDE_INT bitregion_start,
 367                               unsigned HOST_WIDE_INT bitregion_end,
 368                               enum machine_mode fieldmode,
 369                               unsigned HOST_WIDE_INT *new_bitnum)
 370 {
 371   bit_field_mode_iterator mode_iter (bitsize, bitnum, bitregion_start,
 372                                      bitregion_end, MEM_ALIGN (op0),
 373                                      MEM_VOLATILE_P (op0));
 374   enum machine_mode chosen_mode;
 375
 376   /* If the iterator offers no mode at all, give up.  */
 377   if (!mode_iter.next_mode (&chosen_mode))
 378     return NULL_RTX;
 379
 380   /* A memory in CHOSEN_MODE works.  Unless smaller modes are preferred,
 381      see whether wider modes work too: all else being equal the widest
 382      mode is best because it tends to expose more CSE opportunities.  */
 383   if (!mode_iter.prefer_smaller_modes ())
 384     {
 385       /* Never go wider than the mode required by the corresponding
 386          register insertion or extraction instruction, if there is
 387          one; otherwise the limit is word_mode.  */
 388       extraction_insn reg_insn;
 389       enum machine_mode widest_allowed = word_mode;
 390       if (get_best_reg_extraction_insn (&reg_insn, pattern,
 391                                         GET_MODE_BITSIZE (chosen_mode),
 392                                         fieldmode))
 393         widest_allowed = reg_insn.field_mode;
 394
 395       enum machine_mode candidate;
 396       while (mode_iter.next_mode (&candidate)
 397              && GET_MODE_SIZE (candidate) <= GET_MODE_SIZE (widest_allowed))
 398         chosen_mode = candidate;
 399     }
 400   return narrow_bit_field_mem (op0, chosen_mode, bitsize, bitnum, new_bitnum);
 401 }
 402
 403 /* Return true if the BITSIZE-bit field that starts at bit BITNUM of a
 404    structure of mode STRUCT_MODE represents a lowpart subreg.  When it
 405    does, the subreg offset is BITNUM / BITS_PER_UNIT.  */
 406
 407 static bool
 408 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
 409                      unsigned HOST_WIDE_INT bitsize,
 410                      enum machine_mode struct_mode)
 411 {
 412   if (!BYTES_BIG_ENDIAN)
 413     return bitnum % BITS_PER_WORD == 0;
 414   if (bitnum % BITS_PER_UNIT != 0)
 415     return false;
 416   return (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
 417           || (bitnum + bitsize) % BITS_PER_WORD == 0);
 418 }
 419 \f
 420 /* Try to use instruction INSV to store VALUE into a field of OP0.
 421    BITSIZE and BITNUM are as for store_bit_field.
     
        Return true if the insertion was emitted.  Return false, without
        emitting anything, if BITSIZE is zero or exceeds the bit size of
        INSV's field mode.  Return false as well if the insv pattern could
        not be expanded; in that case every insn emitted since entry is
        deleted again.  */
 422
 423 static bool
 424 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 425                             unsigned HOST_WIDE_INT bitsize,
 426                             unsigned HOST_WIDE_INT bitnum, rtx value)
 427 {
 428   struct expand_operand ops[4];
 429   rtx value1;
 430   rtx xop0 = op0;
       /* Remember the insn stream position so that a failed attempt can be
          rolled back with delete_insns_since.  */
 431   rtx last = get_last_insn ();
       /* Set when the insertion is performed on a temporary whose value
          has to be copied back into OP0 afterwards.  */
 432   bool copy_back = false;
 433
 434   enum machine_mode op_mode = insv->field_mode;
 435   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 436   if (bitsize == 0 || bitsize > unit)
 437     return false;
 438
 439   if (MEM_P (xop0))
 440     /* Get a reference to the first byte of the field.  */
 441     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 442                                  &bitnum);
 443   else
 444     {
 445       /* Convert from counting within OP0 to counting in OP_MODE.  */
 446       if (BYTES_BIG_ENDIAN)
 447         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 448
 449       /* If xop0 is a register, we need it in OP_MODE
 450          to make it acceptable to the format of insv.  */
 451       if (GET_CODE (xop0) == SUBREG)
 452         /* We can't just change the mode, because this might clobber op0,
 453            and we will need the original value of op0 if insv fails.  */
 454         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 455       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 456         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 457     }
 458
 459   /* If the destination is a paradoxical subreg such that we need a
 460      truncate to the inner mode, perform the insertion on a temporary and
 461      truncate the result to the original destination.  Note that we can't
 462      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 463      X) 0)) is (reg:N X).  */
 464   if (GET_CODE (xop0) == SUBREG
 465       && REG_P (SUBREG_REG (xop0))
 466       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 467                                          op_mode))
 468     {
 469       rtx tem = gen_reg_rtx (op_mode);
 470       emit_move_insn (tem, xop0);
 471       xop0 = tem;
 472       copy_back = true;
 473     }
 474
 475   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 476      "backwards" from the size of the unit we are inserting into.
 477      Otherwise, we count bits from the most significant on a
 478      BYTES/BITS_BIG_ENDIAN machine.  */
 479
 480   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 481     bitnum = unit - bitsize - bitnum;
 482
 483   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 484   value1 = value;
 485   if (GET_MODE (value) != op_mode)
 486     {
 487       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 488         {
 489           /* Optimization: Don't bother really extending VALUE
 490              if it has all the bits we will actually use.  However,
 491              if we must narrow it, be sure we do it correctly.  */
 492
 493           if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 494             {
 495               rtx tmp;
 496
                   /* Try a subreg of VALUE1 as it stands; if that cannot be
                      formed, force the value into a register and take the
                      subreg of that instead.  */
 497               tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 498               if (! tmp)
 499                 tmp = simplify_gen_subreg (op_mode,
 500                                            force_reg (GET_MODE (value),
 501                                                       value1),
 502                                            GET_MODE (value), 0);
 503               value1 = tmp;
 504             }
 505           else
 506             value1 = gen_lowpart (op_mode, value1);
 507         }
 508       else if (CONST_INT_P (value))
 509         value1 = gen_int_mode (INTVAL (value), op_mode);
 510       else
 511         /* Parse phase is supposed to make VALUE's data type
 512            match that of the component reference, which is a type
 513            at least as wide as the field; so VALUE should have
 514            a mode that corresponds to that type.  */
 515         gcc_assert (CONSTANT_P (value));
 516     }
 517
       /* Operands handed to the insv pattern, in order: destination, field
          width, field position, value to insert.  */
 518   create_fixed_operand (&ops[0], xop0);
 519   create_integer_operand (&ops[1], bitsize);
 520   create_integer_operand (&ops[2], bitnum);
 521   create_input_operand (&ops[3], value1, op_mode);
 522   if (maybe_expand_insn (insv->icode, 4, ops))
 523     {
           /* The insertion went into a temporary; move the result into the
              real destination.  */
 524       if (copy_back)
 525         convert_move (op0, xop0, true);
 526       return true;
 527     }
       /* Expansion failed: discard whatever was emitted above.  */
 528   delete_insns_since (last);
 529   return false;
 530 }
 531
 532 /* A subroutine of store_bit_field, with the same arguments.  Return true
 533    if the operation could be implemented.
 534
 535    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 536    no other way of implementing the operation.  If FALLBACK_P is false,
 537    return false instead.  */
 538
 539 static bool
 540 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 541                    unsigned HOST_WIDE_INT bitnum,
 542                    unsigned HOST_WIDE_INT bitregion_start,
 543                    unsigned HOST_WIDE_INT bitregion_end,
 544                    enum machine_mode fieldmode,
 545                    rtx value, bool fallback_p)
 546 {
 547   rtx op0 = str_rtx;
 548   rtx orig_value;
 549
 550   while (GET_CODE (op0) == SUBREG)
 551     {
 552       /* The following line once was done only if WORDS_BIG_ENDIAN,
 553          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 554          meaningful at a much higher level; when structures are copied
 555          between memory and regs, the higher-numbered regs
 556          always get higher addresses.  */
 557       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 558       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 559       int byte_offset = 0;
 560
 561       /* Paradoxical subregs need special handling on big endian machines.  */
 562       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 563         {
 564           int difference = inner_mode_size - outer_mode_size;
 565
 566           if (WORDS_BIG_ENDIAN)
 567             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 568           if (BYTES_BIG_ENDIAN)
 569             byte_offset += difference % UNITS_PER_WORD;
 570         }
 571       else
 572         byte_offset = SUBREG_BYTE (op0);
 573
 574       bitnum += byte_offset * BITS_PER_UNIT;
 575       op0 = SUBREG_REG (op0);
 576     }
 577
 578   /* No action is needed if the target is a register and if the field
 579      lies completely outside that register.  This can occur if the source
 580      code contains an out-of-bounds access to a small array.  */
 581   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 582     return true;
 583
 584   /* Use vec_set patterns for inserting parts of vectors whenever
 585      available.  */
 586   if (VECTOR_MODE_P (GET_MODE (op0))
 587       && !MEM_P (op0)
 588       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 589       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 590       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 591       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 592     {
 593       struct expand_operand ops[3];
 594       enum machine_mode outermode = GET_MODE (op0);
 595       enum machine_mode innermode = GET_MODE_INNER (outermode);
 596       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 597       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 598
 599       create_fixed_operand (&ops[0], op0);
 600       create_input_operand (&ops[1], value, innermode);
 601       create_integer_operand (&ops[2], pos);
 602       if (maybe_expand_insn (icode, 3, ops))
 603         return true;
 604     }
 605
 606   /* If the target is a register, overwriting the entire object, or storing
 607      a full-word or multi-word field can be done with just a SUBREG.  */
 608   if (!MEM_P (op0)
 609       && bitsize == GET_MODE_BITSIZE (fieldmode)
 610       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 611           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 612     {
 613       /* Use the subreg machinery either to narrow OP0 to the required
 614          words or to cope with mode punning between equal-sized modes.  */
 615       rtx sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 616                                      bitnum / BITS_PER_UNIT);
 617       if (sub)
 618         {
 619           emit_move_insn (sub, value);
 620           return true;
 621         }
 622     }
 623
 624   /* If the target is memory, storing any naturally aligned field can be
 625      done with a simple store.  For targets that support fast unaligned
 626      memory, any naturally sized, unit aligned field can be done directly.  */
 627   if (MEM_P (op0)
 628       && bitnum % BITS_PER_UNIT == 0
 629       && bitsize == GET_MODE_BITSIZE (fieldmode)
 630       && (!SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 631           || (bitnum % bitsize == 0
 632               && MEM_ALIGN (op0) % bitsize == 0)))
 633     {
 634       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 635       emit_move_insn (op0, value);
 636       return true;
 637     }
 638
 639   /* Make sure we are playing with integral modes.  Pun with subregs
 640      if we aren't.  This must come after the entire register case above,
 641      since that case is valid for any mode.  The following cases are only
 642      valid for integral modes.  */
 643   {
 644     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 645     if (imode != GET_MODE (op0))
 646       {
 647         if (MEM_P (op0))
 648           op0 = adjust_bitfield_address (op0, imode, 0);
 649         else
 650           {
 651             gcc_assert (imode != BLKmode);
 652             op0 = gen_lowpart (imode, op0);
 653           }
 654       }
 655   }
 656
 657   /* Storing an lsb-aligned field in a register
 658      can be done with a movstrict instruction.  */
 659
 660   if (!MEM_P (op0)
 661       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 662       && bitsize == GET_MODE_BITSIZE (fieldmode)
 663       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 664     {
 665       struct expand_operand ops[2];
 666       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 667       rtx arg0 = op0;
 668       unsigned HOST_WIDE_INT subreg_off;
 669
 670       if (GET_CODE (arg0) == SUBREG)
 671         {
 672           /* Else we've got some float mode source being extracted into
 673              a different float mode destination -- this combination of
 674              subregs results in Severe Tire Damage.  */
 675           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 676                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 677                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 678           arg0 = SUBREG_REG (arg0);
 679         }
 680
 681       subreg_off = bitnum / BITS_PER_UNIT;
 682       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 683         {
 684           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 685
 686           create_fixed_operand (&ops[0], arg0);
 687           /* Shrink the source operand to FIELDMODE.  */
 688           create_convert_operand_to (&ops[1], value, fieldmode, false);
 689           if (maybe_expand_insn (icode, 2, ops))
 690             return true;
 691         }
 692     }
 693
 694   /* Handle fields bigger than a word.  */
 695
 696   if (bitsize > BITS_PER_WORD)
 697     {
 698       /* Here we transfer the words of the field
 699          in the order least significant first.
 700          This is because the most significant word is the one which may
 701          be less than full.
 702          However, only do that if the value is not BLKmode.  */
 703
 704       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 705       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 706       unsigned int i;
 707       rtx last;
 708
 709       /* This is the mode we must force value to, so that there will be enough
 710          subwords to extract.  Note that fieldmode will often (always?) be
 711          VOIDmode, because that is what store_field uses to indicate that this
 712          is a bit field, but passing VOIDmode to operand_subword_force
 713          is not allowed.  */
 714       fieldmode = GET_MODE (value);
 715       if (fieldmode == VOIDmode)
 716         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 717
 718       last = get_last_insn ();
 719       for (i = 0; i < nwords; i++)
 720         {
 721           /* If I is 0, use the low-order word in both field and target;
 722              if I is 1, use the next to lowest word; and so on.  */
 723           unsigned int wordnum = (backwards
 724                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 725                                   - i - 1
 726                                   : i);
 727           unsigned int bit_offset = (backwards
 728                                      ? MAX ((int) bitsize - ((int) i + 1)
 729                                             * BITS_PER_WORD,
 730                                             0)
 731                                      : (int) i * BITS_PER_WORD);
 732           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 733           unsigned HOST_WIDE_INT new_bitsize =
 734             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 735
 736           /* If the remaining chunk doesn't have full wordsize we have
 737              to make sure that for big endian machines the higher order
 738              bits are used.  */
 739           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 740             value_word = simplify_expand_binop (word_mode, lshr_optab,
 741                                                 value_word,
 742                                                 GEN_INT (BITS_PER_WORD
 743                                                          - new_bitsize),
 744                                                 NULL_RTX, true,
 745                                                 OPTAB_LIB_WIDEN);
 746
 747           if (!store_bit_field_1 (op0, new_bitsize,
 748                                   bitnum + bit_offset,
 749                                   bitregion_start, bitregion_end,
 750                                   word_mode,
 751                                   value_word, fallback_p))
 752             {
 753               delete_insns_since (last);
 754               return false;
 755             }
 756         }
 757       return true;
 758     }
 759
 760   /* If VALUE has a floating-point or complex mode, access it as an
 761      integer of the corresponding size.  This can occur on a machine
 762      with 64 bit registers that uses SFmode for float.  It can also
 763      occur for unaligned float or complex fields.  */
 764   orig_value = value;
 765   if (GET_MODE (value) != VOIDmode
 766       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 767       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 768     {
 769       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 770       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 771     }
 772
 773   /* If OP0 is a multi-word register, narrow it to the affected word.
 774      If the region spans two words, defer to store_split_bit_field.  */
 775   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 776     {
 777       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
 778                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 779       gcc_assert (op0);
 780       bitnum %= BITS_PER_WORD;
 781       if (bitnum + bitsize > BITS_PER_WORD)
 782         {
 783           if (!fallback_p)
 784             return false;
 785
 786           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 787                                  bitregion_end, value);
 788           return true;
 789         }
 790     }
 791
 792   /* From here on we can assume that the field to be stored in fits
 793      within a word.  If the destination is a register, it too fits
 794      in a word.  */
 795
 796   extraction_insn insv;
 797   if (!MEM_P (op0)
 798       && get_best_reg_extraction_insn (&insv, EP_insv,
 799                                        GET_MODE_BITSIZE (GET_MODE (op0)),
 800                                        fieldmode)
 801       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 802     return true;
 803
 804   /* If OP0 is a memory, try copying it to a register and seeing if a
 805      cheap register alternative is available.  */
 806   if (MEM_P (op0))
 807     {
 808       /* Do not use unaligned memory insvs for volatile bitfields when
 809          -fstrict-volatile-bitfields is in effect.  */
 810       if (!(MEM_VOLATILE_P (op0)
 811             && flag_strict_volatile_bitfields > 0)
 812           && get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
 813                                            fieldmode)
 814           && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 815         return true;
 816
 817       rtx last = get_last_insn ();
 818
 819       /* Try loading part of OP0 into a register, inserting the bitfield
 820          into that, and then copying the result back to OP0.  */
 821       unsigned HOST_WIDE_INT bitpos;
 822       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
 823                                                bitregion_start, bitregion_end,
 824                                                fieldmode, &bitpos);
 825       if (xop0)
 826         {
 827           rtx tempreg = copy_to_reg (xop0);
 828           if (store_bit_field_1 (tempreg, bitsize, bitpos,
 829                                  bitregion_start, bitregion_end,
 830                                  fieldmode, orig_value, false))
 831             {
 832               emit_move_insn (xop0, tempreg);
 833               return true;
 834             }
 835           delete_insns_since (last);
 836         }
 837     }
 838
 839   if (!fallback_p)
 840     return false;
 841
 842   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
 843                          bitregion_end, value);
 844   return true;
 845 }
 846
 847 /* Emit code that stores the rtx VALUE into the bit-field of STR_RTX
 848    which is BITSIZE bits wide and whose first bit is bit BITNUM.
 849
 850    BITREGION_START is the bit position of the first bitfield of the
 851    region containing the field and BITREGION_END the bit position of
 852    its ending bitfield.  Both are 0 if the C++ memory model does not
 853    apply, or if bitfield regions are not being tracked.
 854
 855    FIELDMODE is the machine mode of the FIELD_DECL node of the field.
 856    The store itself is done by store_bit_field_1 with fallback enabled.  */
 857
 858 void
 859 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 860                  unsigned HOST_WIDE_INT bitnum,
 861                  unsigned HOST_WIDE_INT bitregion_start,
 862                  unsigned HOST_WIDE_INT bitregion_end,
 863                  enum machine_mode fieldmode,
 864                  rtx value)
 865 {
 866   /* The C++0x memory model forbids touching bits outside the bit
 867      region, so rebase a memory reference onto the first byte of the
 868      region and express every bit position relative to that byte.  */
 869   if (bitregion_start > 0 && MEM_P (str_rtx))
 870     {
 871       /* The region is required to begin on a byte boundary.  */
 872       gcc_assert (bitregion_start % BITS_PER_UNIT == 0);
 873
 874       const unsigned HOST_WIDE_INT region_byte
 875         = bitregion_start / BITS_PER_UNIT;
 876
 877       bitregion_end -= bitregion_start;
 878       bitnum -= bitregion_start;
 879       bitregion_start = 0;
 880       enum machine_mode region_mode
 881         = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
 882                          MEM_ALIGN (str_rtx), VOIDmode,
 883                          MEM_VOLATILE_P (str_rtx));
 884       str_rtx = adjust_address (str_rtx, region_mode, region_byte);
 885     }
 886
 887   bool stored_p = store_bit_field_1 (str_rtx, bitsize, bitnum, bitregion_start,
 888                                      bitregion_end, fieldmode, value, true);
 889   if (!stored_p)
 890     gcc_unreachable ();
 891 }
 892 \f
 893 /* Use shifts and boolean operations to store VALUE into a bit field of
 894    width BITSIZE in OP0, starting at bit BITNUM.

        BITREGION_START and BITREGION_END are passed unchanged to
        get_best_mode and to store_split_bit_field.  A nonzero
        BITREGION_END also limits how wide OP0's own mode may be for it to
        be used for a strict volatile access.

        If OP0 is a MEM for which get_best_mode returns VOIDmode, the whole
        store is handed over to store_split_bit_field.  */
 895
 896 static void
 897 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 898                        unsigned HOST_WIDE_INT bitnum,
 899                        unsigned HOST_WIDE_INT bitregion_start,
 900                        unsigned HOST_WIDE_INT bitregion_end,
 901                        rtx value)
 902 {
 903   enum machine_mode mode;
 904   rtx temp;
       /* Set when VALUE is a constant whose low BITSIZE bits are all zero,
          respectively all one; they let the IOR or the AND below be skipped.  */
 905   int all_zero = 0;
 906   int all_one = 0;
 907
 908   /* There is a case not handled here:
 909      a structure with a known alignment of just a halfword
 910      and a field split across two aligned halfwords within the structure.
 911      Or likewise a structure with a known alignment of just a byte
 912      and a field split across two bytes.
 913      Such cases are not supposed to be able to occur.  */
 914
 915   if (MEM_P (op0))
 916     {
           /* MAXBITS is the size of the bit region if one was given and
              MAX_FIXED_MODE_SIZE otherwise.  It is only used by the strict
              volatile test below.  */
 917       unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
 918
 919       if (bitregion_end)
 920         maxbits = bitregion_end - bitregion_start + 1;
 921
 922       /* Get the proper mode to use for this field.  We want a mode that
 923          includes the entire field.  If such a mode would be larger than
 924          a word, we won't be doing the extraction the normal way.
 925          We don't want a mode bigger than the destination.  */
 926
 927       mode = GET_MODE (op0);
 928       if (GET_MODE_BITSIZE (mode) == 0
 929           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 930         mode = word_mode;
 931
           /* A volatile MEM is accessed in its own mode when strict volatile
              bitfields are enabled and that mode has a nonzero size of at
              most MAXBITS.  Otherwise get_best_mode picks the mode and is
              given the MODE computed just above.  */
 932       if (MEM_VOLATILE_P (op0)
 933           && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
 934           && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
 935           && flag_strict_volatile_bitfields > 0)
 936         mode = GET_MODE (op0);
 937       else
 938         mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
 939                               MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 940
 941       if (mode == VOIDmode)
 942         {
 943           /* The only way this should occur is if the field spans word
 944              boundaries.  */
 945           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 946                                  bitregion_end, value);
 947           return;
 948         }
 949
           /* Replace OP0 by a MODE reference and let the helper rewrite
              BITNUM through its last argument (presumably so that BITNUM
              becomes relative to the narrowed reference -- the helper is not
              visible here).  */
 950       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
 951     }
 952
       /* From here on MODE is the mode of the (possibly narrowed) OP0 and
          all arithmetic is done in that mode.  */
 953   mode = GET_MODE (op0);
 954   gcc_assert (SCALAR_INT_MODE_P (mode));
 955
 956   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
 957      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
 958
 959   if (BYTES_BIG_ENDIAN)
 960     /* BITNUM is the distance between our msb
 961        and that of the containing datum.
 962        Convert it to the distance from the lsb.  */
 963     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
 964
 965   /* Now BITNUM is always the distance between our lsb
 966      and that of OP0.  */
 967
 968   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
 969      we must first convert its mode to MODE.  */
 970
 971   if (CONST_INT_P (value))
 972     {
 973       HOST_WIDE_INT v = INTVAL (value);
 974
           /* Look only at the low BITSIZE bits of the constant when deciding
              whether the field becomes all zeros or all ones.  */
 975       if (bitsize < HOST_BITS_PER_WIDE_INT)
 976         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 977
 978       if (v == 0)
 979         all_zero = 1;
 980       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 981                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 982                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 983         all_one = 1;
 984
 985       value = lshift_value (mode, value, bitnum, bitsize);
 986     }
 987   else
 988     {
           /* MUST_AND has to be computed before VALUE is converted, because
              it looks at VALUE's original mode.  No masking is done when that
              mode is exactly BITSIZE bits wide, or when the field ends at the
              top bit of MODE.  */
 989       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 990                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
 991
 992       if (GET_MODE (value) != mode)
 993         value = convert_to_mode (mode, value, 1);
 994
 995       if (must_and)
 996         value = expand_binop (mode, and_optab, value,
 997                               mask_rtx (mode, 0, bitsize, 0),
 998                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 999       if (bitnum > 0)
1000         value = expand_shift (LSHIFT_EXPR, mode, value,
1001                               bitnum, NULL_RTX, 1);
1002     }
1003
1004   /* Now clear the chosen bits in OP0,
1005      except that if VALUE is -1 we need not bother.  */
1006   /* We keep the intermediates in registers to allow CSE to combine
1007      consecutive bitfield assignments.  */
1008
1009   temp = force_reg (mode, op0);
1010
1011   if (! all_one)
1012     {
1013       temp = expand_binop (mode, and_optab, temp,
1014                            mask_rtx (mode, bitnum, bitsize, 1),
1015                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1016       temp = force_reg (mode, temp);
1017     }
1018
1019   /* Now logical-or VALUE into OP0, unless it is zero.  */
1020
1021   if (! all_zero)
1022     {
1023       temp = expand_binop (mode, ior_optab, temp, value,
1024                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1025       temp = force_reg (mode, temp);
1026     }
1027
       /* Copy the combined result back into OP0 unless TEMP is OP0 itself.
          The move is emitted into a copy_rtx of OP0.  */
1028   if (op0 != temp)
1029     {
1030       op0 = copy_rtx (op0);
1031       emit_move_insn (op0, temp);
1032     }
1033 }
1034 \f
1035 /* Store a bit field that is split across multiple accessible memory objects.
1036
1037    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1038    BITSIZE is the field width; BITPOS the position of its first bit
1039    (within the word).
1040    VALUE is the value to store.
        BITREGION_START and BITREGION_END are passed on to
        store_fixed_bit_field.  In addition, when BITREGION_END is nonzero,
        UNIT is halved, as long as it is still larger than BITS_PER_UNIT,
        whenever a UNIT-sized chunk would reach past BITREGION_END.
1041
1042    This does not yet handle fields wider than BITS_PER_WORD.  */
1043
1044 static void
1045 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1046                        unsigned HOST_WIDE_INT bitpos,
1047                        unsigned HOST_WIDE_INT bitregion_start,
1048                        unsigned HOST_WIDE_INT bitregion_end,
1049                        rtx value)
1050 {
       /* UNIT is the size in bits of the chunks OP0 is accessed in;
          BITSDONE counts the bits of the field stored so far.  */
1051   unsigned int unit;
1052   unsigned int bitsdone = 0;
1053
1054   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1055      much at a time.  */
1056   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1057     unit = BITS_PER_WORD;
1058   else
1059     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1060
1061   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1062      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1063      that VALUE might be a floating-point constant.  */
1064   if (CONSTANT_P (value) && !CONST_INT_P (value))
1065     {
1066       rtx word = gen_lowpart_common (word_mode, value);
1067
1068       if (word && (value != word))
1069         value = word;
1070       else
1071         value = gen_lowpart_common (word_mode,
1072                                     force_reg (GET_MODE (value) != VOIDmode
1073                                                ? GET_MODE (value)
1074                                                : word_mode, value));
1075     }
1076
       /* Store the field piece by piece.  An iteration either halves UNIT
          and retries, or stores THISSIZE bits and advances BITSDONE.  */
1077   while (bitsdone < bitsize)
1078     {
1079       unsigned HOST_WIDE_INT thissize;
1080       rtx part, word;
1081       unsigned HOST_WIDE_INT thispos;
1082       unsigned HOST_WIDE_INT offset;
1083
           /* Locate the next bit to store: OFFSET counts whole UNITs from
              the start of OP0 and THISPOS is the bit position inside that
              UNIT.  */
1084       offset = (bitpos + bitsdone) / unit;
1085       thispos = (bitpos + bitsdone) % unit;
1086
1087       /* When region of bytes we can touch is restricted, decrease
1088          UNIT close to the end of the region as needed.  */
1089       if (bitregion_end
1090           && unit > BITS_PER_UNIT
1091           && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
1092         {
1093           unit = unit / 2;
1094           continue;
1095         }
1096
1097       /* THISSIZE must not overrun a word boundary.  Otherwise,
1098          store_fixed_bit_field will call us again, and we will mutually
1099          recurse forever.  */
1100       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1101       thissize = MIN (thissize, unit - thispos);
1102
           /* PART becomes the THISSIZE bits of VALUE that belong in this
              piece: a CONST_INT is shifted and masked directly, anything
              else goes through extract_fixed_bit_field.  */
1103       if (BYTES_BIG_ENDIAN)
1104         {
1105           /* Fetch successively less significant portions.  */
1106           if (CONST_INT_P (value))
1107             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1108                              >> (bitsize - bitsdone - thissize))
1109                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1110           else
1111             {
1112               int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1113               /* The args are chosen so that the last part includes the
1114                  lsb.  Give extract_bit_field the value it needs (with
1115                  endianness compensation) to fetch the piece we want.  */
1116               part = extract_fixed_bit_field (word_mode, value, thissize,
1117                                               total_bits - bitsize + bitsdone,
1118                                               NULL_RTX, 1, false);
1119             }
1120         }
1121       else
1122         {
1123           /* Fetch successively more significant portions.  */
1124           if (CONST_INT_P (value))
1125             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1126                              >> bitsdone)
1127                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1128           else
1129             part = extract_fixed_bit_field (word_mode, value, thissize,
1130                                             bitsdone, NULL_RTX, 1, false);
1131         }
1132
1133       /* If OP0 is a register, then handle OFFSET here.
1134
1135          When handling multiword bitfields, extract_bit_field may pass
1136          down a word_mode SUBREG of a larger REG for a bitfield that actually
1137          crosses a word boundary.  Thus, for a SUBREG, we must find
1138          the current word starting from the base register.  */
1139       if (GET_CODE (op0) == SUBREG)
1140         {
1141           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1142           enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
               /* If the inner register is narrower than a word, word 0 is
                  OP0 itself and any other word is marked with const0_rtx,
                  which is tested for below.  */
1143           if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1144             word = word_offset ? const0_rtx : op0;
1145           else
1146             word = operand_subword_force (SUBREG_REG (op0), word_offset,
1147                                           GET_MODE (SUBREG_REG (op0)));
               /* WORD already designates the right word, so the position
                  passed to store_fixed_bit_field must not add OFFSET again.  */
1148           offset = 0;
1149         }
1150       else if (REG_P (op0))
1151         {
1152           enum machine_mode op0_mode = GET_MODE (op0);
               /* Same treatment as for the SUBREG case, applied to OP0
                  directly.  */
1153           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1154             word = offset ? const0_rtx : op0;
1155           else
1156             word = operand_subword_force (op0, offset, GET_MODE (op0));
1157           offset = 0;
1158         }
1159       else
             /* Neither a SUBREG nor a REG: OP0 is used as is and OFFSET is
                folded into the bit position below.  */
1160         word = op0;
1161
1162       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1163          it is just an out-of-bounds access.  Ignore it.  */
1164       if (word != const0_rtx)
1165         store_fixed_bit_field (word, thissize, offset * unit + thispos,
1166                                bitregion_start, bitregion_end, part);
1167       bitsdone += thissize;
1168     }
1169 }
1170 \f
1171 /* Helper for extract_bit_field_1: return the extracted value X in mode
1172    TMODE, or X itself if it is already in MODE or in TMODE.  MODE, TMODE
1173    and UNSIGNEDP are the arguments given to extract_bit_field.  */
1174
1175 static rtx
1176 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1177                              enum machine_mode tmode, bool unsignedp)
1178 {
1179   const enum machine_mode xmode = GET_MODE (x);
1180
1181   /* Nothing to do when X already has one of the acceptable modes.  */
1182   if (xmode == mode || xmode == tmode)
1183     return x;
1184
1185   /* A scalar integer TMODE is reached with an ordinary conversion.  */
1186   if (SCALAR_INT_MODE_P (tmode))
1187     return convert_to_mode (tmode, x, unsignedp);
1188
1189   /* Otherwise convert X to the integer mode that is as wide as TMODE,
1190      put that in a register, and view the register in TMODE.  */
1191   enum machine_mode int_tmode
1192     = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1193   rtx int_x = convert_to_mode (int_tmode, x, unsignedp);
1194   int_x = force_reg (int_tmode, int_x);
1195   return gen_lowpart (tmode, int_x);
1196 }
1197
1198 /* Try to use an ext(z)v pattern to extract a field from OP0.
1199    Return the extracted value on success, otherwise return null.
1200    EXTV describes the pattern: its field_mode (EXT_MODE below) is the
1201    mode of the extraction, its struct_mode is used to narrow a MEM OP0,
        and its icode is the instruction that gets expanded.  The other
        arguments are as for extract_bit_field.

        Null is also returned, without trying the pattern, if BITSIZE is
        zero or exceeds the size of EXT_MODE, or if OP0 is a SUBREG whose
        mode is not EXT_MODE.  */
1202
1203 static rtx
1204 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1205                               unsigned HOST_WIDE_INT bitsize,
1206                               unsigned HOST_WIDE_INT bitnum,
1207                               int unsignedp, rtx target,
1208                               enum machine_mode mode, enum machine_mode tmode)
1209 {
1210   struct expand_operand ops[4];
       /* SPEC_TARGET remembers the target the result is wanted in: the
          caller's TARGET, or the fresh TMODE register created below when
          the caller gave none.  SPEC_TARGET_SUBREG, when set, is the
          EXT_MODE lowpart of SPEC_TARGET used as output operand in the case
          where EXT_MODE has more precision than SPEC_TARGET's mode.  */
1211   rtx spec_target = target;
1212   rtx spec_target_subreg = 0;
1213   enum machine_mode ext_mode = extv->field_mode;
1214   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1215
1216   if (bitsize == 0 || unit < bitsize)
1217     return NULL_RTX;
1218
1219   if (MEM_P (op0))
1220     /* Get a reference to the first byte of the field.  */
1221     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1222                                 &bitnum);
1223   else
1224     {
1225       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1226       if (BYTES_BIG_ENDIAN)
1227         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1228
1229       /* If op0 is a register, we need it in EXT_MODE to make it
1230          acceptable to the format of ext(z)v.  */
1231       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1232         return NULL_RTX;
1233       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1234         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1235     }
1236
1237   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1238      "backwards" from the size of the unit we are extracting from.
1239      Otherwise, we count bits from the most significant on a
1240      BYTES/BITS_BIG_ENDIAN machine.  */
1241
1242   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1243     bitnum = unit - bitsize - bitnum;
1244
1245   if (target == 0)
1246     target = spec_target = gen_reg_rtx (tmode);
1247
1248   if (GET_MODE (target) != ext_mode)
1249     {
1250       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1251          between the mode of the extraction (word_mode) and the target
1252          mode.  Instead, create a temporary and use convert_move to set
1253          the target.  */
1254       if (REG_P (target)
1255           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1256         {
1257           target = gen_lowpart (ext_mode, target);
1258           if (GET_MODE_PRECISION (ext_mode)
1259               > GET_MODE_PRECISION (GET_MODE (spec_target)))
1260             spec_target_subreg = target;
1261         }
1262       else
1263         target = gen_reg_rtx (ext_mode);
1264     }
1265
       /* Operand 0 is the result, 1 the structure to extract from, 2 the
          field width and 3 the (adjusted) position of the field.  */
1266   create_output_operand (&ops[0], target, ext_mode);
1267   create_fixed_operand (&ops[1], op0);
1268   create_integer_operand (&ops[2], bitsize);
1269   create_integer_operand (&ops[3], bitnum);
1270   if (maybe_expand_insn (extv->icode, 4, ops))
1271     {
           /* If the output operand ended up being SPEC_TARGET itself, or the
              wider lowpart of it recorded above, SPEC_TARGET is returned
              directly.  Any other result goes through
              convert_extracted_bit_field.  */
1272       target = ops[0].value;
1273       if (target == spec_target)
1274         return target;
1275       if (target == spec_target_subreg)
1276         return spec_target;
1277       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1278     }
1279   return NULL_RTX;
1280 }
1281
1282 /* A subroutine of extract_bit_field, with the same arguments.
1283    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1284    if we can find no other means of implementing the operation.
1285    If FALLBACK_P is false, return NULL instead.  */
1286
1287 static rtx
1288 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1289                      unsigned HOST_WIDE_INT bitnum,
1290                      int unsignedp, bool packedp, rtx target,
1291                      enum machine_mode mode, enum machine_mode tmode,
1292                      bool fallback_p)
1293 {
1294   rtx op0 = str_rtx;
1295   enum machine_mode int_mode;
1296   enum machine_mode mode1;
1297
1298   if (tmode == VOIDmode)
1299     tmode = mode;
1300
1301   while (GET_CODE (op0) == SUBREG)
1302     {
1303       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1304       op0 = SUBREG_REG (op0);
1305     }
1306
1307   /* If we have an out-of-bounds access to a register, just return an
1308      uninitialized register of the required mode.  This can occur if the
1309      source code contains an out-of-bounds access to a small array.  */
1310   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1311     return gen_reg_rtx (tmode);
1312
1313   if (REG_P (op0)
1314       && mode == GET_MODE (op0)
1315       && bitnum == 0
1316       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1317     {
1318       /* We're trying to extract a full register from itself.  */
1319       return op0;
1320     }
1321
1322   /* See if we can get a better vector mode before extracting.  */
1323   if (VECTOR_MODE_P (GET_MODE (op0))
1324       && !MEM_P (op0)
1325       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1326     {
1327       enum machine_mode new_mode;
1328
1329       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1330         new_mode = MIN_MODE_VECTOR_FLOAT;
1331       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1332         new_mode = MIN_MODE_VECTOR_FRACT;
1333       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1334         new_mode = MIN_MODE_VECTOR_UFRACT;
1335       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1336         new_mode = MIN_MODE_VECTOR_ACCUM;
1337       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1338         new_mode = MIN_MODE_VECTOR_UACCUM;
1339       else
1340         new_mode = MIN_MODE_VECTOR_INT;
1341
1342       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1343         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1344             && targetm.vector_mode_supported_p (new_mode))
1345           break;
1346       if (new_mode != VOIDmode)
1347         op0 = gen_lowpart (new_mode, op0);
1348     }
1349
1350   /* Use vec_extract patterns for extracting parts of vectors whenever
1351      available.  */
1352   if (VECTOR_MODE_P (GET_MODE (op0))
1353       && !MEM_P (op0)
1354       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1355       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1356           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1357     {
1358       struct expand_operand ops[3];
1359       enum machine_mode outermode = GET_MODE (op0);
1360       enum machine_mode innermode = GET_MODE_INNER (outermode);
1361       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1362       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1363
1364       create_output_operand (&ops[0], target, innermode);
1365       create_input_operand (&ops[1], op0, outermode);
1366       create_integer_operand (&ops[2], pos);
1367       if (maybe_expand_insn (icode, 3, ops))
1368         {
1369           target = ops[0].value;
1370           if (GET_MODE (target) != mode)
1371             return gen_lowpart (tmode, target);
1372           return target;
1373         }
1374     }
1375
1376   /* Make sure we are playing with integral modes.  Pun with subregs
1377      if we aren't.  */
1378   {
1379     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1380     if (imode != GET_MODE (op0))
1381       {
1382         if (MEM_P (op0))
1383           op0 = adjust_bitfield_address (op0, imode, 0);
1384         else if (imode != BLKmode)
1385           {
1386             op0 = gen_lowpart (imode, op0);
1387
1388             /* If we got a SUBREG, force it into a register since we
1389                aren't going to be able to do another SUBREG on it.  */
1390             if (GET_CODE (op0) == SUBREG)
1391               op0 = force_reg (imode, op0);
1392           }
1393         else if (REG_P (op0))
1394           {
1395             rtx reg, subreg;
1396             imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1397                                             MODE_INT);
1398             reg = gen_reg_rtx (imode);
1399             subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1400             emit_move_insn (subreg, op0);
1401             op0 = reg;
1402             bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1403           }
1404         else
1405           {
1406             rtx mem = assign_stack_temp (GET_MODE (op0),
1407                                          GET_MODE_SIZE (GET_MODE (op0)));
1408             emit_move_insn (mem, op0);
1409             op0 = adjust_bitfield_address (mem, BLKmode, 0);
1410           }
1411       }
1412   }
1413
1414   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1415      If that's wrong, the solution is to test for it and set TARGET to 0
1416      if needed.  */
1417
1418   /* If the bitfield is volatile, we need to make sure the access
1419      remains on a type-aligned boundary.  */
1420   if (GET_CODE (op0) == MEM
1421       && MEM_VOLATILE_P (op0)
1422       && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1423       && flag_strict_volatile_bitfields > 0)
1424     goto no_subreg_mode_swap;
1425
1426   /* Only scalar integer modes can be converted via subregs.  There is an
1427      additional problem for FP modes here in that they can have a precision
1428      which is different from the size.  mode_for_size uses precision, but
1429      we want a mode based on the size, so we must avoid calling it for FP
1430      modes.  */
1431   mode1 = mode;
1432   if (SCALAR_INT_MODE_P (tmode))
1433     {
1434       enum machine_mode try_mode = mode_for_size (bitsize,
1435                                                   GET_MODE_CLASS (tmode), 0);
1436       if (try_mode != BLKmode)
1437         mode1 = try_mode;
1438     }
1439   gcc_assert (mode1 != BLKmode);
1440
1441   /* Extraction of a full MODE1 value can be done with a subreg as long
1442      as the least significant bit of the value is the least significant
1443      bit of either OP0 or a word of OP0.  */
1444   if (!MEM_P (op0)
1445       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1446       && bitsize == GET_MODE_BITSIZE (mode1)
1447       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1448     {
1449       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1450                                      bitnum / BITS_PER_UNIT);
1451       if (sub)
1452         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1453     }
1454
1455   /* Extraction of a full MODE1 value can be done with a load as long as
1456      the field is on a byte boundary and is sufficiently aligned.  */
1457   if (MEM_P (op0)
1458       && bitnum % BITS_PER_UNIT == 0
1459       && bitsize == GET_MODE_BITSIZE (mode1)
1460       && (!SLOW_UNALIGNED_ACCESS (mode1, MEM_ALIGN (op0))
1461           || (bitnum % bitsize == 0
1462               && MEM_ALIGN (op0) % bitsize == 0)))
1463     {
1464       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1465       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1466     }
1467
1468  no_subreg_mode_swap:
1469
1470   /* Handle fields bigger than a word.  */
1471
1472   if (bitsize > BITS_PER_WORD)
1473     {
1474       /* Here we transfer the words of the field
1475          in the order least significant first.
1476          This is because the most significant word is the one which may
1477          be less than full.  */
1478
1479       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1480       unsigned int i;
1481       rtx last;
1482
1483       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1484         target = gen_reg_rtx (mode);
1485
1486       /* Indicate for flow that the entire target reg is being set.  */
1487       emit_clobber (target);
1488
1489       last = get_last_insn ();
1490       for (i = 0; i < nwords; i++)
1491         {
1492           /* If I is 0, use the low-order word in both field and target;
1493              if I is 1, use the next to lowest word; and so on.  */
1494           /* Word number in TARGET to use.  */
1495           unsigned int wordnum
1496             = (WORDS_BIG_ENDIAN
1497                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1498                : i);
1499           /* Offset from start of field in OP0.  */
1500           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1501                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1502                                                 * (int) BITS_PER_WORD))
1503                                      : (int) i * BITS_PER_WORD);
1504           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1505           rtx result_part
1506             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1507                                              bitsize - i * BITS_PER_WORD),
1508                                    bitnum + bit_offset, 1, false, target_part,
1509                                    mode, word_mode, fallback_p);
1510
1511           gcc_assert (target_part);
1512           if (!result_part)
1513             {
1514               delete_insns_since (last);
1515               return NULL;
1516             }
1517
1518           if (result_part != target_part)
1519             emit_move_insn (target_part, result_part);
1520         }
1521
1522       if (unsignedp)
1523         {
1524           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1525              need to be zero'd out.  */
1526           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1527             {
1528               unsigned int i, total_words;
1529
1530               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1531               for (i = nwords; i < total_words; i++)
1532                 emit_move_insn
1533                   (operand_subword (target,
1534                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1535                                     1, VOIDmode),
1536                    const0_rtx);
1537             }
1538           return target;
1539         }
1540
1541       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1542       target = expand_shift (LSHIFT_EXPR, mode, target,
1543                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1544       return expand_shift (RSHIFT_EXPR, mode, target,
1545                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1546     }
1547
1548   /* If OP0 is a multi-word register, narrow it to the affected word.
1549      If the region spans two words, defer to extract_split_bit_field.  */
1550   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1551     {
1552       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1553                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1554       bitnum %= BITS_PER_WORD;
1555       if (bitnum + bitsize > BITS_PER_WORD)
1556         {
1557           if (!fallback_p)
1558             return NULL_RTX;
1559           target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1560           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1561         }
1562     }
1563
1564   /* From here on we know the desired field is smaller than a word.
1565      If OP0 is a register, it too fits within a word.  */
1566   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1567   extraction_insn extv;
1568   if (!MEM_P (op0)
1569       && get_best_reg_extraction_insn (&extv, pattern, bitnum + bitsize,
1570                                        tmode))
1571     {
1572       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1573                                                  unsignedp, target, mode,
1574                                                  tmode);
1575       if (result)
1576         return result;
1577     }
1578
1579   /* If OP0 is a memory, try copying it to a register and seeing if a
1580      cheap register alternative is available.  */
1581   if (MEM_P (op0))
1582     {
1583       /* Do not use extv/extzv for volatile bitfields when
1584          -fstrict-volatile-bitfields is in effect.  */
1585       if (!(MEM_VOLATILE_P (op0) && flag_strict_volatile_bitfields > 0)
1586           && get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1587                                            tmode))
1588         {
1589           rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1590                                                      bitnum, unsignedp,
1591                                                      target, mode,
1592                                                      tmode);
1593           if (result)
1594             return result;
1595         }
1596
1597       rtx last = get_last_insn ();
1598
1599       /* Try loading part of OP0 into a register and extracting the
1600          bitfield from that.  */
1601       unsigned HOST_WIDE_INT bitpos;
1602       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1603                                                0, 0, tmode, &bitpos);
1604       if (xop0)
1605         {
1606           xop0 = copy_to_reg (xop0);
1607           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1608                                             unsignedp, packedp, target,
1609                                             mode, tmode, false);
1610           if (result)
1611             return result;
1612           delete_insns_since (last);
1613         }
1614     }
1615
1616   if (!fallback_p)
1617     return NULL;
1618
1619   /* Find a correspondingly-sized integer field, so we can apply
1620      shifts and masks to it.  */
1621   int_mode = int_mode_for_mode (tmode);
1622   if (int_mode == BLKmode)
1623     int_mode = int_mode_for_mode (mode);
1624   /* Should probably push op0 out to memory and then do a load.  */
1625   gcc_assert (int_mode != BLKmode);
1626
1627   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1628                                     target, unsignedp, packedp);
1629   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1630 }
1631
1632 /* Generate code to extract a byte-field from STR_RTX
1633    containing BITSIZE bits, starting at BITNUM,
1634    and put it in TARGET if possible (if TARGET is nonzero).
1635    Regardless of TARGET, we return the rtx for where the value is placed.
1636
1637    STR_RTX is the structure containing the byte (a REG or MEM).
1638    UNSIGNEDP is nonzero if this is an unsigned bit field.
1639    PACKEDP is nonzero if the field has the packed attribute.
1640    MODE is the natural mode of the field value once extracted.
1641    TMODE is the mode the caller would like the value to have;
1642    but the value may be returned with type MODE instead.
1643
1644    If a TARGET is specified and we can store in it at no extra cost,
1645    we do so, and return TARGET.
1646    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1647    if they are equally easy.  */
1648
1649 rtx
1650 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1651                    unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1652                    rtx target, enum machine_mode mode, enum machine_mode tmode)
1653 {
1654   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1655                               target, mode, tmode, true);
1656 }
1657 \f
1658 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1659    from bit BITNUM of OP0.
1660
1661    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1662    PACKEDP is true if the field has the packed attribute.
1663
1664    If TARGET is nonzero, attempts to store the value there
1665    and return TARGET, but this is not guaranteed.
1666    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1667
1668 static rtx
1669 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1670                          unsigned HOST_WIDE_INT bitsize,
1671                          unsigned HOST_WIDE_INT bitnum, rtx target,
1672                          int unsignedp, bool packedp)
1673 {
1674   enum machine_mode mode;
1675
1676   if (MEM_P (op0))
1677     {
1678       /* Get the proper mode to use for this field.  We want a mode that
1679          includes the entire field.  If such a mode would be larger than
1680          a word, we won't be doing the extraction the normal way.  */
1681
1682       if (MEM_VOLATILE_P (op0)
1683           && flag_strict_volatile_bitfields > 0)
1684         {
1685           if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1686             mode = GET_MODE (op0);
1687           else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1688             mode = GET_MODE (target);
1689           else
1690             mode = tmode;
1691         }
1692       else
1693         mode = get_best_mode (bitsize, bitnum, 0, 0,
1694                               MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1695
1696       if (mode == VOIDmode)
1697         /* The only way this should occur is if the field spans word
1698            boundaries.  */
1699         return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1700
1701       unsigned int total_bits = GET_MODE_BITSIZE (mode);
1702       HOST_WIDE_INT bit_offset = bitnum - bitnum % total_bits;
1703
1704       /* If we're accessing a volatile MEM, we can't apply BIT_OFFSET
1705          if it results in a multi-word access where we otherwise wouldn't
1706          have one.  So, check for that case here.  */
1707       if (MEM_P (op0)
1708           && MEM_VOLATILE_P (op0)
1709           && flag_strict_volatile_bitfields > 0
1710           && bitnum % BITS_PER_UNIT + bitsize <= total_bits
1711           && bitnum % GET_MODE_BITSIZE (mode) + bitsize > total_bits)
1712         {
1713           if (STRICT_ALIGNMENT)
1714             {
1715               static bool informed_about_misalignment = false;
1716
1717               if (packedp)
1718                 {
1719                   if (bitsize == total_bits)
1720                     warning_at (input_location, OPT_fstrict_volatile_bitfields,
1721                                 "multiple accesses to volatile structure"
1722                                 " member because of packed attribute");
1723                   else
1724                     warning_at (input_location, OPT_fstrict_volatile_bitfields,
1725                                 "multiple accesses to volatile structure"
1726                                 " bitfield because of packed attribute");
1727
1728                   return extract_split_bit_field (op0, bitsize, bitnum,
1729                                                   unsignedp);
1730                 }
1731
1732               if (bitsize == total_bits)
1733                 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1734                             "mis-aligned access used for structure member");
1735               else
1736                 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1737                             "mis-aligned access used for structure bitfield");
1738
1739               if (! informed_about_misalignment)
1740                 {
1741                   informed_about_misalignment = true;
1742                   inform (input_location,
1743                           "when a volatile object spans multiple type-sized"
1744                           " locations, the compiler must choose between using"
1745                           " a single mis-aligned access to preserve the"
1746                           " volatility, or using multiple aligned accesses"
1747                           " to avoid runtime faults; this code may fail at"
1748                           " runtime if the hardware does not allow this"
1749                           " access");
1750                 }
1751             }
1752           bit_offset = bitnum - bitnum % BITS_PER_UNIT;
1753         }
1754       op0 = adjust_bitfield_address (op0, mode, bit_offset / BITS_PER_UNIT);
1755       bitnum -= bit_offset;
1756     }
1757
1758   mode = GET_MODE (op0);
1759   gcc_assert (SCALAR_INT_MODE_P (mode));
1760
1761   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1762      for invalid input, such as extract equivalent of f5 from
1763      gcc.dg/pr48335-2.c.  */
1764
1765   if (BYTES_BIG_ENDIAN)
1766     /* BITNUM is the distance between our msb and that of OP0.
1767        Convert it to the distance from the lsb.  */
1768     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1769
1770   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1771      We have reduced the big-endian case to the little-endian case.  */
1772
1773   if (unsignedp)
1774     {
1775       if (bitnum)
1776         {
1777           /* If the field does not already start at the lsb,
1778              shift it so it does.  */
1779           /* Maybe propagate the target for the shift.  */
1780           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1781           if (tmode != mode)
1782             subtarget = 0;
1783           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1784         }
1785       /* Convert the value to the desired mode.  */
1786       if (mode != tmode)
1787         op0 = convert_to_mode (tmode, op0, 1);
1788
1789       /* Unless the msb of the field used to be the msb when we shifted,
1790          mask out the upper bits.  */
1791
1792       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1793         return expand_binop (GET_MODE (op0), and_optab, op0,
1794                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1795                              target, 1, OPTAB_LIB_WIDEN);
1796       return op0;
1797     }
1798
1799   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1800      then arithmetic-shift its lsb to the lsb of the word.  */
1801   op0 = force_reg (mode, op0);
1802
1803   /* Find the narrowest integer mode that contains the field.  */
1804
1805   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1806        mode = GET_MODE_WIDER_MODE (mode))
1807     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1808       {
1809         op0 = convert_to_mode (mode, op0, 0);
1810         break;
1811       }
1812
1813   if (mode != tmode)
1814     target = 0;
1815
1816   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1817     {
1818       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1819       /* Maybe propagate the target for the shift.  */
1820       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1821       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1822     }
1823
1824   return expand_shift (RSHIFT_EXPR, mode, op0,
1825                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1826 }
1827 \f
1828 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1829    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1830    complement of that if COMPLEMENT.  The mask is truncated if
1831    necessary to the width of mode MODE.  The mask is zero-extended if
1832    BITSIZE+BITPOS is too small for MODE.  */
1833
1834 static rtx
1835 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1836 {
1837   double_int mask;
1838
1839   mask = double_int::mask (bitsize);
1840   mask = mask.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1841
1842   if (complement)
1843     mask = ~mask;
1844
1845   return immed_double_int_const (mask, mode);
1846 }
1847
1848 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1849    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1850
1851 static rtx
1852 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1853 {
1854   double_int val;
1855
1856   val = double_int::from_uhwi (INTVAL (value)).zext (bitsize);
1857   val = val.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1858
1859   return immed_double_int_const (val, mode);
1860 }
1861 \f
1862 /* Extract a bit field that is split across two words
1863    and return an RTX for the result.
1864
1865    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1866    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1867    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1868
1869 static rtx
1870 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1871                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1872 {
1873   unsigned int unit;
1874   unsigned int bitsdone = 0;
1875   rtx result = NULL_RTX;
1876   int first = 1;
1877
1878   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1879      much at a time.  */
1880   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1881     unit = BITS_PER_WORD;
1882   else
1883     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1884
1885   while (bitsdone < bitsize)
1886     {
1887       unsigned HOST_WIDE_INT thissize;
1888       rtx part, word;
1889       unsigned HOST_WIDE_INT thispos;
1890       unsigned HOST_WIDE_INT offset;
1891
1892       offset = (bitpos + bitsdone) / unit;
1893       thispos = (bitpos + bitsdone) % unit;
1894
1895       /* THISSIZE must not overrun a word boundary.  Otherwise,
1896          extract_fixed_bit_field will call us again, and we will mutually
1897          recurse forever.  */
1898       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1899       thissize = MIN (thissize, unit - thispos);
1900
1901       /* If OP0 is a register, then handle OFFSET here.
1902
1903          When handling multiword bitfields, extract_bit_field may pass
1904          down a word_mode SUBREG of a larger REG for a bitfield that actually
1905          crosses a word boundary.  Thus, for a SUBREG, we must find
1906          the current word starting from the base register.  */
1907       if (GET_CODE (op0) == SUBREG)
1908         {
1909           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1910           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1911                                         GET_MODE (SUBREG_REG (op0)));
1912           offset = 0;
1913         }
1914       else if (REG_P (op0))
1915         {
1916           word = operand_subword_force (op0, offset, GET_MODE (op0));
1917           offset = 0;
1918         }
1919       else
1920         word = op0;
1921
1922       /* Extract the parts in bit-counting order,
1923          whose meaning is determined by BYTES_PER_UNIT.
1924          OFFSET is in UNITs, and UNIT is in bits.  */
1925       part = extract_fixed_bit_field (word_mode, word, thissize,
1926                                       offset * unit + thispos, 0, 1, false);
1927       bitsdone += thissize;
1928
1929       /* Shift this part into place for the result.  */
1930       if (BYTES_BIG_ENDIAN)
1931         {
1932           if (bitsize != bitsdone)
1933             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1934                                  bitsize - bitsdone, 0, 1);
1935         }
1936       else
1937         {
1938           if (bitsdone != thissize)
1939             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1940                                  bitsdone - thissize, 0, 1);
1941         }
1942
1943       if (first)
1944         result = part;
1945       else
1946         /* Combine the parts with bitwise or.  This works
1947            because we extracted each part as an unsigned bit field.  */
1948         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1949                                OPTAB_LIB_WIDEN);
1950
1951       first = 0;
1952     }
1953
1954   /* Unsigned bit field: we are done.  */
1955   if (unsignedp)
1956     return result;
1957   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1958   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1959                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
1960   return expand_shift (RSHIFT_EXPR, word_mode, result,
1961                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
1962 }
1963 \f
1964 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1965    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
1966    MODE, fill the upper bits with zeros.  Fail if the layout of either
1967    mode is unknown (as for CC modes) or if the extraction would involve
1968    unprofitable mode punning.  Return the value on success, otherwise
1969    return null.
1970
1971    This is different from gen_lowpart* in these respects:
1972
1973      - the returned value must always be considered an rvalue
1974
1975      - when MODE is wider than SRC_MODE, the extraction involves
1976        a zero extension
1977
1978      - when MODE is smaller than SRC_MODE, the extraction involves
1979        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
1980
1981    In other words, this routine performs a computation, whereas the
1982    gen_lowpart* routines are conceptually lvalue or rvalue subreg
1983    operations.  */
1984
1985 rtx
1986 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
1987 {
1988   enum machine_mode int_mode, src_int_mode;
1989
1990   if (mode == src_mode)
1991     return src;
1992
1993   if (CONSTANT_P (src))
1994     {
1995       /* simplify_gen_subreg can't be used here, as if simplify_subreg
1996          fails, it will happily create (subreg (symbol_ref)) or similar
1997          invalid SUBREGs.  */
1998       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
1999       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2000       if (ret)
2001         return ret;
2002
2003       if (GET_MODE (src) == VOIDmode
2004           || !validate_subreg (mode, src_mode, src, byte))
2005         return NULL_RTX;
2006
2007       src = force_reg (GET_MODE (src), src);
2008       return gen_rtx_SUBREG (mode, src, byte);
2009     }
2010
2011   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2012     return NULL_RTX;
2013
2014   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2015       && MODES_TIEABLE_P (mode, src_mode))
2016     {
2017       rtx x = gen_lowpart_common (mode, src);
2018       if (x)
2019         return x;
2020     }
2021
2022   src_int_mode = int_mode_for_mode (src_mode);
2023   int_mode = int_mode_for_mode (mode);
2024   if (src_int_mode == BLKmode || int_mode == BLKmode)
2025     return NULL_RTX;
2026
2027   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2028     return NULL_RTX;
2029   if (!MODES_TIEABLE_P (int_mode, mode))
2030     return NULL_RTX;
2031
2032   src = gen_lowpart (src_int_mode, src);
2033   src = convert_modes (int_mode, src_int_mode, src, true);
2034   src = gen_lowpart (mode, src);
2035   return src;
2036 }
2037 \f
2038 /* Add INC into TARGET.  */
2039
2040 void
2041 expand_inc (rtx target, rtx inc)
2042 {
2043   rtx value = expand_binop (GET_MODE (target), add_optab,
2044                             target, inc,
2045                             target, 0, OPTAB_LIB_WIDEN);
2046   if (value != target)
2047     emit_move_insn (target, value);
2048 }
2049
2050 /* Subtract DEC from TARGET.  */
2051
2052 void
2053 expand_dec (rtx target, rtx dec)
2054 {
2055   rtx value = expand_binop (GET_MODE (target), sub_optab,
2056                             target, dec,
2057                             target, 0, OPTAB_LIB_WIDEN);
2058   if (value != target)
2059     emit_move_insn (target, value);
2060 }
2061 \f
2062 /* Output a shift instruction for expression code CODE,
2063    with SHIFTED being the rtx for the value to shift,
2064    and AMOUNT the rtx for the amount to shift by.
2065    Store the result in the rtx TARGET, if that is convenient.
2066    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2067    Return the rtx for where the value is.  */
2068
2069 static rtx
2070 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2071                 rtx amount, rtx target, int unsignedp)
2072 {
2073   rtx op1, temp = 0;
2074   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2075   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2076   optab lshift_optab = ashl_optab;
2077   optab rshift_arith_optab = ashr_optab;
2078   optab rshift_uns_optab = lshr_optab;
2079   optab lrotate_optab = rotl_optab;
2080   optab rrotate_optab = rotr_optab;
2081   enum machine_mode op1_mode;
2082   int attempt;
2083   bool speed = optimize_insn_for_speed_p ();
2084
2085   op1 = amount;
2086   op1_mode = GET_MODE (op1);
2087
2088   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2089      shift amount is a vector, use the vector/vector shift patterns.  */
2090   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2091     {
2092       lshift_optab = vashl_optab;
2093       rshift_arith_optab = vashr_optab;
2094       rshift_uns_optab = vlshr_optab;
2095       lrotate_optab = vrotl_optab;
2096       rrotate_optab = vrotr_optab;
2097     }
2098
2099   /* Previously detected shift-counts computed by NEGATE_EXPR
2100      and shifted in the other direction; but that does not work
2101      on all machines.  */
2102
2103   if (SHIFT_COUNT_TRUNCATED)
2104     {
2105       if (CONST_INT_P (op1)
2106           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2107               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2108         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2109                        % GET_MODE_BITSIZE (mode));
2110       else if (GET_CODE (op1) == SUBREG
2111                && subreg_lowpart_p (op1)
2112                && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1)))
2113                && INTEGRAL_MODE_P (GET_MODE (op1)))
2114         op1 = SUBREG_REG (op1);
2115     }
2116
2117   if (op1 == const0_rtx)
2118     return shifted;
2119
2120   /* Check whether its cheaper to implement a left shift by a constant
2121      bit count by a sequence of additions.  */
2122   if (code == LSHIFT_EXPR
2123       && CONST_INT_P (op1)
2124       && INTVAL (op1) > 0
2125       && INTVAL (op1) < GET_MODE_PRECISION (mode)
2126       && INTVAL (op1) < MAX_BITS_PER_WORD
2127       && (shift_cost (speed, mode, INTVAL (op1))
2128           > INTVAL (op1) * add_cost (speed, mode))
2129       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2130     {
2131       int i;
2132       for (i = 0; i < INTVAL (op1); i++)
2133         {
2134           temp = force_reg (mode, shifted);
2135           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2136                                   unsignedp, OPTAB_LIB_WIDEN);
2137         }
2138       return shifted;
2139     }
2140
2141   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2142     {
2143       enum optab_methods methods;
2144
2145       if (attempt == 0)
2146         methods = OPTAB_DIRECT;
2147       else if (attempt == 1)
2148         methods = OPTAB_WIDEN;
2149       else
2150         methods = OPTAB_LIB_WIDEN;
2151
2152       if (rotate)
2153         {
2154           /* Widening does not work for rotation.  */
2155           if (methods == OPTAB_WIDEN)
2156             continue;
2157           else if (methods == OPTAB_LIB_WIDEN)
2158             {
2159               /* If we have been unable to open-code this by a rotation,
2160                  do it as the IOR of two shifts.  I.e., to rotate A
2161                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2162                  where C is the bitsize of A.
2163
2164                  It is theoretically possible that the target machine might
2165                  not be able to perform either shift and hence we would
2166                  be making two libcalls rather than just the one for the
2167                  shift (similarly if IOR could not be done).  We will allow
2168                  this extremely unlikely lossage to avoid complicating the
2169                  code below.  */
2170
2171               rtx subtarget = target == shifted ? 0 : target;
2172               rtx new_amount, other_amount;
2173               rtx temp1;
2174
2175               new_amount = op1;
2176               if (CONST_INT_P (op1))
2177                 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2178                                         - INTVAL (op1));
2179               else
2180                 other_amount
2181                   = simplify_gen_binary (MINUS, GET_MODE (op1),
2182                                          GEN_INT (GET_MODE_PRECISION (mode)),
2183                                          op1);
2184
2185               shifted = force_reg (mode, shifted);
2186
2187               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2188                                      mode, shifted, new_amount, 0, 1);
2189               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2190                                       mode, shifted, other_amount,
2191                                       subtarget, 1);
2192               return expand_binop (mode, ior_optab, temp, temp1, target,
2193                                    unsignedp, methods);
2194             }
2195
2196           temp = expand_binop (mode,
2197                                left ? lrotate_optab : rrotate_optab,
2198                                shifted, op1, target, unsignedp, methods);
2199         }
2200       else if (unsignedp)
2201         temp = expand_binop (mode,
2202                              left ? lshift_optab : rshift_uns_optab,
2203                              shifted, op1, target, unsignedp, methods);
2204
2205       /* Do arithmetic shifts.
2206          Also, if we are going to widen the operand, we can just as well
2207          use an arithmetic right-shift instead of a logical one.  */
2208       if (temp == 0 && ! rotate
2209           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2210         {
2211           enum optab_methods methods1 = methods;
2212
2213           /* If trying to widen a log shift to an arithmetic shift,
2214              don't accept an arithmetic shift of the same size.  */
2215           if (unsignedp)
2216             methods1 = OPTAB_MUST_WIDEN;
2217
2218           /* Arithmetic shift */
2219
2220           temp = expand_binop (mode,
2221                                left ? lshift_optab : rshift_arith_optab,
2222                                shifted, op1, target, unsignedp, methods1);
2223         }
2224
2225       /* We used to try extzv here for logical right shifts, but that was
2226          only useful for one machine, the VAX, and caused poor code
2227          generation there for lshrdi3, so the code was deleted and a
2228          define_expand for lshrsi3 was added to vax.md.  */
2229     }
2230
2231   gcc_assert (temp);
2232   return temp;
2233 }
2234
2235 /* Output a shift instruction for expression code CODE,
2236    with SHIFTED being the rtx for the value to shift,
2237    and AMOUNT the amount to shift by.
2238    Store the result in the rtx TARGET, if that is convenient.
2239    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2240    Return the rtx for where the value is.  */
2241
2242 rtx
2243 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2244               int amount, rtx target, int unsignedp)
2245 {
2246   return expand_shift_1 (code, mode,
2247                          shifted, GEN_INT (amount), target, unsignedp);
2248 }
2249
2250 /* Output a shift instruction for expression code CODE,
2251    with SHIFTED being the rtx for the value to shift,
2252    and AMOUNT the tree for the amount to shift by.
2253    Store the result in the rtx TARGET, if that is convenient.
2254    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2255    Return the rtx for where the value is.  */
2256
2257 rtx
2258 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2259                        tree amount, rtx target, int unsignedp)
2260 {
2261   return expand_shift_1 (code, mode,
2262                          shifted, expand_normal (amount), target, unsignedp);
2263 }
2264
2265 \f
2266 /* Indicates the type of fixup needed after a constant multiplication.
2267    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2268    the result should be negated, and ADD_VARIANT means that the
2269    multiplicand should be added to the result.  */
2270 enum mult_variant {basic_variant, negate_variant, add_variant};
2271 /* Forward declarations of file-local (static) helpers.  */
2272 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2273                         const struct mult_cost *, enum machine_mode mode);
2274 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2275                                  struct algorithm *, enum mult_variant *, int);
2276 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2277                               const struct algorithm *, enum mult_variant);
2278 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2279 static rtx extract_high_half (enum machine_mode, rtx);
2280 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2281 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2282                                        int, int);
2283 /* Compute the best algorithm for multiplying by T and store it in
2284    *ALG_OUT.  The algorithm must cost less than COST_LIMIT.
2285    If the cost left in *ALG_OUT is not cheaper than COST_LIMIT, no
2286    algorithm was found and all other fields of *ALG_OUT are undefined.
2287    MODE is the machine mode of the multiplication.  */
2288
2289 static void
2290 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2291             const struct mult_cost *cost_limit, enum machine_mode mode)
2292 {
2293   int m;  /* Shift count of the step currently being tried.  */
2294   struct algorithm *alg_in, *best_alg;  /* Recursion scratch; best so far.  */
2295   struct mult_cost best_cost;  /* Cost to beat; starts as *COST_LIMIT.  */
2296   struct mult_cost new_limit;  /* Budget handed to recursive calls.  */
2297   int op_cost, op_latency;
2298   unsigned HOST_WIDE_INT orig_t = t;  /* T before masking to IMODE.  */
2299   unsigned HOST_WIDE_INT q;
2300   int maxm, hash_index;
2301   bool cache_hit = false;  /* True when replaying a cached algorithm.  */
2302   enum alg_code cache_alg = alg_zero;
2303   bool speed = optimize_insn_for_speed_p ();
2304   enum machine_mode imode;
2305   struct alg_hash_entry *entry_ptr;
2306
2307   /* Indicate that no algorithm is yet found.  If no algorithm
2308      is found, this value will be returned and indicate failure.  */
2309   alg_out->cost.cost = cost_limit->cost + 1;
2310   alg_out->cost.latency = cost_limit->latency + 1;
2311
2312   if (cost_limit->cost < 0
2313       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2314     return;
2315
2316   /* Be prepared for vector modes.  */
2317   imode = GET_MODE_INNER (mode);
2318   if (imode == VOIDmode)
2319     imode = mode;
2320
2321   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2322
2323   /* Restrict the bits of "t" to the multiplication's mode.  */
2324   t &= GET_MODE_MASK (imode);
2325
2326   /* t == 1 can be done in zero cost.  */
2327   if (t == 1)
2328     {
2329       alg_out->ops = 1;
2330       alg_out->cost.cost = 0;
2331       alg_out->cost.latency = 0;
2332       alg_out->op[0] = alg_m;
2333       return;
2334     }
2335
2336   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2337      fail now.  */
2338   if (t == 0)
2339     {
2340       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2341         return;
2342       else
2343         {
2344           alg_out->ops = 1;
2345           alg_out->cost.cost = zero_cost (speed);
2346           alg_out->cost.latency = zero_cost (speed);
2347           alg_out->op[0] = alg_zero;
2348           return;
2349         }
2350     }
2351
2352   /* We'll be needing a couple extra algorithm structures now.  */
2353
2354   alg_in = XALLOCA (struct algorithm);
2355   best_alg = XALLOCA (struct algorithm);
2356   best_cost = *cost_limit;
2357
2358   /* Compute the hash index.  */
2359   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2360
2361   /* See if we already know what to do for T.  */
2362   entry_ptr = alg_hash_entry_ptr (hash_index);
2363   if (entry_ptr->t == t
2364       && entry_ptr->mode == mode
2365       && entry_ptr->mode == mode  /* NOTE(review): repeats the test above.  */
2366       && entry_ptr->speed == speed
2367       && entry_ptr->alg != alg_unknown)
2368     {
2369       cache_alg = entry_ptr->alg;
2370
2371       if (cache_alg == alg_impossible)
2372         {
2373           /* The cache tells us that it's impossible to synthesize
2374              multiplication by T within entry_ptr->cost.  */
2375           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2376             /* COST_LIMIT is at least as restrictive as the one
2377                recorded in the hash table, in which case we have no
2378                hope of synthesizing a multiplication.  Just
2379                return.  */
2380             return;
2381
2382           /* If we get here, COST_LIMIT is less restrictive than the
2383              one recorded in the hash table, so we may be able to
2384              synthesize a multiplication.  Proceed as if we didn't
2385              have the cache entry.  */
2386         }
2387       else
2388         {
2389           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2390             /* The cached algorithm shows that this multiplication
2391                requires more cost than COST_LIMIT.  Just return.  This
2392                way, we don't clobber this cache entry with
2393                alg_impossible but retain useful information.  */
2394             return;
2395
2396           cache_hit = true;  /* Only the cached kind of step is tried.  */
2397
2398           switch (cache_alg)
2399             {
2400             case alg_shift:
2401               goto do_alg_shift;
2402
2403             case alg_add_t_m2:
2404             case alg_sub_t_m2:
2405               goto do_alg_addsub_t_m2;
2406
2407             case alg_add_factor:
2408             case alg_sub_factor:
2409               goto do_alg_addsub_factor;
2410
2411             case alg_add_t2_m:
2412               goto do_alg_add_t2_m;
2413
2414             case alg_sub_t2_m:
2415               goto do_alg_sub_t2_m;
2416
2417             default:
2418               gcc_unreachable ();
2419             }
2420         }
2421     }
2422
2423   /* If we have a group of zero bits at the low-order part of T, try
2424      multiplying by the remaining bits and then doing a shift.  */
2425
2426   if ((t & 1) == 0)
2427     {
2428     do_alg_shift:
2429       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2430       if (m < maxm)
2431         {
2432           q = t >> m;
2433           /* The function expand_shift will choose between a shift and
2434              a sequence of additions, so the observed cost is given as
2435              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2436           op_cost = m * add_cost (speed, mode);
2437           if (shift_cost (speed, mode, m) < op_cost)
2438             op_cost = shift_cost (speed, mode, m);
2439           new_limit.cost = best_cost.cost - op_cost;
2440           new_limit.latency = best_cost.latency - op_cost;
2441           synth_mult (alg_in, q, &new_limit, mode);
2442
2443           alg_in->cost.cost += op_cost;
2444           alg_in->cost.latency += op_cost;
2445           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2446             {
2447               struct algorithm *x;  /* Temporary for the pointer swap.  */
2448               best_cost = alg_in->cost;
2449               x = alg_in, alg_in = best_alg, best_alg = x;
2450               best_alg->log[best_alg->ops] = m;  /* Append the new step.  */
2451               best_alg->op[best_alg->ops] = alg_shift;
2452             }
2453
2454           /* See if treating ORIG_T as a signed number yields a better
2455              sequence.  Try this sequence only for a negative ORIG_T
2456              as it would be useless for a non-negative ORIG_T.  */
2457           if ((HOST_WIDE_INT) orig_t < 0)
2458             {
2459               /* Shift ORIG_T as follows because a right shift of a
2460                  negative-valued signed type is implementation
2461                  defined.  */
2462               q = ~(~orig_t >> m);
2463               /* The function expand_shift will choose between a shift
2464                  and a sequence of additions, so the observed cost is
2465                  given as MIN (m * add_cost(speed, mode),
2466                  shift_cost(speed, mode, m)).  */
2467               op_cost = m * add_cost (speed, mode);
2468               if (shift_cost (speed, mode, m) < op_cost)
2469                 op_cost = shift_cost (speed, mode, m);
2470               new_limit.cost = best_cost.cost - op_cost;
2471               new_limit.latency = best_cost.latency - op_cost;
2472               synth_mult (alg_in, q, &new_limit, mode);
2473
2474               alg_in->cost.cost += op_cost;
2475               alg_in->cost.latency += op_cost;
2476               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2477                 {
2478                   struct algorithm *x;
2479                   best_cost = alg_in->cost;
2480                   x = alg_in, alg_in = best_alg, best_alg = x;
2481                   best_alg->log[best_alg->ops] = m;
2482                   best_alg->op[best_alg->ops] = alg_shift;
2483                 }
2484             }
2485         }
2486       if (cache_hit)
2487         goto done;
2488     }
2489
2490   /* If we have an odd number, add or subtract one.  */
2491   if ((t & 1) != 0)
2492     {
2493       unsigned HOST_WIDE_INT w;
2494
2495     do_alg_addsub_t_m2:
2496       for (w = 1; (w & t) != 0; w <<= 1)
2497         ;
2498       /* If T was -1, then W will be zero after the loop.  This is another
2499          case where T ends with ...111.  Handling this with (T + 1) and
2500          subtract 1 produces slightly better code and results in algorithm
2501          selection much faster than treating it like the ...0111 case
2502          below.  */
2503       if (w == 0
2504           || (w > 2
2505               /* Reject the case where t is 3.
2506                  Thus we prefer addition in that case.  */
2507               && t != 3))
2508         {
2509           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2510
2511           op_cost = add_cost (speed, mode);
2512           new_limit.cost = best_cost.cost - op_cost;
2513           new_limit.latency = best_cost.latency - op_cost;
2514           synth_mult (alg_in, t + 1, &new_limit, mode);
2515
2516           alg_in->cost.cost += op_cost;
2517           alg_in->cost.latency += op_cost;
2518           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2519             {
2520               struct algorithm *x;
2521               best_cost = alg_in->cost;
2522               x = alg_in, alg_in = best_alg, best_alg = x;
2523               best_alg->log[best_alg->ops] = 0;
2524               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2525             }
2526         }
2527       else
2528         {
2529           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2530
2531           op_cost = add_cost (speed, mode);
2532           new_limit.cost = best_cost.cost - op_cost;
2533           new_limit.latency = best_cost.latency - op_cost;
2534           synth_mult (alg_in, t - 1, &new_limit, mode);
2535
2536           alg_in->cost.cost += op_cost;
2537           alg_in->cost.latency += op_cost;
2538           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2539             {
2540               struct algorithm *x;
2541               best_cost = alg_in->cost;
2542               x = alg_in, alg_in = best_alg, best_alg = x;
2543               best_alg->log[best_alg->ops] = 0;
2544               best_alg->op[best_alg->ops] = alg_add_t_m2;
2545             }
2546         }
2547
2548       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2549          quickly with a - a * n for some appropriate constant n.  */
2550       m = exact_log2 (-orig_t + 1);
2551       if (m >= 0 && m < maxm)
2552         {
2553           op_cost = shiftsub1_cost (speed, mode, m);
2554           new_limit.cost = best_cost.cost - op_cost;
2555           new_limit.latency = best_cost.latency - op_cost;
2556           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2557                       &new_limit, mode);
2558
2559           alg_in->cost.cost += op_cost;
2560           alg_in->cost.latency += op_cost;
2561           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2562             {
2563               struct algorithm *x;
2564               best_cost = alg_in->cost;
2565               x = alg_in, alg_in = best_alg, best_alg = x;
2566               best_alg->log[best_alg->ops] = m;
2567               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2568             }
2569         }
2570
2571       if (cache_hit)
2572         goto done;
2573     }
2574
2575   /* Look for factors of t of the form
2576      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2577      If we find such a factor, we can multiply by t using an algorithm that
2578      multiplies by q, shift the result by m and add/subtract it to itself.
2579
2580      We search for large factors first and loop down, even if large factors
2581      are less probable than small; if we find a large factor we will find a
2582      good sequence quickly, and therefore be able to prune (by decreasing
2583      COST_LIMIT) the search.  */
2584
2585  do_alg_addsub_factor:
2586   for (m = floor_log2 (t - 1); m >= 2; m--)
2587     {
2588       unsigned HOST_WIDE_INT d;
2589
2590       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2591       if (t % d == 0 && t > d && m < maxm
2592           && (!cache_hit || cache_alg == alg_add_factor))
2593         {
2594           /* If the target has a cheap shift-and-add instruction use
2595              that in preference to a shift insn followed by an add insn.
2596              Assume that the shift-and-add is "atomic" with a latency
2597              equal to its cost, otherwise assume that on superscalar
2598              hardware the shift may be executed concurrently with the
2599              earlier steps in the algorithm.  */
2600           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2601           if (shiftadd_cost (speed, mode, m) < op_cost)
2602             {
2603               op_cost = shiftadd_cost (speed, mode, m);
2604               op_latency = op_cost;
2605             }
2606           else
2607             op_latency = add_cost (speed, mode);
2608
2609           new_limit.cost = best_cost.cost - op_cost;
2610           new_limit.latency = best_cost.latency - op_latency;
2611           synth_mult (alg_in, t / d, &new_limit, mode);
2612
2613           alg_in->cost.cost += op_cost;
2614           alg_in->cost.latency += op_latency;
2615           if (alg_in->cost.latency < op_cost)
2616             alg_in->cost.latency = op_cost;
2617           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2618             {
2619               struct algorithm *x;
2620               best_cost = alg_in->cost;
2621               x = alg_in, alg_in = best_alg, best_alg = x;
2622               best_alg->log[best_alg->ops] = m;
2623               best_alg->op[best_alg->ops] = alg_add_factor;
2624             }
2625           /* Other factors will have been taken care of in the recursion.  */
2626           break;
2627         }
2628
2629       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2630       if (t % d == 0 && t > d && m < maxm
2631           && (!cache_hit || cache_alg == alg_sub_factor))
2632         {
2633           /* If the target has a cheap shift-and-subtract insn use
2634              that in preference to a shift insn followed by a sub insn.
2635              Assume that the shift-and-sub is "atomic" with a latency
2636              equal to its cost, otherwise assume that on superscalar
2637              hardware the shift may be executed concurrently with the
2638              earlier steps in the algorithm.  */
2639           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2640           if (shiftsub0_cost (speed, mode, m) < op_cost)
2641             {
2642               op_cost = shiftsub0_cost (speed, mode, m);
2643               op_latency = op_cost;
2644             }
2645           else
2646             op_latency = add_cost (speed, mode);
2647
2648           new_limit.cost = best_cost.cost - op_cost;
2649           new_limit.latency = best_cost.latency - op_latency;
2650           synth_mult (alg_in, t / d, &new_limit, mode);
2651
2652           alg_in->cost.cost += op_cost;
2653           alg_in->cost.latency += op_latency;
2654           if (alg_in->cost.latency < op_cost)
2655             alg_in->cost.latency = op_cost;
2656           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2657             {
2658               struct algorithm *x;
2659               best_cost = alg_in->cost;
2660               x = alg_in, alg_in = best_alg, best_alg = x;
2661               best_alg->log[best_alg->ops] = m;
2662               best_alg->op[best_alg->ops] = alg_sub_factor;
2663             }
2664           break;
2665         }
2666     }
2667   if (cache_hit)
2668     goto done;
2669
2670   /* Try shift-and-add (load effective address) instructions,
2671      i.e. do a*3, a*5, a*9.  */
2672   if ((t & 1) != 0)
2673     {
2674     do_alg_add_t2_m:
2675       q = t - 1;
2676       q = q & -q;  /* Isolate the lowest set bit of T - 1.  */
2677       m = exact_log2 (q);
2678       if (m >= 0 && m < maxm)
2679         {
2680           op_cost = shiftadd_cost (speed, mode, m);
2681           new_limit.cost = best_cost.cost - op_cost;
2682           new_limit.latency = best_cost.latency - op_cost;
2683           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2684
2685           alg_in->cost.cost += op_cost;
2686           alg_in->cost.latency += op_cost;
2687           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2688             {
2689               struct algorithm *x;
2690               best_cost = alg_in->cost;
2691               x = alg_in, alg_in = best_alg, best_alg = x;
2692               best_alg->log[best_alg->ops] = m;
2693               best_alg->op[best_alg->ops] = alg_add_t2_m;
2694             }
2695         }
2696       if (cache_hit)
2697         goto done;
2698
2699     do_alg_sub_t2_m:
2700       q = t + 1;
2701       q = q & -q;  /* Isolate the lowest set bit of T + 1.  */
2702       m = exact_log2 (q);
2703       if (m >= 0 && m < maxm)
2704         {
2705           op_cost = shiftsub0_cost (speed, mode, m);
2706           new_limit.cost = best_cost.cost - op_cost;
2707           new_limit.latency = best_cost.latency - op_cost;
2708           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2709
2710           alg_in->cost.cost += op_cost;
2711           alg_in->cost.latency += op_cost;
2712           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2713             {
2714               struct algorithm *x;
2715               best_cost = alg_in->cost;
2716               x = alg_in, alg_in = best_alg, best_alg = x;
2717               best_alg->log[best_alg->ops] = m;
2718               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2719             }
2720         }
2721       if (cache_hit)
2722         goto done;
2723     }
2724
2725  done:
2726   /* If best_cost has not decreased, we have not found any algorithm.  */
2727   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2728     {
2729       /* We failed to find an algorithm.  Record alg_impossible for
2730          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2731          we are asked to find an algorithm for T within the same or
2732          lower COST_LIMIT, we can immediately return to the
2733          caller.  */
2734       entry_ptr->t = t;
2735       entry_ptr->mode = mode;
2736       entry_ptr->speed = speed;
2737       entry_ptr->alg = alg_impossible;
2738       entry_ptr->cost = *cost_limit;
2739       return;
2740     }
2741
2742   /* Cache the result.  */
2743   if (!cache_hit)
2744     {
2745       entry_ptr->t = t;
2746       entry_ptr->mode = mode;
2747       entry_ptr->speed = speed;
2748       entry_ptr->alg = best_alg->op[best_alg->ops];  /* Kind of last step.  */
2749       entry_ptr->cost.cost = best_cost.cost;
2750       entry_ptr->cost.latency = best_cost.latency;
2751     }
2752
2753   /* If we are getting a too long sequence for `struct algorithm'
2754      to record, make this search fail.  */
2755   if (best_alg->ops == MAX_BITS_PER_WORD)
2756     return;
2757
2758   /* Copy the algorithm from temporary space to the space at alg_out.
2759      We avoid using structure assignment because the majority of
2760      best_alg is normally undefined, and this is a critical function.  */
2761   alg_out->ops = best_alg->ops + 1;
2762   alg_out->cost = best_cost;
2763   memcpy (alg_out->op, best_alg->op,
2764           alg_out->ops * sizeof *alg_out->op);
2765   memcpy (alg_out->log, best_alg->log,
2766           alg_out->ops * sizeof *alg_out->log);
2767 }
2768 \f
2769 /* Choose the cheapest shift/add strategy for multiplying a value of
2770    mode MODE by the constant VAL.  Three candidates are costed:
2771
2772        - a sequence for VAL itself (basic_variant)
2773        - a sequence for -VAL, followed by a negation (negate_variant)
2774        - a sequence for VAL - 1, followed by an addition (add_variant).
2775
2776    The cheapest one is stored in *ALG and its fixup in *VARIANT.
2777    Return true if it costs less than MULT_COST.  */
2778
2779 static bool
2780 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2781                      struct algorithm *alg, enum mult_variant *variant,
2782                      int mult_cost)
2783 {
2784   bool speed = optimize_insn_for_speed_p ();
2785   struct algorithm candidate;
2786   struct mult_cost budget;
2787   int fixup_cost, ceiling;
2788
2789   /* A negative bound can never be met.  */
2790   if (mult_cost < 0)
2791     return false;
2792
2793   /* Clamp MULT_COST: fewer than 2 * bits additions always suffice.  */
2794   ceiling = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2795   mult_cost = MIN (mult_cost, ceiling);
2796
2797   *variant = basic_variant;  /* First candidate: VAL itself, no fixup.  */
2798   budget.cost = mult_cost;
2799   budget.latency = mult_cost;
2800   synth_mult (alg, val, &budget, mode);
2801
2802   /* Second candidate: -VAL, then negate; -VAL must fit in `unsigned int'.  */
2803   if (GET_MODE_UNIT_BITSIZE (mode) <= HOST_BITS_PER_INT)
2804     {
2805       fixup_cost = neg_cost (speed, mode);
2806       if (!(MULT_COST_LESS (&alg->cost, mult_cost)))
2807         {
2808           budget.cost = mult_cost - fixup_cost;
2809           budget.latency = mult_cost - fixup_cost;
2810         }
2811       else
2812         {
2813           budget.cost = alg->cost.cost - fixup_cost;
2814           budget.latency = alg->cost.latency - fixup_cost;
2815         }
2816       synth_mult (&candidate, -val, &budget, mode);
2817       candidate.cost.cost += fixup_cost;
2818       candidate.cost.latency += fixup_cost;
2819       if (CHEAPER_MULT_COST (&candidate.cost, &alg->cost))
2820         {
2821           *alg = candidate;
2822           *variant = negate_variant;
2823         }
2824     }
2825
2826   /* Third candidate: VAL - 1, then add (useful for division-by-constant).  */
2827   fixup_cost = add_cost (speed, mode);
2828   if (!(MULT_COST_LESS (&alg->cost, mult_cost)))
2829     {
2830       budget.cost = mult_cost - fixup_cost;
2831       budget.latency = mult_cost - fixup_cost;
2832     }
2833   else
2834     {
2835       budget.cost = alg->cost.cost - fixup_cost;
2836       budget.latency = alg->cost.latency - fixup_cost;
2837     }
2838   synth_mult (&candidate, val - 1, &budget, mode);
2839   candidate.cost.cost += fixup_cost;
2840   candidate.cost.latency += fixup_cost;
2841   if (CHEAPER_MULT_COST (&candidate.cost, &alg->cost))
2842     {
2843       *alg = candidate;
2844       *variant = add_variant;
2845     }
2846
2847   return MULT_COST_LESS (&alg->cost, mult_cost);
2848 }
2849
2850 /* A subroutine of expand_mult, used for constant multiplications.
2851    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2852    convenient.  Use the shift/add sequence described by ALG and apply
2853    the final fixup specified by VARIANT.  Return the rtx of the product.  */
2854
2855 static rtx
2856 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2857                    rtx target, const struct algorithm *alg,
2858                    enum mult_variant variant)
2859 {
2860   /* Multiplier synthesized so far.  Kept unsigned so that the shifts,
2861      subtractions and negation below wrap instead of overflowing.  */
2862   unsigned HOST_WIDE_INT val_so_far;
2863   rtx insn, accum, tem;
2864   int opno;
2865   enum machine_mode nmode;
2866
2867   /* Avoid referencing memory over and over and invalid sharing
2868      on SUBREGs.  */
2869   op0 = force_reg (mode, op0);
2870
2871   /* ACCUM starts out either as OP0 or as a zero, depending on
2872      the first operation.  */
2873   if (alg->op[0] == alg_zero)
2874     {
2875       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2876       val_so_far = 0;
2877     }
2878   else if (alg->op[0] == alg_m)
2879     {
2880       accum = copy_to_mode_reg (mode, op0);
2881       val_so_far = 1;
2882     }
2883   else
2884     gcc_unreachable ();
2885
2886   for (opno = 1; opno < alg->ops; opno++)
2887     {
2888       int log = alg->log[opno];
2889       rtx shift_subtarget = optimize ? 0 : accum;
2890       rtx add_target
2891         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2892            && !optimize)
2893           ? target : 0;
2894       rtx accum_target = optimize ? 0 : accum;
2895       rtx accum_inner;
2896
2897       switch (alg->op[opno])
2898         {
2899         case alg_shift:  /* accum = accum << log */
2900           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2901           /* REG_EQUAL note will be attached to the following insn.  */
2902           emit_move_insn (accum, tem);
2903           val_so_far <<= log;
2904           break;
2905
2906         case alg_add_t_m2:  /* accum = accum + (op0 << log) */
2907           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2908           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2909                                  add_target ? add_target : accum_target);
2910           val_so_far += (unsigned HOST_WIDE_INT) 1 << log;
2911           break;
2912
2913         case alg_sub_t_m2:  /* accum = accum - (op0 << log) */
2914           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2915           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2916                                  add_target ? add_target : accum_target);
2917           val_so_far -= (unsigned HOST_WIDE_INT) 1 << log;
2918           break;
2919
2920         case alg_add_t2_m:  /* accum = (accum << log) + op0 */
2921           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2922                                 log, shift_subtarget, 0);
2923           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2924                                  add_target ? add_target : accum_target);
2925           val_so_far = (val_so_far << log) + 1;
2926           break;
2927
2928         case alg_sub_t2_m:  /* accum = (accum << log) - op0 */
2929           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2930                                 log, shift_subtarget, 0);
2931           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2932                                  add_target ? add_target : accum_target);
2933           val_so_far = (val_so_far << log) - 1;
2934           break;
2935
2936         case alg_add_factor:  /* accum = accum + (accum << log) */
2937           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2938           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2939                                  add_target ? add_target : accum_target);
2940           val_so_far += val_so_far << log;
2941           break;
2942
2943         case alg_sub_factor:  /* accum = (accum << log) - accum */
2944           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2945           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2946                                  (add_target
2947                                   ? add_target : (optimize ? 0 : tem)));
2948           val_so_far = (val_so_far << log) - val_so_far;
2949           break;
2950
2951         default:
2952           gcc_unreachable ();
2953         }
2954
2955       if (SCALAR_INT_MODE_P (mode))
2956         {
2957           /* Write a REG_EQUAL note on the last insn so that we can cse
2958              multiplication sequences.  Note that if ACCUM is a SUBREG,
2959              we've set the inner register and must properly indicate that.  */
2960           tem = op0, nmode = mode;
2961           accum_inner = accum;
2962           if (GET_CODE (accum) == SUBREG)
2963             {
2964               accum_inner = SUBREG_REG (accum);
2965               nmode = GET_MODE (accum_inner);
2966               tem = gen_lowpart (nmode, op0);
2967             }
2968           insn = get_last_insn ();
2969           tem = gen_rtx_MULT (nmode, tem,
2970                               GEN_INT ((HOST_WIDE_INT) val_so_far));
2971           set_dst_reg_note (insn, REG_EQUAL, tem, accum_inner);
2972         }
2973     }
2974
2975   if (variant == negate_variant)
2976     {
2977       val_so_far = -val_so_far;
2978       accum = expand_unop (mode, neg_optab, accum, target, 0);
2979     }
2980   else if (variant == add_variant)
2981     {
2982       val_so_far = val_so_far + 1;
2983       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2984     }
2985
2986   /* Compare only the bits of val and val_so_far that are significant
2987      in the result mode, to avoid sign-/zero-extension confusion.  */
2988   nmode = GET_MODE_INNER (mode);
2989   if (nmode == VOIDmode)
2990     nmode = mode;
2991   val &= GET_MODE_MASK (nmode);
2992   val_so_far &= GET_MODE_MASK (nmode);
2993   gcc_assert (val == (HOST_WIDE_INT) val_so_far);
2994
2995   return accum;
2996 }
2997
2998 /* Perform a multiplication and return an rtx for the result.
2999    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3000    TARGET is a suggestion for where to store the result (an rtx).
3001
3002    We check specially for a constant integer as OP1.
3003    If you want this check for OP0 as well, then before calling
3004    you should swap the two operands if OP0 would be constant.  */
3005
3006 rtx
3007 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3008              int unsignedp)
3009 {
3010   enum mult_variant variant;
3011   struct algorithm algorithm;
3012   rtx scalar_op1;
3013   int max_cost;
3014   bool speed = optimize_insn_for_speed_p ();
3015   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3016
3017   if (CONSTANT_P (op0))
3018     {
3019       rtx temp = op0;
3020       op0 = op1;
3021       op1 = temp;
3022     }
3023
3024   /* For vectors, there are several simplifications that can be made if
3025      all elements of the vector constant are identical.  */
3026   scalar_op1 = op1;
3027   if (GET_CODE (op1) == CONST_VECTOR)
3028     {
3029       int i, n = CONST_VECTOR_NUNITS (op1);
3030       scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3031       for (i = 1; i < n; ++i)
3032         if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3033           goto skip_scalar;
3034     }
3035
3036   if (INTEGRAL_MODE_P (mode))
3037     {
3038       rtx fake_reg;
3039       HOST_WIDE_INT coeff;
3040       bool is_neg;
3041       int mode_bitsize;
3042
3043       if (op1 == CONST0_RTX (mode))
3044         return op1;
3045       if (op1 == CONST1_RTX (mode))
3046         return op0;
3047       if (op1 == CONSTM1_RTX (mode))
3048         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3049                             op0, target, 0);
3050
3051       if (do_trapv)
3052         goto skip_synth;
3053
3054       /* These are the operations that are potentially turned into
3055          a sequence of shifts and additions.  */
3056       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3057
3058       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3059          less than or equal in size to `unsigned int' this doesn't matter.
3060          If the mode is larger than `unsigned int', then synth_mult works
3061          only if the constant value exactly fits in an `unsigned int' without
3062          any truncation.  This means that multiplying by negative values does
3063          not work; results are off by 2^32 on a 32 bit machine.  */
3064
3065       if (CONST_INT_P (scalar_op1))
3066         {
3067           coeff = INTVAL (scalar_op1);
3068           is_neg = coeff < 0;
3069         }
3070       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3071         {
3072           /* If we are multiplying in DImode, it may still be a win
3073              to try to work with shifts and adds.  */
3074           if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3075               && CONST_DOUBLE_LOW (scalar_op1) > 0)
3076             {
3077               coeff = CONST_DOUBLE_LOW (scalar_op1);
3078               is_neg = false;
3079             }
3080           else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3081             {
3082               coeff = CONST_DOUBLE_HIGH (scalar_op1);
3083               if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3084                 {
3085                   int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3086                   if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3087                       || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3088                     return expand_shift (LSHIFT_EXPR, mode, op0,
3089                                          shift, target, unsignedp);
3090                 }
3091               goto skip_synth;
3092             }
3093           else
3094             goto skip_synth;
3095         }
3096       else
3097         goto skip_synth;
3098
3099       /* We used to test optimize here, on the grounds that it's better to
3100          produce a smaller program when -O is not used.  But this causes
3101          such a terrible slowdown sometimes that it seems better to always
3102          use synth_mult.  */
3103
3104       /* Special case powers of two.  */
3105       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3106         return expand_shift (LSHIFT_EXPR, mode, op0,
3107                              floor_log2 (coeff), target, unsignedp);
3108
3109       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3110
3111       /* Attempt to handle multiplication of DImode values by negative
3112          coefficients, by performing the multiplication by a positive
3113          multiplier and then inverting the result.  */
3114       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3115         {
3116           /* It's safe to use -coeff even for INT_MIN, as the
3117              result is interpreted as an unsigned coefficient.
3118              Exclude cost of op0 from max_cost to match the cost
3119              calculation of the synth_mult.  */
3120           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3121                       - neg_cost(speed, mode));
3122           if (max_cost > 0
3123               && choose_mult_variant (mode, -coeff, &algorithm,
3124                                       &variant, max_cost))
3125             {
3126               rtx temp = expand_mult_const (mode, op0, -coeff, NULL_RTX,
3127                                             &algorithm, variant);
3128               return expand_unop (mode, neg_optab, temp, target, 0);
3129             }
3130           goto skip_synth;
3131         }
3132
3133       /* Exclude cost of op0 from max_cost to match the cost
3134          calculation of the synth_mult.  */
3135       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3136       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3137         return expand_mult_const (mode, op0, coeff, target,
3138                                   &algorithm, variant);
3139     }
3140  skip_synth:
3141
3142   /* Expand x*2.0 as x+x.  */
3143   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3144     {
3145       REAL_VALUE_TYPE d;
3146       REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3147
3148       if (REAL_VALUES_EQUAL (d, dconst2))
3149         {
3150           op0 = force_reg (GET_MODE (op0), op0);
3151           return expand_binop (mode, add_optab, op0, op0,
3152                                target, unsignedp, OPTAB_LIB_WIDEN);
3153         }
3154     }
3155  skip_scalar:
3156
3157   /* This used to use umul_optab if unsigned, but for non-widening multiply
3158      there is no difference between signed and unsigned.  */
3159   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3160                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3161   gcc_assert (op0);
3162   return op0;
3163 }
3164
3165 /* Estimate the cost of multiplying a register by the constant COEFF in
3166    MODE.  SPEED is forwarded to set_src_cost to price a plain MULT, and
3167    that price is the cost limit handed to choose_mult_variant.  */
3168 int
3169 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3170 {
3171   enum mult_variant how;
3172   struct algorithm alg;
3173   rtx pseudo = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3174   rtx product = gen_rtx_MULT (mode, pseudo, pseudo);
3175   int limit = set_src_cost (product, speed);
3176
3177   /* No variant chosen: report the price of the plain multiply.  */
3178   if (!choose_mult_variant (mode, coeff, &alg, &how, limit))
3179     return limit;
3180   return alg.cost.cost;
3181 }
3182
3183 /* Expand a widening multiplication of OP0 by OP1 and return an rtx for
3184    the product.  MODE is the mode of the result, TARGET a suggestion for
3185    where to put it, and THIS_OPTAB the optab to fall back on; it must be
3186    either umul_widen_optab or smul_widen_optab.
3187
3188    A CONST_INT OP1 is checked specially: a coefficient accepted by
3189    EXACT_POWER_OF_2_OR_ZERO_P becomes a left shift, and any other one is
3190    expanded as shifts and adds if choose_mult_variant finds a sequence
3191    within the cost of a widening multiply.  */
3192
3193 rtx
3194 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3195                       int unsignedp, optab this_optab)
3196 {
3197   bool speed = optimize_insn_for_speed_p ();
3198   int from_unsigned = this_optab == umul_widen_optab;
3199   rtx cop1 = NULL_RTX;
3200
3201   /* OP1 can serve as a coefficient only if it is still a CONST_INT after
3202      conversion to MODE.  */
3203   if (CONST_INT_P (op1) && GET_MODE (op0) != VOIDmode)
3204     cop1 = convert_modes (mode, GET_MODE (op0), op1, from_unsigned);
3205
3206   if (cop1
3207       && CONST_INT_P (cop1)
3208       && (INTVAL (cop1) >= 0 || HWI_COMPUTABLE_MODE_P (mode)))
3209     {
3210       HOST_WIDE_INT coeff = INTVAL (cop1);
3211       struct algorithm alg;
3212       enum mult_variant how;
3213       int limit;
3214
3215       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3216         {
3217           op0 = convert_to_mode (mode, op0, from_unsigned);
3218           return expand_shift (LSHIFT_EXPR, mode, op0,
3219                                floor_log2 (coeff), target, unsignedp);
3220         }
3221
3222       /* Leave the cost of OP0 out of the limit, as synth_mult does.  */
3223       limit = mul_widen_cost (speed, mode);
3224       if (choose_mult_variant (mode, coeff, &alg, &how, limit))
3225         {
3226           op0 = convert_to_mode (mode, op0, from_unsigned);
3227           return expand_mult_const (mode, op0, coeff, target, &alg, how);
3228         }
3229     }
3230
3231   /* No cheaper expansion applies: emit the widening multiply itself.  */
3232   return expand_binop (mode, this_optab, op0, op1, target,
3233                        unsignedp, OPTAB_LIB_WIDEN);
3234 }
3235 \f
3236 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3237    replace division by D, and put the least significant N bits of the result
3238    in *MULTIPLIER_PTR and return the most significant bit.
3239
3240    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3241    needed precision is in PRECISION (should be <= N).
3242
3243    PRECISION should be as small as possible so this function can choose
3244    multiplier more freely.
3245
3246    The rounded-up logarithm of D is placed in *LGUP_PTR.  A shift count that
3247    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3248
3249    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3250    where m is the full N + 1 bit multiplier, i.e. the returned bit
        followed by the N bits stored in *MULTIPLIER_PTR.  */
3251
3252 unsigned HOST_WIDE_INT
3253 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3254                    unsigned HOST_WIDE_INT *multiplier_ptr,
3255                    int *post_shift_ptr, int *lgup_ptr)
3256 {
3257   double_int mhigh, mlow;
3258   int lgup, post_shift;
3259   int pow, pow2;
3260
3261   /* lgup = ceil(log2(divisor)); */
3262   lgup = ceil_log2 (d);
3263
3264   gcc_assert (lgup <= n);
3265
3266   pow = n + lgup;
3267   pow2 = n + lgup - precision;
3268
3269   /* We could handle this with some effort, but this case is much
3270      better handled directly with a scc insn, so rely on caller using
3271      that.  */
3272   gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3273
3274   /* mlow = 2^(N + lgup)/d */
3275   double_int val = double_int_zero.set_bit (pow);
3276   mlow = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3277
3278   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3279   val |= double_int_zero.set_bit (pow2);
3280   mhigh = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3281
3282   gcc_assert (!mhigh.high || val.high - d < d);
3283   gcc_assert (mhigh.high <= 1 && mlow.high <= 1);
3284   /* Assert that mlow < mhigh.  */
3285   gcc_assert (mlow.ult (mhigh));
3286
3287   /* If precision == N, then mlow, mhigh exceed 2^N
3288      (but they do not exceed 2^(N+1)).  */
3289
3290   /* Reduce to lowest terms: halve both bounds, one shift count at a
        time, for as long as the halved bounds remain distinct.  */
3291   for (post_shift = lgup; post_shift > 0; post_shift--)
3292     {
3293       int shft = HOST_BITS_PER_WIDE_INT - 1;
3294       unsigned HOST_WIDE_INT ml_lo = (mlow.high << shft) | (mlow.low >> 1);
3295       unsigned HOST_WIDE_INT mh_lo = (mhigh.high << shft) | (mhigh.low >> 1);
3296       if (ml_lo >= mh_lo)
3297         break;
3298
3299       mlow = double_int::from_uhwi (ml_lo);
3300       mhigh = double_int::from_uhwi (mh_lo);
3301     }
3302
3303   *post_shift_ptr = post_shift;
3304   *lgup_ptr = lgup;
3305   if (n < HOST_BITS_PER_WIDE_INT)
3306     {
3307       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3308       *multiplier_ptr = mhigh.low & mask;
           /* MHIGH fits in N + 1 bits, so its bit N is set exactly when
              MHIGH exceeds the N-bit mask.  A >= comparison would also
              report the bit for MHIGH == 2^N - 1, where it is clear.  */
3309       return mhigh.low > mask;
3310     }
3311   else
3312     {
3313       *multiplier_ptr = mhigh.low;
3314       return mhigh.high;
3315     }
3316 }
3317
3318 /* Return the multiplicative inverse of X modulo 2**N, that is, the Y
3319    for which X * Y is congruent to 1 (mod 2**N).  X is assumed odd.  */
3320
3321 static unsigned HOST_WIDE_INT
3322 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3323 {
3324   unsigned HOST_WIDE_INT low_n_bits, inv;
3325   int good_bits;
3326
3327   /* Shifting by the full width of the type is not allowed, so the
3328      all-ones mask for N == HOST_BITS_PER_WIDE_INT is spelled out.  */
3329   if (n == HOST_BITS_PER_WIDE_INT)
3330     low_n_bits = ~(unsigned HOST_WIDE_INT) 0;
3331   else
3332     low_n_bits = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3333
3334   /* An odd X is its own inverse mod 2^3, so start from Y = X with three
3335      correct bits.  Every refinement step doubles how many low bits of
3336      the product X * Y agree with 1.  */
3337   inv = x;
3338   for (good_bits = 3; good_bits < n; good_bits *= 2)
3339     {
3340       unsigned HOST_WIDE_INT prod = x * inv;
3341       inv = (inv * (2 - prod)) & low_n_bits;
3342     }
3343   return inv;
3344 }
3345
3346 /* Emit code that corrects ADJ_OPERAND, the high half of the product
3347    OP0 x OP1 computed by a multiply of the wrong signedness.  A nonzero
3348    UNSIGNEDP turns a signed high part into the unsigned one; a zero
3349    UNSIGNEDP turns an unsigned high part into the signed one.
3350
3351    MODE is the mode of the operation.  The result goes to TARGET when
3352    that is convenient, and the rtx holding it is returned.  */
3353
3354 rtx
3355 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3356                              rtx op1, rtx target, int unsignedp)
3357 {
3358   enum rtx_code fixup = MINUS;
3359   int sign_pos = GET_MODE_BITSIZE (mode) - 1;
3360   rtx masked, sum;
3361
3362   if (unsignedp)
3363     fixup = PLUS;
3364
3365   /* Shift OP0 right by all but one bit (a signed shift is requested)
3366      and AND with OP1: presumably OP1 if OP0 is negative, else zero.  */
3367   masked = expand_shift (RSHIFT_EXPR, mode, op0, sign_pos, NULL_RTX, 0);
3368   masked = expand_and (mode, masked, op1, NULL_RTX);
3369   sum = gen_rtx_fmt_ee (fixup, mode, adj_operand, masked);
3370   adj_operand = force_operand (sum, adj_operand);
3371
3372   /* The same correction with the operands' roles exchanged.  */
3373   masked = expand_shift (RSHIFT_EXPR, mode, op1, sign_pos, NULL_RTX, 0);
3374   masked = expand_and (mode, masked, op0, NULL_RTX);
3375   sum = gen_rtx_fmt_ee (fixup, mode, adj_operand, masked);
3376   return force_operand (sum, target);
3377 }
3378
3379 /* Helper for expmed_mult_highpart: return the high MODE-sized half of
3380    OP.  When MODE is word_mode this is gen_highpart; otherwise OP is
3381    shifted right by MODE's width in GET_MODE_WIDER_MODE and narrowed.  */
3382 static rtx
3383 extract_high_half (enum machine_mode mode, rtx op)
3384 {
3385   if (mode != word_mode)
3386     {
3387       enum machine_mode wide;
3388
3389       gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3390       wide = GET_MODE_WIDER_MODE (mode);
3391       op = expand_shift (RSHIFT_EXPR, wide, op, GET_MODE_BITSIZE (mode), 0, 1);
3392       return convert_modes (mode, wide, op, 0);
3393     }
3394   return gen_highpart (mode, op);
3395 }
3396
3397 /* Like expmed_mult_highpart, but only consider using a multiplication
3398    optab.  OP1 is an rtx for the constant operand.

        MODE, OP0, TARGET and UNSIGNEDP are used as in expmed_mult_highpart.
        The strategies below are tried in order.  Each one is attempted
        only if, among other conditions, its estimated cost is less than
        MAX_COST, and the first one that expand_binop manages to expand
        provides the return value.  Return 0 when none of them does.  */
3399
3400 static rtx
3401 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3402                             rtx target, int unsignedp, int max_cost)
3403 {
       /* OP1 rebuilt by gen_int_mode as a constant of MODE; this is the
          operand handed to the highpart and widening optabs below.  */
3404   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3405   enum machine_mode wider_mode;
3406   optab moptab;
3407   rtx tem;
3408   int size;
3409   bool speed = optimize_insn_for_speed_p ();
3410
3411   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3412
3413   wider_mode = GET_MODE_WIDER_MODE (mode);
3414   size = GET_MODE_BITSIZE (mode);
3415
3416   /* Firstly, try using a multiplication insn that only generates the needed
3417      high part of the product, and in the sign flavor of unsignedp.  */
3418   if (mul_highpart_cost (speed, mode) < max_cost)
3419     {
3420       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3421       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3422                           unsignedp, OPTAB_DIRECT);
3423       if (tem)
3424         return tem;
3425     }
3426
3427   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3428      Need to adjust the result after the multiplication.  The adjustment
          is budgeted as two shifts by SIZE - 1 plus four additions.  */
3429   if (size - 1 < BITS_PER_WORD
3430       && (mul_highpart_cost (speed, mode)
3431           + 2 * shift_cost (speed, mode, size-1)
3432           + 4 * add_cost (speed, mode) < max_cost))
3433     {
3434       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3435       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3436                           unsignedp, OPTAB_DIRECT);
3437       if (tem)
3438         /* We used the wrong signedness.  Adjust the result.  TEM is
              passed both as the value to adjust and as the suggested
              target of the adjustment.  */
3439         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3440                                             tem, unsignedp);
3441     }
3442
3443   /* Try widening multiplication.  The product is computed in WIDER_MODE
          and its high half extracted.  */
3444   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3445   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3446       && mul_widen_cost (speed, wider_mode) < max_cost)
3447     {
3448       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3449                           unsignedp, OPTAB_WIDEN);
3450       if (tem)
3451         return extract_high_half (mode, tem);
3452     }
3453
3454   /* Try widening the mode and perform a non-widening multiplication.  */
3455   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3456       && size - 1 < BITS_PER_WORD
3457       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3458           < max_cost))
3459     {
3460       rtx insns, wop0, wop1;
3461
3462       /* We need to widen the operands, for example to ensure the
3463          constant multiplier is correctly sign or zero extended.
3464          Use a sequence to clean-up any instructions emitted by
3465          the conversions if things don't work out.  */
3466       start_sequence ();
3467       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3468       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3469       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3470                           unsignedp, OPTAB_WIDEN);
3471       insns = get_insns ();
3472       end_sequence ();
3473
           /* Only a successful multiply has its collected insns emitted;
              on failure INSNS is simply not used.  */
3474       if (tem)
3475         {
3476           emit_insn (insns);
3477           return extract_high_half (mode, tem);
3478         }
3479     }
3480
3481   /* Try widening multiplication of opposite signedness, and adjust.  */
3482   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3483   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3484       && size - 1 < BITS_PER_WORD
3485       && (mul_widen_cost (speed, wider_mode)
3486           + 2 * shift_cost (speed, mode, size-1)
3487           + 4 * add_cost (speed, mode) < max_cost))
3488     {
           /* MOPTAB has the opposite signedness here, hence ! UNSIGNEDP.  */
3489       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3490                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3491       if (tem != 0)
3492         {
3493           tem = extract_high_half (mode, tem);
3494           /* We used the wrong signedness.  Adjust the result.  */
3495           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3496                                               target, unsignedp);
3497         }
3498     }
3499
       /* Every strategy was either too expensive or could not be expanded.  */
3500   return 0;
3501 }
3502
3503 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3504    putting the high half of the result in TARGET if that is convenient,
3505    and return where the result is.  If the operation can not be performed,
3506    0 is returned.
3507
3508    MODE is the mode of operation and result.
3509
3510    UNSIGNEDP nonzero means unsigned multiply.
3511
3512    MAX_COST is the total allowed cost for the expanded RTL.

        A shift/add sequence in the wider mode is compared against the
        multiplication optabs tried by expmed_mult_highpart_optab, and the
        optabs are preferred when they come in under the sequence's
        cost.  */
3513
3514 static rtx
3515 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3516                       rtx target, int unsignedp, int max_cost)
3517 {
3518   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3519   unsigned HOST_WIDE_INT cnst1;
3520   int extra_cost;
3521   bool sign_adjust = false;
3522   enum mult_variant variant;
3523   struct algorithm alg;
3524   rtx tem;
3525   bool speed = optimize_insn_for_speed_p ();
3526
3527   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3528   /* We can't support modes wider than HOST_BITS_PER_INT.
          NOTE(review): the assertion below tests HWI_COMPUTABLE_MODE_P, so
          presumably HOST_BITS_PER_WIDE_INT is what is meant -- confirm.  */
3529   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3530
       /* The constant reduced to the bits of MODE, as an unsigned value.  */
3531   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3532
3533   /* We can't optimize modes wider than BITS_PER_WORD.
3534      ??? We might be able to perform double-word arithmetic if
3535      mode == word_mode, however all the cost calculations in
3536      synth_mult etc. assume single-word operations.  */
3537   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3538     return expmed_mult_highpart_optab (mode, op0, op1, target,
3539                                        unsignedp, max_cost);
3540
       /* Cost charged on top of the shift/add sequence itself: one shift
          by GET_MODE_BITSIZE (MODE) - 1, plus one addition when a sign
          adjustment is needed.  */
3541   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3542
3543   /* Check whether we try to multiply by a negative constant.  */
3544   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3545     {
3546       sign_adjust = true;
3547       extra_cost += add_cost (speed, mode);
3548     }
3549
3550   /* See whether shift/add multiplication is cheap enough.  */
3551   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3552                            max_cost - extra_cost))
3553     {
3554       /* See whether the specialized multiplication optabs are
3555          cheaper than the shift/add version.  */
3556       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3557                                         alg.cost.cost + extra_cost);
3558       if (tem)
3559         return tem;
3560
           /* Multiply in WIDER_MODE by the unsigned constant and keep the
              high half of that product.  */
3561       tem = convert_to_mode (wider_mode, op0, unsignedp);
3562       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3563       tem = extract_high_half (mode, tem);
3564
3565       /* Adjust result for signedness.  CNST1 was masked to an unsigned
              value, so for a negative constant it is 2^bitsize larger than
              OP1 and the high half comes out too large by OP0.  */
3566       if (sign_adjust)
3567         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3568
3569       return tem;
3570     }
       /* No shift/add sequence fits within MAX_COST; the optabs are the
          only remaining option.  */
3571   return expmed_mult_highpart_optab (mode, op0, op1, target,
3572                                      unsignedp, max_cost);
3573 }
3574
3575
3576 /* Expand signed modulus of OP0 by a power of two D in mode MODE.

        Return an rtx holding the remainder.  Two expansions are available:
        a branch-free one built from the sign mask of OP0, used when
        optimizing for speed with BRANCH_COST >= 2 and emit_store_flag
        succeeds, and otherwise one that masks OP0 and fixes up a negative
        value behind a conditional jump.  */
3577
3578 static rtx
3579 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3580 {
3581   unsigned HOST_WIDE_INT masklow, maskhigh;
3582   rtx result, temp, shift, label;
3583   int logd;
3584
3585   logd = floor_log2 (d);
3586   result = gen_reg_rtx (mode);
3587
3588   /* Avoid conditional branches when they're expensive.  */
3589   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3590       && optimize_insn_for_speed_p ())
3591     {
           /* SIGNMASK is requested as -1 when OP0 < 0 and 0 otherwise;
              emit_store_flag returns 0 if it cannot produce that.  */
3592       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3593                                       mode, 0, -1);
3594       if (signmask)
3595         {
3596           signmask = force_reg (mode, signmask);
3597           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3598           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3599
3600           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3601              which instruction sequence to use.  If logical right shifts
3602              are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3603              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3604
3605           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3606           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3607               || (set_src_cost (temp, optimize_insn_for_speed_p ())
3608                   > COSTS_N_INSNS (2)))
3609             {
3610               temp = expand_binop (mode, xor_optab, op0, signmask,
3611                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3612               temp = expand_binop (mode, sub_optab, temp, signmask,
3613                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3614               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3615                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3616               temp = expand_binop (mode, xor_optab, temp, signmask,
3617                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3618               temp = expand_binop (mode, sub_optab, temp, signmask,
3619                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3620             }
3621           else
3622             {
3623               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3624                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3625               signmask = force_reg (mode, signmask);
3626
3627               temp = expand_binop (mode, add_optab, op0, signmask,
3628                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3629               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3630                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3631               temp = expand_binop (mode, sub_optab, temp, signmask,
3632                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3633             }
3634           return temp;
3635         }
3636     }
3637
3638   /* Mask contains the mode's signbit and the significant bits of the
3639      modulus.  By including the signbit in the operation, many targets
3640      can avoid an explicit compare operation in the following comparison
3641      against zero.  */
3642
       /* The all-ones patterns are built in the unsigned type, because
          left-shifting a negative value is undefined behavior.  */
3643   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3644   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3645     {
3646       masklow |= ~(unsigned HOST_WIDE_INT) 0 << (GET_MODE_BITSIZE (mode) - 1);
3647       maskhigh = -1;
3648     }
3649   else
3650     maskhigh = ~(unsigned HOST_WIDE_INT) 0
3651                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3652
3653   temp = expand_binop (mode, and_optab, op0,
3654                        immed_double_const (masklow, maskhigh, mode),
3655                        result, 1, OPTAB_LIB_WIDEN);
3656   if (temp != result)
3657     emit_move_insn (result, temp);
3658
       /* A non-negative masked value is already the remainder; jump over
          the fix-up below.  */
3659   label = gen_label_rtx ();
3660   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3661
       /* Negative case: subtract 1, set every bit from LOGD upwards, and
          add 1 back.  */
3662   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3663                        0, OPTAB_LIB_WIDEN);
3664   masklow = ~(unsigned HOST_WIDE_INT) 0 << logd;
3665   maskhigh = -1;
3666   temp = expand_binop (mode, ior_optab, temp,
3667                        immed_double_const (masklow, maskhigh, mode),
3668                        result, 1, OPTAB_LIB_WIDEN);
3669   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3670                        0, OPTAB_LIB_WIDEN);
3671   if (temp != result)
3672     emit_move_insn (result, temp);
3673   emit_label (label);
3674   return result;
3675 }
3676
3677 /* Expand signed division of OP0 by a power of two D in mode MODE.
3678    This routine is only called for positive values of D.

        Return an rtx for the quotient.  Every strategy adds D - 1 to a
        negative OP0 (which is 1 when D is 2) and then shifts right by
        log2 (D) with a signed shift; they differ in how the addend is
        selected.  The branch-free strategies are gated on BRANCH_COST and
        fall through to the next one when emit_store_flag or
        emit_conditional_move cannot produce code.  */
3679
3680 static rtx
3681 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3682 {
3683   rtx temp, label;
3684   int logd;
3685
3686   logd = floor_log2 (d);
3687
       /* For D == 2 the addend is the 0/1 flag (OP0 < 0) itself.  */
3688   if (d == 2
3689       && BRANCH_COST (optimize_insn_for_speed_p (),
3690                       false) >= 1)
3691     {
3692       temp = gen_reg_rtx (mode);
3693       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
           /* emit_store_flag returns 0 when it cannot emit the comparison
              (expand_smod_pow2 checks for this too); try the next
              strategy in that case instead of using a null operand.  */
           if (temp)
             {
3694           temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3695                                0, OPTAB_LIB_WIDEN);
3696           return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
             }
3697     }
3698
3699 #ifdef HAVE_conditional_move
3700   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3701       >= 2)
3702     {
3703       rtx temp2;
3704
3705       /* ??? emit_conditional_move forces a stack adjustment via
3706          compare_from_rtx so, if the sequence is discarded, it will
3707          be lost.  Do it now instead.  */
3708       do_pending_stack_adjust ();
3709
3710       start_sequence ();
3711       temp2 = copy_to_mode_reg (mode, op0);
3712       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3713                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3714       temp = force_reg (mode, temp);
3715
3716       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3717       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3718                                      mode, temp, temp2, mode, 0);
3719       if (temp2)
3720         {
3721           rtx seq = get_insns ();
3722           end_sequence ();
3723           emit_insn (seq);
3724           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3725         }
           /* The conditional move failed; the insns collected in the
              sequence are dropped without being emitted.  */
3726       end_sequence ();
3727     }
3728 #endif
3729
       /* Derive the addend D - 1 from a -1/0 sign flag, either by masking
          it or by shifting it right logically, whichever shift_cost
          favors.  */
3730   if (BRANCH_COST (optimize_insn_for_speed_p (),
3731                    false) >= 2)
3732     {
3733       int ushift = GET_MODE_BITSIZE (mode) - logd;
3734
3735       temp = gen_reg_rtx (mode);
3736       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
           /* As above, fall through to the branching code if no store-flag
              sequence could be emitted.  */
           if (temp)
             {
3737           if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3738               > COSTS_N_INSNS (1))
3739             temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3740                                  NULL_RTX, 0, OPTAB_LIB_WIDEN);
3741           else
3742             temp = expand_shift (RSHIFT_EXPR, mode, temp,
3743                                  ushift, NULL_RTX, 1);
3744           temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3745                                0, OPTAB_LIB_WIDEN);
3746           return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
             }
3747     }
3748
       /* Last resort: skip the D - 1 increment with a conditional jump when
          OP0 is non-negative.  */
3749   label = gen_label_rtx ();
3750   temp = copy_to_mode_reg (mode, op0);
3751   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3752   expand_inc (temp, GEN_INT (d - 1));
3753   emit_label (label);
3754   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3755 }
3756 \f
3757 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3758    if that is convenient, and returning where the result is.
3759    You may request either the quotient or the remainder as the result;
3760    specify REM_FLAG nonzero to get the remainder.
3761
3762    CODE is the expression code for which kind of division this is;
3763    it controls how rounding is done.  MODE is the machine mode to use.
3764    UNSIGNEDP nonzero means do unsigned division.  */
3765
3766 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3767    and then correct it by or'ing in missing high bits
3768    if result of ANDI is nonzero.
3769    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3770    This could optimize to a bfexts instruction.
3771    But C doesn't use these operations, so their optimizations are
3772    left for later.  */
3773 /* ??? For modulo, we don't actually need the highpart of the first product,
3774    the low part will do nicely.  And for small divisors, the second multiply
3775    can also be a low-part only multiply or even be completely left out.
3776    E.g. to calculate the remainder of a division by 3 with a 32 bit
3777    multiply, multiply with 0x55555556 and extract the upper two bits;
3778    the result is exact for inputs up to 0x1fffffff.
3779    The input range can be reduced by using cross-sum rules.
3780    For odd divisors >= 3, the following table gives right shift counts
3781    so that if a number is shifted by an integer multiple of the given
3782    amount, the remainder stays the same:
3783    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3784    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3785    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3786    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3787    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3788
3789    Cross-sum rules for even numbers can be derived by leaving as many bits
3790    to the right alone as the divisor has zeros to the right.
3791    E.g. if x is an unsigned 32 bit number:
3792    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3793    */
3794
3795 rtx
3796 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3797                rtx op0, rtx op1, rtx target, int unsignedp)
3798 {
3799   enum machine_mode compute_mode;
3800   rtx tquotient;
3801   rtx quotient = 0, remainder = 0;
3802   rtx last;
3803   int size;
3804   rtx insn;
3805   optab optab1, optab2;
3806   int op1_is_constant, op1_is_pow2 = 0;
3807   int max_cost, extra_cost;
3808   static HOST_WIDE_INT last_div_const = 0;
3809   static HOST_WIDE_INT ext_op1;
3810   bool speed = optimize_insn_for_speed_p ();
3811
3812   op1_is_constant = CONST_INT_P (op1);
3813   if (op1_is_constant)
3814     {
3815       ext_op1 = INTVAL (op1);
3816       if (unsignedp)
3817         ext_op1 &= GET_MODE_MASK (mode);
3818       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3819                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3820     }
3821
3822   /*
3823      This is the structure of expand_divmod:
3824
3825      First comes code to fix up the operands so we can perform the operations
3826      correctly and efficiently.
3827
3828      Second comes a switch statement with code specific for each rounding mode.
3829      For some special operands this code emits all RTL for the desired
3830      operation, for other cases, it generates only a quotient and stores it in
3831      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3832      to indicate that it has not done anything.
3833
3834      Last comes code that finishes the operation.  If QUOTIENT is set and
3835      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3836      QUOTIENT is not set, it is computed using trunc rounding.
3837
3838      We try to generate special code for division and remainder when OP1 is a
3839      constant.  If |OP1| = 2**n we can use shifts and some other fast
3840      operations.  For other values of OP1, we compute a carefully selected
3841      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3842      by m.
3843
3844      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3845      half of the product.  Different strategies for generating the product are
3846      implemented in expmed_mult_highpart.
3847
3848      If what we actually want is the remainder, we generate that by another
3849      by-constant multiplication and a subtraction.  */
3850
3851   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3852      code below will malfunction if we are, so check here and handle
3853      the special case if so.  */
3854   if (op1 == const1_rtx)
3855     return rem_flag ? const0_rtx : op0;
3856
3857   /* When dividing by -1, we could get an overflow.
3858      negv_optab can handle overflows.  */
3859   if (! unsignedp && op1 == constm1_rtx)
3860     {
3861       if (rem_flag)
3862         return const0_rtx;
3863       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3864                           ? negv_optab : neg_optab, op0, target, 0);
3865     }
3866
3867   if (target
3868       /* Don't use the function value register as a target
3869          since we have to read it as well as write it,
3870          and function-inlining gets confused by this.  */
3871       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3872           /* Don't clobber an operand while doing a multi-step calculation.  */
3873           || ((rem_flag || op1_is_constant)
3874               && (reg_mentioned_p (target, op0)
3875                   || (MEM_P (op0) && MEM_P (target))))
3876           || reg_mentioned_p (target, op1)
3877           || (MEM_P (op1) && MEM_P (target))))
3878     target = 0;
3879
3880   /* Get the mode in which to perform this computation.  Normally it will
3881      be MODE, but sometimes we can't do the desired operation in MODE.
3882      If so, pick a wider mode in which we can do the operation.  Convert
3883      to that mode at the start to avoid repeated conversions.
3884
3885      First see what operations we need.  These depend on the expression
3886      we are evaluating.  (We assume that divxx3 insns exist under the
3887      same conditions as modxx3 insns and that these insns don't normally
3888      fail.  If these assumptions are not correct, we may generate less
3889      efficient code in some cases.)
3890
3891      Then see if we find a mode in which we can open-code that operation
3892      (either a division, modulus, or shift).  Finally, check for the smallest
3893      mode for which we can do the operation with a library call.  */
3894
3895   /* We might want to refine this now that we have division-by-constant
3896      optimization.  Since expmed_mult_highpart tries so many variants, it is
3897      not straightforward to generalize this.  Maybe we should make an array
3898      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3899
3900   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3901             ? (unsignedp ? lshr_optab : ashr_optab)
3902             : (unsignedp ? udiv_optab : sdiv_optab));
3903   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3904             ? optab1
3905             : (unsignedp ? udivmod_optab : sdivmod_optab));
3906
3907   for (compute_mode = mode; compute_mode != VOIDmode;
3908        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3909     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3910         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3911       break;
3912
3913   if (compute_mode == VOIDmode)
3914     for (compute_mode = mode; compute_mode != VOIDmode;
3915          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3916       if (optab_libfunc (optab1, compute_mode)
3917           || optab_libfunc (optab2, compute_mode))
3918         break;
3919
3920   /* If we still couldn't find a mode, use MODE, but expand_binop will
3921      probably die.  */
3922   if (compute_mode == VOIDmode)
3923     compute_mode = mode;
3924
3925   if (target && GET_MODE (target) == compute_mode)
3926     tquotient = target;
3927   else
3928     tquotient = gen_reg_rtx (compute_mode);
3929
3930   size = GET_MODE_BITSIZE (compute_mode);
3931 #if 0
3932   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3933      (mode), and thereby get better code when OP1 is a constant.  Do that
3934      later.  It will require going over all usages of SIZE below.  */
3935   size = GET_MODE_BITSIZE (mode);
3936 #endif
3937
3938   /* Only deduct something for a REM if the last divide done was
3939      for a different constant.   Then set the constant of the last
3940      divide.  */
3941   max_cost = (unsignedp
3942               ? udiv_cost (speed, compute_mode)
3943               : sdiv_cost (speed, compute_mode));
3944   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3945                      && INTVAL (op1) == last_div_const))
3946     max_cost -= (mul_cost (speed, compute_mode)
3947                  + add_cost (speed, compute_mode));
3948
3949   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3950
3951   /* Now convert to the best mode to use.  */
3952   if (compute_mode != mode)
3953     {
3954       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3955       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3956
3957       /* convert_modes may have placed op1 into a register, so we
3958          must recompute the following.  */
3959       op1_is_constant = CONST_INT_P (op1);
3960       op1_is_pow2 = (op1_is_constant
3961                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3962                           || (! unsignedp
3963                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3964     }
3965
3966   /* If one of the operands is a volatile MEM, copy it into a register.  */
3967
3968   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3969     op0 = force_reg (compute_mode, op0);
3970   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3971     op1 = force_reg (compute_mode, op1);
3972
3973   /* If we need the remainder or if OP1 is constant, we need to
3974      put OP0 in a register in case it has any queued subexpressions.  */
3975   if (rem_flag || op1_is_constant)
3976     op0 = force_reg (compute_mode, op0);
3977
3978   last = get_last_insn ();
3979
3980   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3981   if (unsignedp)
3982     {
3983       if (code == FLOOR_DIV_EXPR)
3984         code = TRUNC_DIV_EXPR;
3985       if (code == FLOOR_MOD_EXPR)
3986         code = TRUNC_MOD_EXPR;
3987       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3988         code = TRUNC_DIV_EXPR;
3989     }
3990
3991   if (op1 != const0_rtx)
3992     switch (code)
3993       {
3994       case TRUNC_MOD_EXPR:
3995       case TRUNC_DIV_EXPR:
3996         if (op1_is_constant)
3997           {
3998             if (unsignedp)
3999               {
4000                 unsigned HOST_WIDE_INT mh, ml;
4001                 int pre_shift, post_shift;
4002                 int dummy;
4003                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4004                                             & GET_MODE_MASK (compute_mode));
4005
4006                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4007                   {
4008                     pre_shift = floor_log2 (d);
4009                     if (rem_flag)
4010                       {
4011                         remainder
4012                           = expand_binop (compute_mode, and_optab, op0,
4013                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4014                                           remainder, 1,
4015                                           OPTAB_LIB_WIDEN);
4016                         if (remainder)
4017                           return gen_lowpart (mode, remainder);
4018                       }
4019                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4020                                              pre_shift, tquotient, 1);
4021                   }
4022                 else if (size <= HOST_BITS_PER_WIDE_INT)
4023                   {
4024                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4025                       {
4026                         /* Most significant bit of divisor is set; emit an scc
4027                            insn.  */
4028                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4029                                                           compute_mode, 1, 1);
4030                       }
4031                     else
4032                       {
4033                         /* Find a suitable multiplier and right shift count
4034                            instead of multiplying with D.  */
4035
4036                         mh = choose_multiplier (d, size, size,
4037                                                 &ml, &post_shift, &dummy);
4038
4039                         /* If the suggested multiplier is more than SIZE bits,
4040                            we can do better for even divisors, using an
4041                            initial right shift.  */
4042                         if (mh != 0 && (d & 1) == 0)
4043                           {
4044                             pre_shift = floor_log2 (d & -d);
4045                             mh = choose_multiplier (d >> pre_shift, size,
4046                                                     size - pre_shift,
4047                                                     &ml, &post_shift, &dummy);
4048                             gcc_assert (!mh);
4049                           }
4050                         else
4051                           pre_shift = 0;
4052
4053                         if (mh != 0)
4054                           {
4055                             rtx t1, t2, t3, t4;
4056
4057                             if (post_shift - 1 >= BITS_PER_WORD)
4058                               goto fail1;
4059
4060                             extra_cost
4061                               = (shift_cost (speed, compute_mode, post_shift - 1)
4062                                  + shift_cost (speed, compute_mode, 1)
4063                                  + 2 * add_cost (speed, compute_mode));
4064                             t1 = expmed_mult_highpart (compute_mode, op0,
4065                                                        GEN_INT (ml),
4066                                                        NULL_RTX, 1,
4067                                                        max_cost - extra_cost);
4068                             if (t1 == 0)
4069                               goto fail1;
4070                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4071                                                                op0, t1),
4072                                                 NULL_RTX);
4073                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4074                                                t2, 1, NULL_RTX, 1);
4075                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4076                                                               t1, t3),
4077                                                 NULL_RTX);
4078                             quotient = expand_shift
4079                               (RSHIFT_EXPR, compute_mode, t4,
4080                                post_shift - 1, tquotient, 1);
4081                           }
4082                         else
4083                           {
4084                             rtx t1, t2;
4085
4086                             if (pre_shift >= BITS_PER_WORD
4087                                 || post_shift >= BITS_PER_WORD)
4088                               goto fail1;
4089
4090                             t1 = expand_shift
4091                               (RSHIFT_EXPR, compute_mode, op0,
4092                                pre_shift, NULL_RTX, 1);
4093                             extra_cost
4094                               = (shift_cost (speed, compute_mode, pre_shift)
4095                                  + shift_cost (speed, compute_mode, post_shift));
4096                             t2 = expmed_mult_highpart (compute_mode, t1,
4097                                                        GEN_INT (ml),
4098                                                        NULL_RTX, 1,
4099                                                        max_cost - extra_cost);
4100                             if (t2 == 0)
4101                               goto fail1;
4102                             quotient = expand_shift
4103                               (RSHIFT_EXPR, compute_mode, t2,
4104                                post_shift, tquotient, 1);
4105                           }
4106                       }
4107                   }
4108                 else            /* Too wide mode to use tricky code */
4109                   break;
4110
4111                 insn = get_last_insn ();
4112                 if (insn != last)
4113                   set_dst_reg_note (insn, REG_EQUAL,
4114                                     gen_rtx_UDIV (compute_mode, op0, op1),
4115                                     quotient);
4116               }
4117             else                /* TRUNC_DIV, signed */
4118               {
4119                 unsigned HOST_WIDE_INT ml;
4120                 int lgup, post_shift;
4121                 rtx mlr;
4122                 HOST_WIDE_INT d = INTVAL (op1);
4123                 unsigned HOST_WIDE_INT abs_d;
4124
4125                 /* Since d might be INT_MIN, we have to cast to
4126                    unsigned HOST_WIDE_INT before negating to avoid
4127                    undefined signed overflow.  */
4128                 abs_d = (d >= 0
4129                          ? (unsigned HOST_WIDE_INT) d
4130                          : - (unsigned HOST_WIDE_INT) d);
4131
4132                 /* n rem d = n rem -d */
4133                 if (rem_flag && d < 0)
4134                   {
4135                     d = abs_d;
4136                     op1 = gen_int_mode (abs_d, compute_mode);
4137                   }
4138
4139                 if (d == 1)
4140                   quotient = op0;
4141                 else if (d == -1)
4142                   quotient = expand_unop (compute_mode, neg_optab, op0,
4143                                           tquotient, 0);
4144                 else if (HOST_BITS_PER_WIDE_INT >= size
4145                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4146                   {
4147                     /* This case is not handled correctly below.  */
4148                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4149                                                 compute_mode, 1, 1);
4150                     if (quotient == 0)
4151                       goto fail1;
4152                   }
4153                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4154                          && (rem_flag
4155                              ? smod_pow2_cheap (speed, compute_mode)
4156                              : sdiv_pow2_cheap (speed, compute_mode))
4157                          /* We assume that cheap metric is true if the
4158                             optab has an expander for this mode.  */
4159                          && ((optab_handler ((rem_flag ? smod_optab
4160                                               : sdiv_optab),
4161                                              compute_mode)
4162                               != CODE_FOR_nothing)
4163                              || (optab_handler (sdivmod_optab,
4164                                                 compute_mode)
4165                                  != CODE_FOR_nothing)))
4166                   ;
4167                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4168                   {
4169                     if (rem_flag)
4170                       {
4171                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4172                         if (remainder)
4173                           return gen_lowpart (mode, remainder);
4174                       }
4175
4176                     if (sdiv_pow2_cheap (speed, compute_mode)
4177                         && ((optab_handler (sdiv_optab, compute_mode)
4178                              != CODE_FOR_nothing)
4179                             || (optab_handler (sdivmod_optab, compute_mode)
4180                                 != CODE_FOR_nothing)))
4181                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4182                                                 compute_mode, op0,
4183                                                 gen_int_mode (abs_d,
4184                                                               compute_mode),
4185                                                 NULL_RTX, 0);
4186                     else
4187                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4188
4189                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4190                        negate the quotient.  */
4191                     if (d < 0)
4192                       {
4193                         insn = get_last_insn ();
4194                         if (insn != last
4195                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4196                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4197                           set_dst_reg_note (insn, REG_EQUAL,
4198                                             gen_rtx_DIV (compute_mode, op0,
4199                                                          gen_int_mode
4200                                                            (abs_d,
4201                                                             compute_mode)),
4202                                             quotient);
4203
4204                         quotient = expand_unop (compute_mode, neg_optab,
4205                                                 quotient, quotient, 0);
4206                       }
4207                   }
4208                 else if (size <= HOST_BITS_PER_WIDE_INT)
4209                   {
4210                     choose_multiplier (abs_d, size, size - 1,
4211                                        &ml, &post_shift, &lgup);
4212                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4213                       {
4214                         rtx t1, t2, t3;
4215
4216                         if (post_shift >= BITS_PER_WORD
4217                             || size - 1 >= BITS_PER_WORD)
4218                           goto fail1;
4219
4220                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4221                                       + shift_cost (speed, compute_mode, size - 1)
4222                                       + add_cost (speed, compute_mode));
4223                         t1 = expmed_mult_highpart (compute_mode, op0,
4224                                                    GEN_INT (ml), NULL_RTX, 0,
4225                                                    max_cost - extra_cost);
4226                         if (t1 == 0)
4227                           goto fail1;
4228                         t2 = expand_shift
4229                           (RSHIFT_EXPR, compute_mode, t1,
4230                            post_shift, NULL_RTX, 0);
4231                         t3 = expand_shift
4232                           (RSHIFT_EXPR, compute_mode, op0,
4233                            size - 1, NULL_RTX, 0);
4234                         if (d < 0)
4235                           quotient
4236                             = force_operand (gen_rtx_MINUS (compute_mode,
4237                                                             t3, t2),
4238                                              tquotient);
4239                         else
4240                           quotient
4241                             = force_operand (gen_rtx_MINUS (compute_mode,
4242                                                             t2, t3),
4243                                              tquotient);
4244                       }
4245                     else
4246                       {
4247                         rtx t1, t2, t3, t4;
4248
4249                         if (post_shift >= BITS_PER_WORD
4250                             || size - 1 >= BITS_PER_WORD)
4251                           goto fail1;
4252
4253                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4254                         mlr = gen_int_mode (ml, compute_mode);
4255                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4256                                       + shift_cost (speed, compute_mode, size - 1)
4257                                       + 2 * add_cost (speed, compute_mode));
4258                         t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4259                                                    NULL_RTX, 0,
4260                                                    max_cost - extra_cost);
4261                         if (t1 == 0)
4262                           goto fail1;
4263                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4264                                                           t1, op0),
4265                                             NULL_RTX);
4266                         t3 = expand_shift
4267                           (RSHIFT_EXPR, compute_mode, t2,
4268                            post_shift, NULL_RTX, 0);
4269                         t4 = expand_shift
4270                           (RSHIFT_EXPR, compute_mode, op0,
4271                            size - 1, NULL_RTX, 0);
4272                         if (d < 0)
4273                           quotient
4274                             = force_operand (gen_rtx_MINUS (compute_mode,
4275                                                             t4, t3),
4276                                              tquotient);
4277                         else
4278                           quotient
4279                             = force_operand (gen_rtx_MINUS (compute_mode,
4280                                                             t3, t4),
4281                                              tquotient);
4282                       }
4283                   }
4284                 else            /* Too wide mode to use tricky code */
4285                   break;
4286
4287                 insn = get_last_insn ();
4288                 if (insn != last)
4289                   set_dst_reg_note (insn, REG_EQUAL,
4290                                     gen_rtx_DIV (compute_mode, op0, op1),
4291                                     quotient);
4292               }
4293             break;
4294           }
4295       fail1:
4296         delete_insns_since (last);
4297         break;
4298
4299       case FLOOR_DIV_EXPR:
4300       case FLOOR_MOD_EXPR:
4301       /* We will come here only for signed operations.  */
4302         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4303           {
4304             unsigned HOST_WIDE_INT mh, ml;
4305             int pre_shift, lgup, post_shift;
4306             HOST_WIDE_INT d = INTVAL (op1);
4307
4308             if (d > 0)
4309               {
4310                 /* We could just as easily deal with negative constants here,
4311                    but it does not seem worth the trouble for GCC 2.6.  */
4312                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4313                   {
4314                     pre_shift = floor_log2 (d);
4315                     if (rem_flag)
4316                       {
4317                         remainder = expand_binop (compute_mode, and_optab, op0,
4318                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4319                                                   remainder, 0, OPTAB_LIB_WIDEN);
4320                         if (remainder)
4321                           return gen_lowpart (mode, remainder);
4322                       }
4323                     quotient = expand_shift
4324                       (RSHIFT_EXPR, compute_mode, op0,
4325                        pre_shift, tquotient, 0);
4326                   }
4327                 else
4328                   {
4329                     rtx t1, t2, t3, t4;
4330
4331                     mh = choose_multiplier (d, size, size - 1,
4332                                             &ml, &post_shift, &lgup);
4333                     gcc_assert (!mh);
4334
4335                     if (post_shift < BITS_PER_WORD
4336                         && size - 1 < BITS_PER_WORD)
4337                       {
4338                         t1 = expand_shift
4339                           (RSHIFT_EXPR, compute_mode, op0,
4340                            size - 1, NULL_RTX, 0);
4341                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4342                                            NULL_RTX, 0, OPTAB_WIDEN);
4343                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4344                                       + shift_cost (speed, compute_mode, size - 1)
4345                                       + 2 * add_cost (speed, compute_mode));
4346                         t3 = expmed_mult_highpart (compute_mode, t2,
4347                                                    GEN_INT (ml), NULL_RTX, 1,
4348                                                    max_cost - extra_cost);
4349                         if (t3 != 0)
4350                           {
4351                             t4 = expand_shift
4352                               (RSHIFT_EXPR, compute_mode, t3,
4353                                post_shift, NULL_RTX, 1);
4354                             quotient = expand_binop (compute_mode, xor_optab,
4355                                                      t4, t1, tquotient, 0,
4356                                                      OPTAB_WIDEN);
4357                           }
4358                       }
4359                   }
4360               }
4361             else
4362               {
4363                 rtx nsign, t1, t2, t3, t4;
4364                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4365                                                   op0, constm1_rtx), NULL_RTX);
4366                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4367                                    0, OPTAB_WIDEN);
4368                 nsign = expand_shift
4369                   (RSHIFT_EXPR, compute_mode, t2,
4370                    size - 1, NULL_RTX, 0);
4371                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4372                                     NULL_RTX);
4373                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4374                                     NULL_RTX, 0);
4375                 if (t4)
4376                   {
4377                     rtx t5;
4378                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4379                                       NULL_RTX, 0);
4380                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4381                                                             t4, t5),
4382                                               tquotient);
4383                   }
4384               }
4385           }
4386
4387         if (quotient != 0)
4388           break;
4389         delete_insns_since (last);
4390
4391         /* Try using an instruction that produces both the quotient and
4392            remainder, using truncation.  We can easily compensate the quotient
4393            or remainder to get floor rounding, once we have the remainder.
4394            Note that we also compute the final remainder value here,
4395            and return the result right away.  */
4396         if (target == 0 || GET_MODE (target) != compute_mode)
4397           target = gen_reg_rtx (compute_mode);
4398
4399         if (rem_flag)
4400           {
4401             remainder
4402               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4403             quotient = gen_reg_rtx (compute_mode);
4404           }
4405         else
4406           {
4407             quotient
4408               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4409             remainder = gen_reg_rtx (compute_mode);
4410           }
4411
4412         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4413                                  quotient, remainder, 0))
4414           {
4415             /* This could be computed with a branch-less sequence.
4416                Save that for later.  */
4417             rtx tem;
4418             rtx label = gen_label_rtx ();
4419             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4420             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4421                                 NULL_RTX, 0, OPTAB_WIDEN);
4422             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4423             expand_dec (quotient, const1_rtx);
4424             expand_inc (remainder, op1);
4425             emit_label (label);
4426             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4427           }
4428
4429         /* No luck with division elimination or divmod.  Have to do it
4430            by conditionally adjusting op0 *and* the result.  */
4431         {
4432           rtx label1, label2, label3, label4, label5;
4433           rtx adjusted_op0;
4434           rtx tem;
4435
4436           quotient = gen_reg_rtx (compute_mode);
4437           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4438           label1 = gen_label_rtx ();
4439           label2 = gen_label_rtx ();
4440           label3 = gen_label_rtx ();
4441           label4 = gen_label_rtx ();
4442           label5 = gen_label_rtx ();
4443           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4444           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4445           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4446                               quotient, 0, OPTAB_LIB_WIDEN);
4447           if (tem != quotient)
4448             emit_move_insn (quotient, tem);
4449           emit_jump_insn (gen_jump (label5));
4450           emit_barrier ();
4451           emit_label (label1);
4452           expand_inc (adjusted_op0, const1_rtx);
4453           emit_jump_insn (gen_jump (label4));
4454           emit_barrier ();
4455           emit_label (label2);
4456           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4457           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4458                               quotient, 0, OPTAB_LIB_WIDEN);
4459           if (tem != quotient)
4460             emit_move_insn (quotient, tem);
4461           emit_jump_insn (gen_jump (label5));
4462           emit_barrier ();
4463           emit_label (label3);
4464           expand_dec (adjusted_op0, const1_rtx);
4465           emit_label (label4);
4466           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4467                               quotient, 0, OPTAB_LIB_WIDEN);
4468           if (tem != quotient)
4469             emit_move_insn (quotient, tem);
4470           expand_dec (quotient, const1_rtx);
4471           emit_label (label5);
4472         }
4473         break;
4474
4475       case CEIL_DIV_EXPR:
4476       case CEIL_MOD_EXPR:
4477         if (unsignedp)
4478           {
4479             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4480               {
4481                 rtx t1, t2, t3;
4482                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4483                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4484                                    floor_log2 (d), tquotient, 1);
4485                 t2 = expand_binop (compute_mode, and_optab, op0,
4486                                    GEN_INT (d - 1),
4487                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4488                 t3 = gen_reg_rtx (compute_mode);
4489                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4490                                       compute_mode, 1, 1);
4491                 if (t3 == 0)
4492                   {
4493                     rtx lab;
4494                     lab = gen_label_rtx ();
4495                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4496                     expand_inc (t1, const1_rtx);
4497                     emit_label (lab);
4498                     quotient = t1;
4499                   }
4500                 else
4501                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4502                                                           t1, t3),
4503                                             tquotient);
4504                 break;
4505               }
4506
4507             /* Try using an instruction that produces both the quotient and
4508                remainder, using truncation.  We can easily compensate the
4509                quotient or remainder to get ceiling rounding, once we have the
4510                remainder.  Notice that we compute also the final remainder
4511                value here, and return the result right away.  */
4512             if (target == 0 || GET_MODE (target) != compute_mode)
4513               target = gen_reg_rtx (compute_mode);
4514
4515             if (rem_flag)
4516               {
4517                 remainder = (REG_P (target)
4518                              ? target : gen_reg_rtx (compute_mode));
4519                 quotient = gen_reg_rtx (compute_mode);
4520               }
4521             else
4522               {
4523                 quotient = (REG_P (target)
4524                             ? target : gen_reg_rtx (compute_mode));
4525                 remainder = gen_reg_rtx (compute_mode);
4526               }
4527
4528             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4529                                      remainder, 1))
4530               {
4531                 /* This could be computed with a branch-less sequence.
4532                    Save that for later.  */
4533                 rtx label = gen_label_rtx ();
4534                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4535                                  compute_mode, label);
4536                 expand_inc (quotient, const1_rtx);
4537                 expand_dec (remainder, op1);
4538                 emit_label (label);
4539                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4540               }
4541
4542             /* No luck with division elimination or divmod.  Have to do it
4543                by conditionally adjusting op0 *and* the result.  */
4544             {
4545               rtx label1, label2;
4546               rtx adjusted_op0, tem;
4547
4548               quotient = gen_reg_rtx (compute_mode);
4549               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4550               label1 = gen_label_rtx ();
4551               label2 = gen_label_rtx ();
4552               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4553                                compute_mode, label1);
4554               emit_move_insn  (quotient, const0_rtx);
4555               emit_jump_insn (gen_jump (label2));
4556               emit_barrier ();
4557               emit_label (label1);
4558               expand_dec (adjusted_op0, const1_rtx);
4559               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4560                                   quotient, 1, OPTAB_LIB_WIDEN);
4561               if (tem != quotient)
4562                 emit_move_insn (quotient, tem);
4563               expand_inc (quotient, const1_rtx);
4564               emit_label (label2);
4565             }
4566           }
4567         else /* signed */
4568           {
4569             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4570                 && INTVAL (op1) >= 0)
4571               {
4572                 /* This is extremely similar to the code for the unsigned case
4573                    above.  For 2.7 we should merge these variants, but for
4574                    2.6.1 I don't want to touch the code for unsigned since that
4575                    get used in C.  The signed case will only be used by other
4576                    languages (Ada).  */
4577
4578                 rtx t1, t2, t3;
4579                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4580                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4581                                    floor_log2 (d), tquotient, 0);
4582                 t2 = expand_binop (compute_mode, and_optab, op0,
4583                                    GEN_INT (d - 1),
4584                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4585                 t3 = gen_reg_rtx (compute_mode);
4586                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4587                                       compute_mode, 1, 1);
4588                 if (t3 == 0)
4589                   {
4590                     rtx lab;
4591                     lab = gen_label_rtx ();
4592                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4593                     expand_inc (t1, const1_rtx);
4594                     emit_label (lab);
4595                     quotient = t1;
4596                   }
4597                 else
4598                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4599                                                           t1, t3),
4600                                             tquotient);
4601                 break;
4602               }
4603
4604             /* Try using an instruction that produces both the quotient and
4605                remainder, using truncation.  We can easily compensate the
4606                quotient or remainder to get ceiling rounding, once we have the
4607                remainder.  Notice that we compute also the final remainder
4608                value here, and return the result right away.  */
4609             if (target == 0 || GET_MODE (target) != compute_mode)
4610               target = gen_reg_rtx (compute_mode);
4611             if (rem_flag)
4612               {
4613                 remainder= (REG_P (target)
4614                             ? target : gen_reg_rtx (compute_mode));
4615                 quotient = gen_reg_rtx (compute_mode);
4616               }
4617             else
4618               {
4619                 quotient = (REG_P (target)
4620                             ? target : gen_reg_rtx (compute_mode));
4621                 remainder = gen_reg_rtx (compute_mode);
4622               }
4623
4624             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4625                                      remainder, 0))
4626               {
4627                 /* This could be computed with a branch-less sequence.
4628                    Save that for later.  */
4629                 rtx tem;
4630                 rtx label = gen_label_rtx ();
4631                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4632                                  compute_mode, label);
4633                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4634                                     NULL_RTX, 0, OPTAB_WIDEN);
4635                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4636                 expand_inc (quotient, const1_rtx);
4637                 expand_dec (remainder, op1);
4638                 emit_label (label);
4639                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4640               }
4641
4642             /* No luck with division elimination or divmod.  Have to do it
4643                by conditionally adjusting op0 *and* the result.  */
4644             {
4645               rtx label1, label2, label3, label4, label5;
4646               rtx adjusted_op0;
4647               rtx tem;
4648
4649               quotient = gen_reg_rtx (compute_mode);
4650               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4651               label1 = gen_label_rtx ();
4652               label2 = gen_label_rtx ();
4653               label3 = gen_label_rtx ();
4654               label4 = gen_label_rtx ();
4655               label5 = gen_label_rtx ();
4656               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4657               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4658                                compute_mode, label1);
4659               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4660                                   quotient, 0, OPTAB_LIB_WIDEN);
4661               if (tem != quotient)
4662                 emit_move_insn (quotient, tem);
4663               emit_jump_insn (gen_jump (label5));
4664               emit_barrier ();
4665               emit_label (label1);
4666               expand_dec (adjusted_op0, const1_rtx);
4667               emit_jump_insn (gen_jump (label4));
4668               emit_barrier ();
4669               emit_label (label2);
4670               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4671                                compute_mode, label3);
4672               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4673                                   quotient, 0, OPTAB_LIB_WIDEN);
4674               if (tem != quotient)
4675                 emit_move_insn (quotient, tem);
4676               emit_jump_insn (gen_jump (label5));
4677               emit_barrier ();
4678               emit_label (label3);
4679               expand_inc (adjusted_op0, const1_rtx);
4680               emit_label (label4);
4681               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4682                                   quotient, 0, OPTAB_LIB_WIDEN);
4683               if (tem != quotient)
4684                 emit_move_insn (quotient, tem);
4685               expand_inc (quotient, const1_rtx);
4686               emit_label (label5);
4687             }
4688           }
4689         break;
4690
4691       case EXACT_DIV_EXPR:
4692         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4693           {
4694             HOST_WIDE_INT d = INTVAL (op1);
4695             unsigned HOST_WIDE_INT ml;
4696             int pre_shift;
4697             rtx t1;
4698
4699             pre_shift = floor_log2 (d & -d);
4700             ml = invert_mod2n (d >> pre_shift, size);
4701             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4702                                pre_shift, NULL_RTX, unsignedp);
4703             quotient = expand_mult (compute_mode, t1,
4704                                     gen_int_mode (ml, compute_mode),
4705                                     NULL_RTX, 1);
4706
4707             insn = get_last_insn ();
4708             set_dst_reg_note (insn, REG_EQUAL,
4709                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4710                                               compute_mode, op0, op1),
4711                               quotient);
4712           }
4713         break;
4714
4715       case ROUND_DIV_EXPR:
4716       case ROUND_MOD_EXPR:
4717         if (unsignedp)
4718           {
4719             rtx tem;
4720             rtx label;
4721             label = gen_label_rtx ();
4722             quotient = gen_reg_rtx (compute_mode);
4723             remainder = gen_reg_rtx (compute_mode);
4724             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4725               {
4726                 rtx tem;
4727                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4728                                          quotient, 1, OPTAB_LIB_WIDEN);
4729                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4730                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4731                                           remainder, 1, OPTAB_LIB_WIDEN);
4732               }
4733             tem = plus_constant (compute_mode, op1, -1);
4734             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4735             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4736             expand_inc (quotient, const1_rtx);
4737             expand_dec (remainder, op1);
4738             emit_label (label);
4739           }
4740         else
4741           {
4742             rtx abs_rem, abs_op1, tem, mask;
4743             rtx label;
4744             label = gen_label_rtx ();
4745             quotient = gen_reg_rtx (compute_mode);
4746             remainder = gen_reg_rtx (compute_mode);
4747             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4748               {
4749                 rtx tem;
4750                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4751                                          quotient, 0, OPTAB_LIB_WIDEN);
4752                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4753                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4754                                           remainder, 0, OPTAB_LIB_WIDEN);
4755               }
4756             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4757             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4758             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4759                                 1, NULL_RTX, 1);
4760             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4761             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4762                                 NULL_RTX, 0, OPTAB_WIDEN);
4763             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4764                                  size - 1, NULL_RTX, 0);
4765             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4766                                 NULL_RTX, 0, OPTAB_WIDEN);
4767             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4768                                 NULL_RTX, 0, OPTAB_WIDEN);
4769             expand_inc (quotient, tem);
4770             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4771                                 NULL_RTX, 0, OPTAB_WIDEN);
4772             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4773                                 NULL_RTX, 0, OPTAB_WIDEN);
4774             expand_dec (remainder, tem);
4775             emit_label (label);
4776           }
4777         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4778
4779       default:
4780         gcc_unreachable ();
4781       }
4782
4783   if (quotient == 0)
4784     {
4785       if (target && GET_MODE (target) != compute_mode)
4786         target = 0;
4787
4788       if (rem_flag)
4789         {
4790           /* Try to produce the remainder without producing the quotient.
4791              If we seem to have a divmod pattern that does not require widening,
4792              don't try widening here.  We should really have a WIDEN argument
4793              to expand_twoval_binop, since what we'd really like to do here is
4794              1) try a mod insn in compute_mode
4795              2) try a divmod insn in compute_mode
4796              3) try a div insn in compute_mode and multiply-subtract to get
4797                 remainder
4798              4) try the same things with widening allowed.  */
4799           remainder
4800             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4801                                  op0, op1, target,
4802                                  unsignedp,
4803                                  ((optab_handler (optab2, compute_mode)
4804                                    != CODE_FOR_nothing)
4805                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4806           if (remainder == 0)
4807             {
4808               /* No luck there.  Can we do remainder and divide at once
4809                  without a library call?  */
4810               remainder = gen_reg_rtx (compute_mode);
4811               if (! expand_twoval_binop ((unsignedp
4812                                           ? udivmod_optab
4813                                           : sdivmod_optab),
4814                                          op0, op1,
4815                                          NULL_RTX, remainder, unsignedp))
4816                 remainder = 0;
4817             }
4818
4819           if (remainder)
4820             return gen_lowpart (mode, remainder);
4821         }
4822
4823       /* Produce the quotient.  Try a quotient insn, but not a library call.
4824          If we have a divmod in this mode, use it in preference to widening
4825          the div (for this test we assume it will not fail). Note that optab2
4826          is set to the one of the two optabs that the call below will use.  */
4827       quotient
4828         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4829                              op0, op1, rem_flag ? NULL_RTX : target,
4830                              unsignedp,
4831                              ((optab_handler (optab2, compute_mode)
4832                                != CODE_FOR_nothing)
4833                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4834
4835       if (quotient == 0)
4836         {
4837           /* No luck there.  Try a quotient-and-remainder insn,
4838              keeping the quotient alone.  */
4839           quotient = gen_reg_rtx (compute_mode);
4840           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4841                                      op0, op1,
4842                                      quotient, NULL_RTX, unsignedp))
4843             {
4844               quotient = 0;
4845               if (! rem_flag)
4846                 /* Still no luck.  If we are not computing the remainder,
4847                    use a library call for the quotient.  */
4848                 quotient = sign_expand_binop (compute_mode,
4849                                               udiv_optab, sdiv_optab,
4850                                               op0, op1, target,
4851                                               unsignedp, OPTAB_LIB_WIDEN);
4852             }
4853         }
4854     }
4855
4856   if (rem_flag)
4857     {
4858       if (target && GET_MODE (target) != compute_mode)
4859         target = 0;
4860
4861       if (quotient == 0)
4862         {
4863           /* No divide instruction either.  Use library for remainder.  */
4864           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4865                                          op0, op1, target,
4866                                          unsignedp, OPTAB_LIB_WIDEN);
4867           /* No remainder function.  Try a quotient-and-remainder
4868              function, keeping the remainder.  */
4869           if (!remainder)
4870             {
4871               remainder = gen_reg_rtx (compute_mode);
4872               if (!expand_twoval_binop_libfunc
4873                   (unsignedp ? udivmod_optab : sdivmod_optab,
4874                    op0, op1,
4875                    NULL_RTX, remainder,
4876                    unsignedp ? UMOD : MOD))
4877                 remainder = NULL_RTX;
4878             }
4879         }
4880       else
4881         {
4882           /* We divided.  Now finish doing X - Y * (X / Y).  */
4883           remainder = expand_mult (compute_mode, quotient, op1,
4884                                    NULL_RTX, unsignedp);
4885           remainder = expand_binop (compute_mode, sub_optab, op0,
4886                                     remainder, target, unsignedp,
4887                                     OPTAB_LIB_WIDEN);
4888         }
4889     }
4890
4891   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4892 }
4893 \f
4894 /* Return a tree node with data type TYPE, describing the value of X.
4895    Usually this is a VAR_DECL, if there is no obvious better choice.
4896    X may be an expression, however we only support those expressions
4897    generated by loop.c.

        CONST_INT, CONST_DOUBLE and CONST_VECTOR are turned into constant
        nodes.  PLUS, MINUS, NEG, MULT, the shifts, DIV, UDIV, SIGN_EXTEND,
        ZERO_EXTEND and CONST are converted by calling make_tree
        recursively on their operands.  A SYMBOL_REF that has a decl
        becomes the address of that decl converted to TYPE.  Everything
        else, including a SYMBOL_REF without a decl, becomes a nameless
        VAR_DECL of TYPE whose RTL is X.  */
4898
4899 tree
4900 make_tree (tree type, rtx x)
4901 {
4902   tree t;
4903
4904   switch (GET_CODE (x))
4905     {
4906     case CONST_INT:
4907       {
             /* HI is the high part handed to build_int_cst_wide.  Use all
                ones for a negative X, except when TYPE is unsigned and
                narrower than HOST_WIDE_INT.  */
4908         HOST_WIDE_INT hi = 0;
4909
4910         if (INTVAL (x) < 0
4911             && !(TYPE_UNSIGNED (type)
4912                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
4913                      < HOST_BITS_PER_WIDE_INT)))
4914           hi = -1;
4915
4916         t = build_int_cst_wide (type, INTVAL (x), hi);
4917
4918         return t;
4919       }
4920
4921     case CONST_DOUBLE:
           /* A VOIDmode CONST_DOUBLE is built as an integer constant from
              its low and high parts; any other mode is built as a real
              constant.  */
4922       if (GET_MODE (x) == VOIDmode)
4923         t = build_int_cst_wide (type,
4924                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4925       else
4926         {
4927           REAL_VALUE_TYPE d;
4928
4929           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4930           t = build_real (type, d);
4931         }
4932
4933       return t;
4934
4935     case CONST_VECTOR:
4936       {
4937         int units = CONST_VECTOR_NUNITS (x);
4938         tree itype = TREE_TYPE (type);
4939         tree *elts;
4940         int i;
4941
4942         /* Build a tree with vector elements.  Each element is converted
                with the element type of TYPE.  */
4943         elts = XALLOCAVEC (tree, units);
4944         for (i = units - 1; i >= 0; --i)
4945           {
4946             rtx elt = CONST_VECTOR_ELT (x, i);
4947             elts[i] = make_tree (itype, elt);
4948           }
4949
4950         return build_vector (type, elts);
4951       }
4952
4953     case PLUS:
4954       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4955                           make_tree (type, XEXP (x, 1)));
4956
4957     case MINUS:
4958       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4959                           make_tree (type, XEXP (x, 1)));
4960
4961     case NEG:
4962       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4963
4964     case MULT:
4965       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4966                           make_tree (type, XEXP (x, 1)));
4967
4968     case ASHIFT:
4969       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4970                           make_tree (type, XEXP (x, 1)));
4971
4972     case LSHIFTRT:
           /* Shift in the unsigned variant of TYPE, then convert the result
              back to TYPE.  The shift count is still built with TYPE.  */
4973       t = unsigned_type_for (type);
4974       return fold_convert (type, build2 (RSHIFT_EXPR, t,
4975                                          make_tree (t, XEXP (x, 0)),
4976                                          make_tree (type, XEXP (x, 1))));
4977
4978     case ASHIFTRT:
           /* Likewise, but shift in the signed variant of TYPE.  */
4979       t = signed_type_for (type);
4980       return fold_convert (type, build2 (RSHIFT_EXPR, t,
4981                                          make_tree (t, XEXP (x, 0)),
4982                                          make_tree (type, XEXP (x, 1))));
4983
4984     case DIV:
           /* Unless TYPE is a REAL_TYPE, divide in the signed variant of
              TYPE and convert the quotient back to TYPE.  */
4985       if (TREE_CODE (type) != REAL_TYPE)
4986         t = signed_type_for (type);
4987       else
4988         t = type;
4989
4990       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4991                                          make_tree (t, XEXP (x, 0)),
4992                                          make_tree (t, XEXP (x, 1))));
4993     case UDIV:
           /* Divide in the unsigned variant of TYPE and convert back.  */
4994       t = unsigned_type_for (type);
4995       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4996                                          make_tree (t, XEXP (x, 0)),
4997                                          make_tree (t, XEXP (x, 1))));
4998
4999     case SIGN_EXTEND:
5000     case ZERO_EXTEND:
           /* Ask the language hook for a type matching the mode of the inner
              operand (the second argument is nonzero for ZERO_EXTEND), build
              the operand in that type and convert it to TYPE.  */
5001       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5002                                           GET_CODE (x) == ZERO_EXTEND);
5003       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5004
5005     case CONST:
5006       return make_tree (type, XEXP (x, 0));
5007
5008     case SYMBOL_REF:
5009       t = SYMBOL_REF_DECL (x);
5010       if (t)
5011         return fold_convert (type, build_fold_addr_expr (t));
5012       /* else fall through.  */
5013
5014     default:
           /* Create a nameless VAR_DECL of TYPE to stand for X.  */
5015       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5016
5017       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5018          address mode to pointer mode.  */
5019       if (POINTER_TYPE_P (type))
5020         x = convert_memory_address_addr_space
5021               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5022
5023       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5024          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5025       t->decl_with_rtl.rtl = x;
5026
5027       return t;
5028     }
5029 }
5030 \f
5031 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5032    and returning TARGET.
5033
5034    If TARGET is 0, a pseudo-register or constant is returned.

        The AND is done in MODE.  If both operands have VOIDmode,
        simplify_binary_operation is tried first; when that is not
        attempted or returns 0, the AND is expanded with expand_binop
        on and_optab, allowing widening and library calls
        (OPTAB_LIB_WIDEN).  */
5035
5036 rtx
5037 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5038 {
5039   rtx tem = 0;
5040
5041   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5042     tem = simplify_binary_operation (AND, mode, op0, op1);
5043   if (tem == 0)
5044     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5045
       /* With no TARGET, hand back whatever was computed.  Otherwise make
          sure the value ends up in TARGET, copying it there if the
          result was produced somewhere else.  */
5046   if (target == 0)
5047     target = tem;
5048   else if (tem != target)
5049     emit_move_insn (target, tem);
5050   return target;
5051 }
5052
5053 /* Helper function for emit_store_flag.

        Try to emit the insn ICODE to compute the comparison CODE of X and
        Y.  X and Y are first passed through prepare_operand as operands
        2 and 3 of ICODE, together with MODE, COMPARE_MODE and UNSIGNEDP.
        The mode the insn produces is the mode of operand 0 of ICODE.

        TARGET_MODE is the mode wanted for the result; VOIDmode means the
        mode the insn produces.  If TARGET is zero, a new pseudo of
        TARGET_MODE is created for it.

        If NORMALIZEP is zero or equal to STORE_FLAG_VALUE, the value is
        used as the insn produced it.  Otherwise it is normalized with a
        negation, a right shift of the sign bit, or an AND with 1 (negated
        when NORMALIZEP is -1), depending on STORE_FLAG_VALUE.

        Return the rtx holding the result, or NULL_RTX, after deleting
        every insn emitted here, if an operand could not be prepared or
        the insn could not be expanded.  */
5054 static rtx
5055 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5056              enum machine_mode mode, enum machine_mode compare_mode,
5057              int unsignedp, rtx x, rtx y, int normalizep,
5058              enum machine_mode target_mode)
5059 {
5060   struct expand_operand ops[4];
5061   rtx op0, last, comparison, subtarget;
5062   enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5063
       /* Remember where we started so that a failed attempt can be undone
          with delete_insns_since.  */
5064   last = get_last_insn ();
5065   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5066   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5067   if (!x || !y)
5068     {
5069       delete_insns_since (last);
5070       return NULL_RTX;
5071     }
5072
5073   if (target_mode == VOIDmode)
5074     target_mode = result_mode;
5075   if (!target)
5076     target = gen_reg_rtx (target_mode);
5077
5078   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5079
       /* Operand 0 is the output, operand 1 the comparison rtx and operands
          2 and 3 the values compared.  When optimizing, no particular
          output location is requested; otherwise TARGET is.  */
5080   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5081   create_fixed_operand (&ops[1], comparison);
5082   create_fixed_operand (&ops[2], x);
5083   create_fixed_operand (&ops[3], y);
5084   if (!maybe_expand_insn (icode, 4, ops))
5085     {
5086       delete_insns_since (last);
5087       return NULL_RTX;
5088     }
5089   subtarget = ops[0].value;
5090
5091   /* If we are converting to a wider mode, first convert to
5092      TARGET_MODE, then normalize.  This produces better combining
5093      opportunities on machines that have a SIGN_EXTRACT when we are
5094      testing a single bit.  This mostly benefits the 68k.
5095
5096      If STORE_FLAG_VALUE does not have the sign bit set when
5097      interpreted in MODE, we can do this conversion as unsigned, which
5098      is usually more efficient.  */
5099   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5100     {
5101       convert_move (target, subtarget,
5102                     val_signbit_known_clear_p (result_mode,
5103                                                STORE_FLAG_VALUE));
5104       op0 = target;
5105       result_mode = target_mode;
5106     }
5107   else
5108     op0 = subtarget;
5109
5110   /* If we want to keep subexpressions around, don't reuse our last
5111      target.  */
5112   if (optimize)
5113     subtarget = 0;
5114
5115   /* Now normalize to the proper value in MODE.  Sometimes we don't
5116      have to do anything.  */
5117   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5118     ;
5119   /* STORE_FLAG_VALUE might be the most negative number, so write
5120      the comparison this way to avoid a compiler-time warning.  */
5121   else if (- normalizep == STORE_FLAG_VALUE)
5122     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5123
5124   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5125      it hard to use a value of just the sign bit due to ANSI integer
5126      constant typing rules.  */
5127   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
         /* Shift the sign bit down to the low bit.  The shift is done as
            unsigned when NORMALIZEP is 1.  */
5128     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5129                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5130                         normalizep == 1);
5131   else
5132     {
           /* The remaining case relies on the low bit of STORE_FLAG_VALUE
              being set: keep only that bit, and negate it when -1 is
              wanted.  */
5133       gcc_assert (STORE_FLAG_VALUE & 1);
5134
5135       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5136       if (normalizep == -1)
5137         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5138     }
5139
5140   /* If we were converting to a smaller mode, do the conversion now.  */
5141   if (target_mode != result_mode)
5142     {
5143       convert_move (target, op0, 0);
5144       return target;
5145     }
5146   else
5147     return op0;
5148 }
5149
5150
5151 /* A subroutine of emit_store_flag only including "tricks" that do not
5152    need a recursive call.  These are kept separate to avoid infinite
5153    loops.  */
5154
5155 static rtx
5156 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5157                    enum machine_mode mode, int unsignedp, int normalizep,
5158                    enum machine_mode target_mode)
5159 {
5160   rtx subtarget;
5161   enum insn_code icode;
5162   enum machine_mode compare_mode;
5163   enum mode_class mclass;
5164   enum rtx_code scode;
5165   rtx tem;
5166
5167   if (unsignedp)
5168     code = unsigned_condition (code);
5169   scode = swap_condition (code);
5170
5171   /* If one operand is constant, make it the second one.  Only do this
5172      if the other operand is not constant as well.  */
5173
5174   if (swap_commutative_operands_p (op0, op1))
5175     {
5176       tem = op0;
5177       op0 = op1;
5178       op1 = tem;
5179       code = swap_condition (code);
5180     }
5181
5182   if (mode == VOIDmode)
5183     mode = GET_MODE (op0);
5184
5185   /* For some comparisons with 1 and -1, we can convert this to
5186      comparisons with zero.  This will often produce more opportunities for
5187      store-flag insns.  */
5188
5189   switch (code)
5190     {
5191     case LT:
5192       if (op1 == const1_rtx)
5193         op1 = const0_rtx, code = LE;
5194       break;
5195     case LE:
5196       if (op1 == constm1_rtx)
5197         op1 = const0_rtx, code = LT;
5198       break;
5199     case GE:
5200       if (op1 == const1_rtx)
5201         op1 = const0_rtx, code = GT;
5202       break;
5203     case GT:
5204       if (op1 == constm1_rtx)
5205         op1 = const0_rtx, code = GE;
5206       break;
5207     case GEU:
5208       if (op1 == const1_rtx)
5209         op1 = const0_rtx, code = NE;
5210       break;
5211     case LTU:
5212       if (op1 == const1_rtx)
5213         op1 = const0_rtx, code = EQ;
5214       break;
5215     default:
5216       break;
5217     }
5218
5219   /* If we are comparing a double-word integer with zero or -1, we can
5220      convert the comparison into one involving a single word.  */
5221   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5222       && GET_MODE_CLASS (mode) == MODE_INT
5223       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5224     {
5225       if ((code == EQ || code == NE)
5226           && (op1 == const0_rtx || op1 == constm1_rtx))
5227         {
5228           rtx op00, op01;
5229
5230           /* Do a logical OR or AND of the two words and compare the
5231              result.  */
5232           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5233           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5234           tem = expand_binop (word_mode,
5235                               op1 == const0_rtx ? ior_optab : and_optab,
5236                               op00, op01, NULL_RTX, unsignedp,
5237                               OPTAB_DIRECT);
5238
5239           if (tem != 0)
5240             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5241                                    unsignedp, normalizep);
5242         }
5243       else if ((code == LT || code == GE) && op1 == const0_rtx)
5244         {
5245           rtx op0h;
5246
5247           /* If testing the sign bit, can just test on high word.  */
5248           op0h = simplify_gen_subreg (word_mode, op0, mode,
5249                                       subreg_highpart_offset (word_mode,
5250                                                               mode));
5251           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5252                                  unsignedp, normalizep);
5253         }
5254       else
5255         tem = NULL_RTX;
5256
5257       if (tem)
5258         {
5259           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5260             return tem;
5261           if (!target)
5262             target = gen_reg_rtx (target_mode);
5263
5264           convert_move (target, tem,
5265                         !val_signbit_known_set_p (word_mode,
5266                                                   (normalizep ? normalizep
5267                                                    : STORE_FLAG_VALUE)));
5268           return target;
5269         }
5270     }
5271
5272   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5273      complement of A (for GE) and shifting the sign bit to the low bit.  */
5274   if (op1 == const0_rtx && (code == LT || code == GE)
5275       && GET_MODE_CLASS (mode) == MODE_INT
5276       && (normalizep || STORE_FLAG_VALUE == 1
5277           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5278     {
5279       subtarget = target;
5280
5281       if (!target)
5282         target_mode = mode;
5283
5284       /* If the result is to be wider than OP0, it is best to convert it
5285          first.  If it is to be narrower, it is *incorrect* to convert it
5286          first.  */
5287       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5288         {
5289           op0 = convert_modes (target_mode, mode, op0, 0);
5290           mode = target_mode;
5291         }
5292
5293       if (target_mode != mode)
5294         subtarget = 0;
5295
5296       if (code == GE)
5297         op0 = expand_unop (mode, one_cmpl_optab, op0,
5298                            ((STORE_FLAG_VALUE == 1 || normalizep)
5299                             ? 0 : subtarget), 0);
5300
5301       if (STORE_FLAG_VALUE == 1 || normalizep)
5302         /* If we are supposed to produce a 0/1 value, we want to do
5303            a logical shift from the sign bit to the low-order bit; for
5304            a -1/0 value, we do an arithmetic shift.  */
5305         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5306                             GET_MODE_BITSIZE (mode) - 1,
5307                             subtarget, normalizep != -1);
5308
5309       if (mode != target_mode)
5310         op0 = convert_modes (target_mode, mode, op0, 0);
5311
5312       return op0;
5313     }
5314
5315   mclass = GET_MODE_CLASS (mode);
5316   for (compare_mode = mode; compare_mode != VOIDmode;
5317        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5318     {
5319      enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5320      icode = optab_handler (cstore_optab, optab_mode);
5321      if (icode != CODE_FOR_nothing)
5322         {
5323           do_pending_stack_adjust ();
5324           tem = emit_cstore (target, icode, code, mode, compare_mode,
5325                              unsignedp, op0, op1, normalizep, target_mode);
5326           if (tem)
5327             return tem;
5328
5329           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5330             {
5331               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5332                                  unsignedp, op1, op0, normalizep, target_mode);
5333               if (tem)
5334                 return tem;
5335             }
5336           break;
5337         }
5338     }
5339
5340   return 0;
5341 }
5342
5343 /* Emit insns computing the truth value of comparison CODE on OP0 and OP1,
5344    preferably into TARGET (which may be null).  Return the rtx holding the
5345    result, which need not be TARGET, or 0 if that cannot be done.
5346
5347    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5348    it is VOIDmode, they cannot both be CONST_INT.
5349
5350    UNSIGNEDP is for the case where we have to widen the operands
5351    to perform the operation.  It says to use zero-extension.
5352
5353    NORMALIZEP is 1 if we should convert the result to be either zero
5354    or one.  NORMALIZEP is -1 if we should convert the result to be
5355    either zero or -1.  If NORMALIZEP is zero, the result will be left
5356    "raw" out of the scc insn.  */
5357
5358 rtx
5359 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5360                  enum machine_mode mode, int unsignedp, int normalizep)
5361 {
5362   enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5363   enum rtx_code rcode;
5364   rtx subtarget;
5365   rtx tem, last, trueval;
5366
5367   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5368                            target_mode);
5369   if (tem)
5370     return tem;
5371
5372   /* If we reached here, we can't do this with a scc insn; however, there
5373      are some comparisons that can be done in other ways.  Don't try any
5374      of them if branches are free (BRANCH_COST of zero).  */
5375   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5376     return 0;
5377
5378   /* See what we need to return.  The tricks below can only produce 1, -1,
5379      or the sign bit, so give up on any other raw STORE_FLAG_VALUE.  */
5380
5381   if (normalizep == 0)
5382     {
5383       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5384         normalizep = STORE_FLAG_VALUE;
5385
5386       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5387         ;
5388       else
5389         return 0;
5390     }
5391
5392   last = get_last_insn ();
5393
5394   /* If optimizing, use different pseudo registers for each insn, instead
5395      of reusing the same pseudo.  This leads to better CSE, but slows
5396      down the compiler, since there are more pseudos.  */
5397   subtarget = (!optimize
5398                && (target_mode == mode)) ? target : NULL_RTX;
5399   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5400
5401   /* For floating-point comparisons, try the reverse comparison or try
5402      changing the "orderedness" of the comparison.  */
5403   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5404     {
5405       enum rtx_code first_code;
5406       bool and_them;
5407
5408       rcode = reverse_condition_maybe_unordered (code);
5409       if (can_compare_p (rcode, mode, ccp_store_flag)
5410           && (code == ORDERED || code == UNORDERED
5411               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5412               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5413         {
5414           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5415                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5416
5417           /* Invert the reversed result with either an addition or an XOR.  */
5418           if (want_add
5419               && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5420                            optimize_insn_for_speed_p ()) == 0)
5421             {
5422               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5423                                        STORE_FLAG_VALUE, target_mode);
5424               if (tem)
5425                 return expand_binop (target_mode, add_optab, tem,
5426                                      GEN_INT (normalizep),
5427                                      target, 0, OPTAB_WIDEN);
5428             }
5429           else if (!want_add
5430                    && rtx_cost (trueval, XOR, 1,
5431                                 optimize_insn_for_speed_p ()) == 0)
5432             {
5433               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5434                                        normalizep, target_mode);
5435               if (tem)
5436                 return expand_binop (target_mode, xor_optab, tem, trueval,
5437                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5438             }
5439         }
5440
5441       delete_insns_since (last);
5442
5443       /* Cannot split ORDERED and UNORDERED; only the above trick applies.  */
5444       if (code == ORDERED || code == UNORDERED)
5445         return 0;
5446
5447       and_them = split_comparison (code, mode, &first_code, &code);
5448
5449       /* If there are no NaNs, the first comparison should always fall through.
5450          Effectively change the comparison to the other one.  */
5451       if (!HONOR_NANS (mode))
5452         {
5453           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5454           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5455                                     target_mode);
5456         }
5457
5458 #ifdef HAVE_conditional_move
5459       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5460          conditional move.  */
5461       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5462                                normalizep, target_mode);
5463       if (tem == 0)
5464         return 0;
5465
5466       if (and_them)
5467         tem = emit_conditional_move (target, code, op0, op1, mode,
5468                                      tem, const0_rtx, GET_MODE (tem), 0);
5469       else
5470         tem = emit_conditional_move (target, code, op0, op1, mode,
5471                                      trueval, tem, GET_MODE (tem), 0);
5472
5473       if (tem == 0)
5474         delete_insns_since (last);
5475       return tem;
5476 #else
5477       return 0;
5478 #endif
5479     }
5480
5481   /* The remaining tricks only apply to integer comparisons.  */
5482
5483   if (GET_MODE_CLASS (mode) != MODE_INT)
5484     return 0;
5485
5486   /* If this is an equality comparison of integers, we can try to exclusive-or
5487      (or subtract) the two operands and use a recursive call to try the
5488      comparison with zero.  (The case of a zero BRANCH_COST has already been
5489      rejected above.)  */
5490
5491   if ((code == EQ || code == NE) && op1 != const0_rtx)
5492     {
5493       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5494                           OPTAB_WIDEN);
5495
5496       if (tem == 0)
5497         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5498                             OPTAB_WIDEN);
5499       if (tem != 0)
5500         tem = emit_store_flag (target, code, tem, const0_rtx,
5501                                mode, unsignedp, normalizep);
5502       if (tem != 0)
5503         return tem;
5504
5505       delete_insns_since (last);
5506     }
5507
5508   /* For integer comparisons, try the reverse comparison.  However, for
5509      small X and if we'd have anyway to extend, implementing "X != 0"
5510      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5511   rcode = reverse_condition (code);
5512   if (can_compare_p (rcode, mode, ccp_store_flag)
5513       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5514             && code == NE
5515             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5516             && op1 == const0_rtx))
5517     {
5518       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5519                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5520
5521       /* Again, invert the reversed result with an addition or an XOR.  */
5522       if (want_add
5523           && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5524                        optimize_insn_for_speed_p ()) == 0)
5525         {
5526           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5527                                    STORE_FLAG_VALUE, target_mode);
5528           if (tem != 0)
5529             tem = expand_binop (target_mode, add_optab, tem,
5530                                 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5531         }
5532       else if (!want_add
5533                && rtx_cost (trueval, XOR, 1,
5534                             optimize_insn_for_speed_p ()) == 0)
5535         {
5536           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5537                                    normalizep, target_mode);
5538           if (tem != 0)
5539             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5540                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5541         }
5542
5543       if (tem != 0)
5544         return tem;
5545       delete_insns_since (last);
5546     }
5547
5548   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5549      the constant zero.  Reject all other comparisons at this point.  Only
5550      do LE and GT if branches are expensive since they are expensive on
5551      2-operand machines.  */
5552
5553   if (op1 != const0_rtx
5554       || (code != EQ && code != NE
5555           && (BRANCH_COST (optimize_insn_for_speed_p (),
5556                            false) <= 1 || (code != LE && code != GT))))
5557     return 0;
5558
5559   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5560      do the necessary operation below.  */
5561
5562   tem = 0;
5563
5564   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5565      the sign bit set.  */
5566
5567   if (code == LE)
5568     {
5569       /* This is destructive, so SUBTARGET can't be OP0.  */
5570       if (rtx_equal_p (subtarget, op0))
5571         subtarget = 0;
5572
5573       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5574                           OPTAB_WIDEN);
5575       if (tem)
5576         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5577                             OPTAB_WIDEN);
5578     }
5579
5580   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is one
5581      less than the bit size of MODE.  A > 0 iff the result is negative.  */
5582
5583   if (code == GT)
5584     {
5585       if (rtx_equal_p (subtarget, op0))
5586         subtarget = 0;
5587
5588       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5589                           GET_MODE_BITSIZE (mode) - 1,
5590                           subtarget, 0);
5591       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5592                           OPTAB_WIDEN);
5593     }
5594
5595   if (code == EQ || code == NE)
5596     {
5597       /* For EQ or NE, one way to do the comparison is to apply an operation
5598          that converts the operand into a positive number if it is nonzero
5599          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5600          for NE we negate.  This puts the result in the sign bit.  Then we
5601          normalize with a shift, if needed.
5602
5603          Two operations that can do the above actions are ABS and FFS, so try
5604          them.  If that doesn't work, and MODE is smaller than a full word,
5605          we can use zero-extension to the wider mode (an unsigned conversion)
5606          as the operation.  */
5607
5608       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5609          that is compensated by the subsequent overflow when subtracting
5610          one / negating.  */
5611
5612       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5613         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5614       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5615         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5616       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5617         {
5618           tem = convert_modes (word_mode, mode, op0, 1);
5619           mode = word_mode;
5620         }
5621
5622       if (tem != 0)
5623         {
5624           if (code == EQ)
5625             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5626                                 0, OPTAB_WIDEN);
5627           else
5628             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5629         }
5630
5631       /* If we couldn't do it that way, for NE we can "or" the two's complement
5632          of the value with itself.  For EQ, we take the one's complement of
5633          that "or", which is an extra insn, so we only handle EQ if branches
5634          are expensive.  */
5635
5636       if (tem == 0
5637           && (code == NE
5638               || BRANCH_COST (optimize_insn_for_speed_p (),
5639                               false) > 1))
5640         {
5641           if (rtx_equal_p (subtarget, op0))
5642             subtarget = 0;
5643
5644           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5645           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5646                               OPTAB_WIDEN);
5647
5648           if (tem && code == EQ)
5649             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5650         }
5651     }
5652
5653   if (tem && normalizep)
5654     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5655                         GET_MODE_BITSIZE (mode) - 1,
5656                         subtarget, normalizep == 1);
5657
5658   if (tem)
5659     {
5660       if (!target)
5661         ;
5662       else if (GET_MODE (tem) != target_mode)
5663         {
5664           convert_move (target, tem, 0);
5665           tem = target;
5666         }
5667       else if (!subtarget)
5668         {
5669           emit_move_insn (target, tem);
5670           tem = target;
5671         }
5672     }
5673   else
5674     delete_insns_since (last);
5675
5676   return tem;
5677 }
5678
5679 /* Like emit_store_flag, but always succeeds, by branching if need be.  */
5680
5681 rtx
5682 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5683                        enum machine_mode mode, int unsignedp, int normalizep)
5684 {
5685   rtx tem, label;
5686   rtx trueval, falseval;
5687
5688   /* First see if emit_store_flag can do the job.  */
5689   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5690   if (tem != 0)
5691     return tem;
5692
5693   if (!target)
5694     target = gen_reg_rtx (word_mode);
5695
5696   /* If this failed, we have to do this with set/compare/jump/set code.
5697      For foo != 0, if foo is both OP0 and TARGET, just set it to TRUEVAL.  */
5698   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5699   if (code == NE
5700       && GET_MODE_CLASS (mode) == MODE_INT
5701       && REG_P (target)
5702       && op0 == target
5703       && op1 == const0_rtx)
5704     {
5705       label = gen_label_rtx ();
5706       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5707                                mode, NULL_RTX, NULL_RTX, label, -1);
5708       emit_move_insn (target, trueval);
5709       emit_label (label);
5710       return target;
5711     }
5712
5713   if (!REG_P (target)
5714       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5715     target = gen_reg_rtx (GET_MODE (target));
5716
5717   /* Jump in the right direction if the target cannot implement CODE
5718      but can jump on its reverse condition.  */
5719   falseval = const0_rtx;
5720   if (! can_compare_p (code, mode, ccp_jump)
5721       && (! FLOAT_MODE_P (mode)
5722           || code == ORDERED || code == UNORDERED
5723           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5724           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5725     {
5726       enum rtx_code rcode;
5727       if (FLOAT_MODE_P (mode))
5728         rcode = reverse_condition_maybe_unordered (code);
5729       else
5730         rcode = reverse_condition (code);
5731
5732       /* Canonicalize to UNORDERED for the libcall.  */
5733       if (can_compare_p (rcode, mode, ccp_jump)
5734           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5735         {
5736           falseval = trueval;
5737           trueval = const0_rtx;
5738           code = rcode;
5739         }
5740     }
5741
5742   emit_move_insn (target, trueval);
5743   label = gen_label_rtx ();
5744   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5745                            NULL_RTX, label, -1);
5746
5747   emit_move_insn (target, falseval);
5748   emit_label (label);
5749
5750   return target;
5751 }
5752 \f
5753 /* Compare ARG1 with ARG2, both of mode MODE and possibly multi-word, and
5754    jump to LABEL when ARG1 OP ARG2 holds.  The comparison is treated as
5755    unsigned exactly when OP is one of LTU, LEU, GTU or GEU.  */
5756
5757 static void
5758 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5759                  rtx label)
5760 {
5761   do_compare_rtx_and_jump (arg1, arg2, op,
5762                            op == GEU || op == GTU || op == LEU || op == LTU,
5763                            mode, NULL_RTX, NULL_RTX, label, -1);
5764 }