1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 2011, 2012
6 Free Software Foundation, Inc.
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "diagnostic-core.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41 #include "expmed.h"
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 rtx);
53 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 rtx);
58 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
59 unsigned HOST_WIDE_INT,
60 unsigned HOST_WIDE_INT, rtx, int, bool);
61 static rtx mask_rtx (enum machine_mode, int, int, int);
62 static rtx lshift_value (enum machine_mode, rtx, int, int);
63 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
64 unsigned HOST_WIDE_INT, int);
65 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
66 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
67 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
69 /* Test whether a value is zero or a power of two. */
70 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
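/* Illustrative note (not in the original source): x & (x - 1) clears the
   lowest set bit of x, so the test holds exactly when x has at most one bit
   set -- e.g. 0 & -1 == 0 and 8 & 7 == 0, but 6 & 5 == 4.  */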
72 struct init_expmed_rtl
74 struct rtx_def reg; rtunion reg_fld[2];
75 struct rtx_def plus; rtunion plus_fld1;
76 struct rtx_def neg;
77 struct rtx_def mult; rtunion mult_fld1;
78 struct rtx_def sdiv; rtunion sdiv_fld1;
79 struct rtx_def udiv; rtunion udiv_fld1;
80 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
81 struct rtx_def smod_32; rtunion smod_32_fld1;
82 struct rtx_def wide_mult; rtunion wide_mult_fld1;
83 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
84 struct rtx_def wide_trunc;
85 struct rtx_def shift; rtunion shift_fld1;
86 struct rtx_def shift_mult; rtunion shift_mult_fld1;
87 struct rtx_def shift_add; rtunion shift_add_fld1;
88 struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
89 struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
90 struct rtx_def zext;
91 struct rtx_def trunc;
93 rtx pow2[MAX_BITS_PER_WORD];
94 rtx cint[MAX_BITS_PER_WORD];
97 static void
98 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
99 enum machine_mode from_mode, bool speed)
101 int to_size, from_size;
102 rtx which;
104 /* We're given no information about the true size of a partial integer,
105 only the size of the "full" integer it requires for storage. For
106 comparison purposes here, reduce the bit size by one in that case. */
107 to_size = (GET_MODE_BITSIZE (to_mode)
108 - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
109 from_size = (GET_MODE_BITSIZE (from_mode)
110 - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
112 /* Assume cost of zero-extend and sign-extend is the same. */
113 which = (to_size < from_size ? &all->trunc : &all->zext);
115 PUT_MODE (&all->reg, from_mode);
116 set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
119 static void
120 init_expmed_one_mode (struct init_expmed_rtl *all,
121 enum machine_mode mode, int speed)
123 int m, n, mode_bitsize;
124 enum machine_mode mode_from;
126 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
128 PUT_MODE (&all->reg, mode);
129 PUT_MODE (&all->plus, mode);
130 PUT_MODE (&all->neg, mode);
131 PUT_MODE (&all->mult, mode);
132 PUT_MODE (&all->sdiv, mode);
133 PUT_MODE (&all->udiv, mode);
134 PUT_MODE (&all->sdiv_32, mode);
135 PUT_MODE (&all->smod_32, mode);
136 PUT_MODE (&all->wide_trunc, mode);
137 PUT_MODE (&all->shift, mode);
138 PUT_MODE (&all->shift_mult, mode);
139 PUT_MODE (&all->shift_add, mode);
140 PUT_MODE (&all->shift_sub0, mode);
141 PUT_MODE (&all->shift_sub1, mode);
142 PUT_MODE (&all->zext, mode);
143 PUT_MODE (&all->trunc, mode);
145 set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
146 set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
147 set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
148 set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
149 set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
151 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
152 <= 2 * add_cost (speed, mode)));
153 set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
154 <= 4 * add_cost (speed, mode)));
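/* Roughly speaking, the two flags above record whether dividing by the
   representative power-of-two constant 32 rtx-costs no more than two
   additions, and whether the corresponding remainder costs no more than
   four; later expansion consults them when choosing between a real
   division and a shift/mask sequence.  */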
156 set_shift_cost (speed, mode, 0, 0);
158 int cost = add_cost (speed, mode);
159 set_shiftadd_cost (speed, mode, 0, cost);
160 set_shiftsub0_cost (speed, mode, 0, cost);
161 set_shiftsub1_cost (speed, mode, 0, cost);
164 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
165 for (m = 1; m < n; m++)
167 XEXP (&all->shift, 1) = all->cint[m];
168 XEXP (&all->shift_mult, 1) = all->pow2[m];
170 set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
171 set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
172 set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
173 set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
176 if (SCALAR_INT_MODE_P (mode))
178 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
179 mode_from = (enum machine_mode)(mode_from + 1))
180 init_expmed_one_conv (all, mode, mode_from, speed);
182 if (GET_MODE_CLASS (mode) == MODE_INT)
184 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
185 if (wider_mode != VOIDmode)
187 PUT_MODE (&all->zext, wider_mode);
188 PUT_MODE (&all->wide_mult, wider_mode);
189 PUT_MODE (&all->wide_lshr, wider_mode);
190 XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
192 set_mul_widen_cost (speed, wider_mode,
193 set_src_cost (&all->wide_mult, speed));
194 set_mul_highpart_cost (speed, mode,
195 set_src_cost (&all->wide_trunc, speed));
200 void
201 init_expmed (void)
203 struct init_expmed_rtl all;
204 enum machine_mode mode;
205 int m, speed;
207 memset (&all, 0, sizeof all);
208 for (m = 1; m < MAX_BITS_PER_WORD; m++)
210 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
211 all.cint[m] = GEN_INT (m);
214 PUT_CODE (&all.reg, REG);
215 /* Avoid using hard regs in ways which may be unsupported. */
216 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
218 PUT_CODE (&all.plus, PLUS);
219 XEXP (&all.plus, 0) = &all.reg;
220 XEXP (&all.plus, 1) = &all.reg;
222 PUT_CODE (&all.neg, NEG);
223 XEXP (&all.neg, 0) = &all.reg;
225 PUT_CODE (&all.mult, MULT);
226 XEXP (&all.mult, 0) = &all.reg;
227 XEXP (&all.mult, 1) = &all.reg;
229 PUT_CODE (&all.sdiv, DIV);
230 XEXP (&all.sdiv, 0) = &all.reg;
231 XEXP (&all.sdiv, 1) = &all.reg;
233 PUT_CODE (&all.udiv, UDIV);
234 XEXP (&all.udiv, 0) = &all.reg;
235 XEXP (&all.udiv, 1) = &all.reg;
237 PUT_CODE (&all.sdiv_32, DIV);
238 XEXP (&all.sdiv_32, 0) = &all.reg;
239 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
241 PUT_CODE (&all.smod_32, MOD);
242 XEXP (&all.smod_32, 0) = &all.reg;
243 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
245 PUT_CODE (&all.zext, ZERO_EXTEND);
246 XEXP (&all.zext, 0) = &all.reg;
248 PUT_CODE (&all.wide_mult, MULT);
249 XEXP (&all.wide_mult, 0) = &all.zext;
250 XEXP (&all.wide_mult, 1) = &all.zext;
252 PUT_CODE (&all.wide_lshr, LSHIFTRT);
253 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
255 PUT_CODE (&all.wide_trunc, TRUNCATE);
256 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
258 PUT_CODE (&all.shift, ASHIFT);
259 XEXP (&all.shift, 0) = &all.reg;
261 PUT_CODE (&all.shift_mult, MULT);
262 XEXP (&all.shift_mult, 0) = &all.reg;
264 PUT_CODE (&all.shift_add, PLUS);
265 XEXP (&all.shift_add, 0) = &all.shift_mult;
266 XEXP (&all.shift_add, 1) = &all.reg;
268 PUT_CODE (&all.shift_sub0, MINUS);
269 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
270 XEXP (&all.shift_sub0, 1) = &all.reg;
272 PUT_CODE (&all.shift_sub1, MINUS);
273 XEXP (&all.shift_sub1, 0) = &all.reg;
274 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
276 PUT_CODE (&all.trunc, TRUNCATE);
277 XEXP (&all.trunc, 0) = &all.reg;
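/* At this point ALL holds stack-allocated skeleton rtxes -- e.g.
   (plus reg reg), (mult (zero_extend reg) (zero_extend reg)),
   (truncate (lshiftrt ...)) -- whose modes are filled in per mode by
   init_expmed_one_mode above; they exist only so set_src_cost can price
   each shape.  */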
279 for (speed = 0; speed < 2; speed++)
281 crtl->maybe_hot_insn_p = speed;
282 set_zero_cost (speed, set_src_cost (const0_rtx, speed));
284 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
285 mode = (enum machine_mode)(mode + 1))
286 init_expmed_one_mode (&all, mode, speed);
288 if (MIN_MODE_PARTIAL_INT != VOIDmode)
289 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
290 mode = (enum machine_mode)(mode + 1))
291 init_expmed_one_mode (&all, mode, speed);
293 if (MIN_MODE_VECTOR_INT != VOIDmode)
294 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
295 mode = (enum machine_mode)(mode + 1))
296 init_expmed_one_mode (&all, mode, speed);
299 if (alg_hash_used_p ())
301 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
302 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
304 else
305 set_alg_hash_used_p (true);
306 default_rtl_profile ();
309 /* Return an rtx representing minus the value of X.
310 MODE is the intended mode of the result,
311 useful if X is a CONST_INT. */
313 rtx
314 negate_rtx (enum machine_mode mode, rtx x)
316 rtx result = simplify_unary_operation (NEG, mode, x, mode);
318 if (result == 0)
319 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
321 return result;
324 /* Adjust bitfield memory MEM so that it points to the first unit of mode
325 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
326 If MODE is BLKmode, return a reference to every byte in the bitfield.
327 Set *NEW_BITNUM to the bit position of the field within the new memory. */
329 static rtx
330 narrow_bit_field_mem (rtx mem, enum machine_mode mode,
331 unsigned HOST_WIDE_INT bitsize,
332 unsigned HOST_WIDE_INT bitnum,
333 unsigned HOST_WIDE_INT *new_bitnum)
335 if (mode == BLKmode)
337 *new_bitnum = bitnum % BITS_PER_UNIT;
338 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
339 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
340 / BITS_PER_UNIT);
341 return adjust_bitfield_address_size (mem, mode, offset, size);
343 else
345 unsigned int unit = GET_MODE_BITSIZE (mode);
346 *new_bitnum = bitnum % unit;
347 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
348 return adjust_bitfield_address (mem, mode, offset);
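/* Worked example (values assumed, not from the source): narrowing a 3-bit
   field at BITNUM 37 to a 16-bit mode gives *NEW_BITNUM = 37 % 16 = 5 and a
   byte offset of (37 - 5) / 8 = 4, i.e. the MEM is advanced four bytes and
   the field starts five bits into the new unit.  */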
352 /* The caller wants to perform insertion or extraction PATTERN on a
353 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
354 BITREGION_START and BITREGION_END are as for store_bit_field
355 and FIELDMODE is the natural mode of the field.
357 Search for a mode that is compatible with the memory access
358 restrictions and (where applicable) with a register insertion or
359 extraction. Return the new memory on success, storing the adjusted
360 bit position in *NEW_BITNUM. Return null otherwise. */
362 static rtx
363 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
364 rtx op0, HOST_WIDE_INT bitsize,
365 HOST_WIDE_INT bitnum,
366 unsigned HOST_WIDE_INT bitregion_start,
367 unsigned HOST_WIDE_INT bitregion_end,
368 enum machine_mode fieldmode,
369 unsigned HOST_WIDE_INT *new_bitnum)
371 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
372 bitregion_end, MEM_ALIGN (op0),
373 MEM_VOLATILE_P (op0));
374 enum machine_mode best_mode;
375 if (iter.next_mode (&best_mode))
377 /* We can use a memory in BEST_MODE. See whether this is true for
378 any wider modes. All other things being equal, we prefer to
379 use the widest mode possible because it tends to expose more
380 CSE opportunities. */
381 if (!iter.prefer_smaller_modes ())
383 /* Limit the search to the mode required by the corresponding
384 register insertion or extraction instruction, if any. */
385 enum machine_mode limit_mode = word_mode;
386 extraction_insn insn;
387 if (get_best_reg_extraction_insn (&insn, pattern,
388 GET_MODE_BITSIZE (best_mode),
389 fieldmode))
390 limit_mode = insn.field_mode;
392 enum machine_mode wider_mode;
393 while (iter.next_mode (&wider_mode)
394 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
395 best_mode = wider_mode;
397 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
398 new_bitnum);
400 return NULL_RTX;
403 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
404 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
405 offset is then BITNUM / BITS_PER_UNIT. */
407 static bool
408 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
409 unsigned HOST_WIDE_INT bitsize,
410 enum machine_mode struct_mode)
412 if (BYTES_BIG_ENDIAN)
413 return (bitnum % BITS_PER_UNIT == 0
414 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
415 || (bitnum + bitsize) % BITS_PER_WORD == 0));
416 else
417 return bitnum % BITS_PER_WORD == 0;
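/* For example (assumed targets): on a little-endian machine with 32-bit
   words, a field starting at bit 0 or bit 32 of a DImode structure is a
   lowpart; on a big-endian machine the field must instead be byte-aligned
   and end at the structure or a word boundary, e.g. an 8-bit field at
   bits 56..63 of DImode.  */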
420 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
421 bit number BITNUM can be treated as a simple value of mode MODE. */
423 static bool
424 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
425 unsigned HOST_WIDE_INT bitnum, enum machine_mode mode)
427 return (MEM_P (op0)
428 && bitnum % BITS_PER_UNIT == 0
429 && bitsize == GET_MODE_BITSIZE (mode)
430 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
431 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
432 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
435 /* Try to use instruction INSV to store VALUE into a field of OP0.
436 BITSIZE and BITNUM are as for store_bit_field. */
438 static bool
439 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
440 unsigned HOST_WIDE_INT bitsize,
441 unsigned HOST_WIDE_INT bitnum, rtx value)
443 struct expand_operand ops[4];
444 rtx value1;
445 rtx xop0 = op0;
446 rtx last = get_last_insn ();
447 bool copy_back = false;
449 enum machine_mode op_mode = insv->field_mode;
450 unsigned int unit = GET_MODE_BITSIZE (op_mode);
451 if (bitsize == 0 || bitsize > unit)
452 return false;
454 if (MEM_P (xop0))
455 /* Get a reference to the first byte of the field. */
456 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
457 &bitnum);
458 else
460 /* Convert from counting within OP0 to counting in OP_MODE. */
461 if (BYTES_BIG_ENDIAN)
462 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
464 /* If xop0 is a register, we need it in OP_MODE
465 to make it acceptable to the format of insv. */
466 if (GET_CODE (xop0) == SUBREG)
467 /* We can't just change the mode, because this might clobber op0,
468 and we will need the original value of op0 if insv fails. */
469 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
470 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
471 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
474 /* If the destination is a paradoxical subreg such that we need a
475 truncate to the inner mode, perform the insertion on a temporary and
476 truncate the result to the original destination. Note that we can't
477 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
478 X) 0)) is (reg:N X). */
479 if (GET_CODE (xop0) == SUBREG
480 && REG_P (SUBREG_REG (xop0))
481 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
482 op_mode))
484 rtx tem = gen_reg_rtx (op_mode);
485 emit_move_insn (tem, xop0);
486 xop0 = tem;
487 copy_back = true;
490 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
491 "backwards" from the size of the unit we are inserting into.
492 Otherwise, we count bits from the most significant on a
493 BYTES/BITS_BIG_ENDIAN machine. */
495 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
496 bitnum = unit - bitsize - bitnum;
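/* Illustrative numbers (assumed): when the two endianness macros differ and
   UNIT == 32, an 8-bit field at BITNUM 4 is renumbered to 32 - 8 - 4 == 20,
   i.e. the same field counted from the opposite end of the unit.  */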
498 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
499 value1 = value;
500 if (GET_MODE (value) != op_mode)
502 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
504 /* Optimization: Don't bother really extending VALUE
505 if it has all the bits we will actually use. However,
506 if we must narrow it, be sure we do it correctly. */
508 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
510 rtx tmp;
512 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
513 if (! tmp)
514 tmp = simplify_gen_subreg (op_mode,
515 force_reg (GET_MODE (value),
516 value1),
517 GET_MODE (value), 0);
518 value1 = tmp;
520 else
521 value1 = gen_lowpart (op_mode, value1);
523 else if (CONST_INT_P (value))
524 value1 = gen_int_mode (INTVAL (value), op_mode);
525 else
526 /* Parse phase is supposed to make VALUE's data type
527 match that of the component reference, which is a type
528 at least as wide as the field; so VALUE should have
529 a mode that corresponds to that type. */
530 gcc_assert (CONSTANT_P (value));
533 create_fixed_operand (&ops[0], xop0);
534 create_integer_operand (&ops[1], bitsize);
535 create_integer_operand (&ops[2], bitnum);
536 create_input_operand (&ops[3], value1, op_mode);
537 if (maybe_expand_insn (insv->icode, 4, ops))
539 if (copy_back)
540 convert_move (op0, xop0, true);
541 return true;
543 delete_insns_since (last);
544 return false;
547 /* A subroutine of store_bit_field, with the same arguments. Return true
548 if the operation could be implemented.
550 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
551 no other way of implementing the operation. If FALLBACK_P is false,
552 return false instead. */
554 static bool
555 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
556 unsigned HOST_WIDE_INT bitnum,
557 unsigned HOST_WIDE_INT bitregion_start,
558 unsigned HOST_WIDE_INT bitregion_end,
559 enum machine_mode fieldmode,
560 rtx value, bool fallback_p)
562 rtx op0 = str_rtx;
563 rtx orig_value;
565 while (GET_CODE (op0) == SUBREG)
567 /* The following line once was done only if WORDS_BIG_ENDIAN,
568 but I think that is a mistake. WORDS_BIG_ENDIAN is
569 meaningful at a much higher level; when structures are copied
570 between memory and regs, the higher-numbered regs
571 always get higher addresses. */
572 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
573 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
574 int byte_offset = 0;
576 /* Paradoxical subregs need special handling on big endian machines. */
577 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
579 int difference = inner_mode_size - outer_mode_size;
581 if (WORDS_BIG_ENDIAN)
582 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
583 if (BYTES_BIG_ENDIAN)
584 byte_offset += difference % UNITS_PER_WORD;
586 else
587 byte_offset = SUBREG_BYTE (op0);
589 bitnum += byte_offset * BITS_PER_UNIT;
590 op0 = SUBREG_REG (op0);
593 /* No action is needed if the target is a register and if the field
594 lies completely outside that register. This can occur if the source
595 code contains an out-of-bounds access to a small array. */
596 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
597 return true;
599 /* Use vec_set patterns for inserting parts of vectors whenever
600 available. */
601 if (VECTOR_MODE_P (GET_MODE (op0))
602 && !MEM_P (op0)
603 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
604 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
605 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
606 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
608 struct expand_operand ops[3];
609 enum machine_mode outermode = GET_MODE (op0);
610 enum machine_mode innermode = GET_MODE_INNER (outermode);
611 enum insn_code icode = optab_handler (vec_set_optab, outermode);
612 int pos = bitnum / GET_MODE_BITSIZE (innermode);
614 create_fixed_operand (&ops[0], op0);
615 create_input_operand (&ops[1], value, innermode);
616 create_integer_operand (&ops[2], pos);
617 if (maybe_expand_insn (icode, 3, ops))
618 return true;
621 /* If the target is a register, overwriting the entire object, or storing
622 a full-word or multi-word field can be done with just a SUBREG. */
623 if (!MEM_P (op0)
624 && bitsize == GET_MODE_BITSIZE (fieldmode)
625 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
626 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
628 /* Use the subreg machinery either to narrow OP0 to the required
629 words or to cope with mode punning between equal-sized modes. */
630 rtx sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
631 bitnum / BITS_PER_UNIT);
632 if (sub)
634 emit_move_insn (sub, value);
635 return true;
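/* E.g. (an assumed case) storing a full SImode value at bit 0 of a DImode
   pseudo on a little-endian target reduces to a plain move into
   (subreg:SI (reg:DI ...) 0).  */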
639 /* If the target is memory, storing any naturally aligned field can be
640 done with a simple store. For targets that support fast unaligned
641 memory, any naturally sized, unit aligned field can be done directly. */
642 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
644 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
645 emit_move_insn (op0, value);
646 return true;
649 /* Make sure we are playing with integral modes. Pun with subregs
650 if we aren't. This must come after the entire register case above,
651 since that case is valid for any mode. The following cases are only
652 valid for integral modes. */
654 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
655 if (imode != GET_MODE (op0))
657 if (MEM_P (op0))
658 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
659 else
661 gcc_assert (imode != BLKmode);
662 op0 = gen_lowpart (imode, op0);
667 /* Storing an lsb-aligned field in a register
668 can be done with a movstrict instruction. */
670 if (!MEM_P (op0)
671 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
672 && bitsize == GET_MODE_BITSIZE (fieldmode)
673 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
675 struct expand_operand ops[2];
676 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
677 rtx arg0 = op0;
678 unsigned HOST_WIDE_INT subreg_off;
680 if (GET_CODE (arg0) == SUBREG)
682 /* Else we've got some float mode source being extracted into
683 a different float mode destination -- this combination of
684 subregs results in Severe Tire Damage. */
685 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
686 || GET_MODE_CLASS (fieldmode) == MODE_INT
687 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
688 arg0 = SUBREG_REG (arg0);
691 subreg_off = bitnum / BITS_PER_UNIT;
692 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
694 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
696 create_fixed_operand (&ops[0], arg0);
697 /* Shrink the source operand to FIELDMODE. */
698 create_convert_operand_to (&ops[1], value, fieldmode, false);
699 if (maybe_expand_insn (icode, 2, ops))
700 return true;
704 /* Handle fields bigger than a word. */
706 if (bitsize > BITS_PER_WORD)
708 /* Here we transfer the words of the field
709 in the order least significant first.
710 This is because the most significant word is the one which may
711 be less than full.
712 However, only do that if the value is not BLKmode. */
714 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
715 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
716 unsigned int i;
717 rtx last;
719 /* This is the mode we must force value to, so that there will be enough
720 subwords to extract. Note that fieldmode will often (always?) be
721 VOIDmode, because that is what store_field uses to indicate that this
722 is a bit field, but passing VOIDmode to operand_subword_force
723 is not allowed. */
724 fieldmode = GET_MODE (value);
725 if (fieldmode == VOIDmode)
726 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
728 last = get_last_insn ();
729 for (i = 0; i < nwords; i++)
731 /* If I is 0, use the low-order word in both field and target;
732 if I is 1, use the next to lowest word; and so on. */
733 unsigned int wordnum = (backwards
734 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
735 - i - 1
736 : i);
737 unsigned int bit_offset = (backwards
738 ? MAX ((int) bitsize - ((int) i + 1)
739 * BITS_PER_WORD,
740 0)
741 : (int) i * BITS_PER_WORD);
742 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
743 unsigned HOST_WIDE_INT new_bitsize =
744 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
746 /* If the remaining chunk doesn't have full wordsize we have
747 to make sure that for big endian machines the higher order
748 bits are used. */
749 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
750 value_word = simplify_expand_binop (word_mode, lshr_optab,
751 value_word,
752 GEN_INT (BITS_PER_WORD
753 - new_bitsize),
754 NULL_RTX, true,
755 OPTAB_LIB_WIDEN);
757 if (!store_bit_field_1 (op0, new_bitsize,
758 bitnum + bit_offset,
759 bitregion_start, bitregion_end,
760 word_mode,
761 value_word, fallback_p))
763 delete_insns_since (last);
764 return false;
767 return true;
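/* For instance (illustrative only), on a target with 32-bit words a 70-bit
   field is split into nwords == 3 word-sized pieces (32, 32 and the
   remaining 6 bits), each stored by a recursive call to
   store_bit_field_1.  */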
770 /* If VALUE has a floating-point or complex mode, access it as an
771 integer of the corresponding size. This can occur on a machine
772 with 64 bit registers that uses SFmode for float. It can also
773 occur for unaligned float or complex fields. */
774 orig_value = value;
775 if (GET_MODE (value) != VOIDmode
776 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
777 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
779 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
780 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
783 /* If OP0 is a multi-word register, narrow it to the affected word.
784 If the region spans two words, defer to store_split_bit_field. */
785 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
787 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
788 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
789 gcc_assert (op0);
790 bitnum %= BITS_PER_WORD;
791 if (bitnum + bitsize > BITS_PER_WORD)
793 if (!fallback_p)
794 return false;
796 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
797 bitregion_end, value);
798 return true;
802 /* From here on we can assume that the field to be stored in fits
803 within a word. If the destination is a register, it too fits
804 in a word. */
806 extraction_insn insv;
807 if (!MEM_P (op0)
808 && get_best_reg_extraction_insn (&insv, EP_insv,
809 GET_MODE_BITSIZE (GET_MODE (op0)),
810 fieldmode)
811 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
812 return true;
814 /* If OP0 is a memory, try copying it to a register and seeing if a
815 cheap register alternative is available. */
816 if (MEM_P (op0))
818 /* Do not use unaligned memory insvs for volatile bitfields when
819 -fstrict-volatile-bitfields is in effect. */
820 if (!(MEM_VOLATILE_P (op0)
821 && flag_strict_volatile_bitfields > 0)
822 && get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
823 fieldmode)
824 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
825 return true;
827 rtx last = get_last_insn ();
829 /* Try loading part of OP0 into a register, inserting the bitfield
830 into that, and then copying the result back to OP0. */
831 unsigned HOST_WIDE_INT bitpos;
832 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
833 bitregion_start, bitregion_end,
834 fieldmode, &bitpos);
835 if (xop0)
837 rtx tempreg = copy_to_reg (xop0);
838 if (store_bit_field_1 (tempreg, bitsize, bitpos,
839 bitregion_start, bitregion_end,
840 fieldmode, orig_value, false))
842 emit_move_insn (xop0, tempreg);
843 return true;
845 delete_insns_since (last);
849 if (!fallback_p)
850 return false;
852 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
853 bitregion_end, value);
854 return true;
857 /* Generate code to store value from rtx VALUE
858 into a bit-field within structure STR_RTX
859 containing BITSIZE bits starting at bit BITNUM.
861 BITREGION_START is bitpos of the first bitfield in this region.
862 BITREGION_END is the bitpos of the ending bitfield in this region.
863 These two fields are 0 if the C++ memory model does not apply,
864 or if we are not interested in keeping track of bitfield regions.
866 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
868 void
869 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
870 unsigned HOST_WIDE_INT bitnum,
871 unsigned HOST_WIDE_INT bitregion_start,
872 unsigned HOST_WIDE_INT bitregion_end,
873 enum machine_mode fieldmode,
874 rtx value)
876 /* Under the C++0x memory model, we must not touch bits outside the
877 bit region. Adjust the address to start at the beginning of the
878 bit region. */
879 if (MEM_P (str_rtx) && bitregion_start > 0)
881 enum machine_mode bestmode;
882 HOST_WIDE_INT offset, size;
884 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
886 offset = bitregion_start / BITS_PER_UNIT;
887 bitnum -= bitregion_start;
888 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
889 bitregion_end -= bitregion_start;
890 bitregion_start = 0;
891 bestmode = get_best_mode (bitsize, bitnum,
892 bitregion_start, bitregion_end,
893 MEM_ALIGN (str_rtx), VOIDmode,
894 MEM_VOLATILE_P (str_rtx));
895 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
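/* Worked example (values assumed): with BITREGION_START == 32 the MEM is
   advanced 32 / 8 == 4 bytes, BITNUM and BITREGION_END are reduced by 32,
   and the region is renumbered to start at 0, so the code below can never
   touch storage before the region.  */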
898 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
899 bitregion_start, bitregion_end,
900 fieldmode, value, true))
901 gcc_unreachable ();
904 /* Use shifts and boolean operations to store VALUE into a bit field of
905 width BITSIZE in OP0, starting at bit BITNUM. */
907 static void
908 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
909 unsigned HOST_WIDE_INT bitnum,
910 unsigned HOST_WIDE_INT bitregion_start,
911 unsigned HOST_WIDE_INT bitregion_end,
912 rtx value)
914 enum machine_mode mode;
915 rtx temp;
916 int all_zero = 0;
917 int all_one = 0;
919 /* There is a case not handled here:
920 a structure with a known alignment of just a halfword
921 and a field split across two aligned halfwords within the structure.
922 Or likewise a structure with a known alignment of just a byte
923 and a field split across two bytes.
924 Such cases are not supposed to be able to occur. */
926 if (MEM_P (op0))
928 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
930 if (bitregion_end)
931 maxbits = bitregion_end - bitregion_start + 1;
933 /* Get the proper mode to use for this field. We want a mode that
934 includes the entire field. If such a mode would be larger than
935 a word, we won't be doing the extraction the normal way.
936 We don't want a mode bigger than the destination. */
938 mode = GET_MODE (op0);
939 if (GET_MODE_BITSIZE (mode) == 0
940 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
941 mode = word_mode;
943 if (MEM_VOLATILE_P (op0)
944 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
945 && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
946 && flag_strict_volatile_bitfields > 0)
947 mode = GET_MODE (op0);
948 else
949 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
950 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
952 if (mode == VOIDmode)
954 /* The only way this should occur is if the field spans word
955 boundaries. */
956 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
957 bitregion_end, value);
958 return;
961 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
964 mode = GET_MODE (op0);
965 gcc_assert (SCALAR_INT_MODE_P (mode));
967 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
968 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
970 if (BYTES_BIG_ENDIAN)
971 /* BITNUM is the distance between our msb
972 and that of the containing datum.
973 Convert it to the distance from the lsb. */
974 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
976 /* Now BITNUM is always the distance between our lsb
977 and that of OP0. */
979 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
980 we must first convert its mode to MODE. */
982 if (CONST_INT_P (value))
984 HOST_WIDE_INT v = INTVAL (value);
986 if (bitsize < HOST_BITS_PER_WIDE_INT)
987 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
989 if (v == 0)
990 all_zero = 1;
991 else if ((bitsize < HOST_BITS_PER_WIDE_INT
992 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
993 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
994 all_one = 1;
996 value = lshift_value (mode, value, bitnum, bitsize);
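/* Worked example (assumed constants): storing the value 5 into a 3-bit
   field at BITNUM 4 keeps v == 5 (neither all-zero nor all-one) and shifts
   it into place as 5 << 4 == 0x50, ready to be IORed below.  */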
998 else
1000 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1001 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1003 if (GET_MODE (value) != mode)
1004 value = convert_to_mode (mode, value, 1);
1006 if (must_and)
1007 value = expand_binop (mode, and_optab, value,
1008 mask_rtx (mode, 0, bitsize, 0),
1009 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1010 if (bitnum > 0)
1011 value = expand_shift (LSHIFT_EXPR, mode, value,
1012 bitnum, NULL_RTX, 1);
1015 /* Now clear the chosen bits in OP0,
1016 except that if VALUE is -1 we need not bother. */
1017 /* We keep the intermediates in registers to allow CSE to combine
1018 consecutive bitfield assignments. */
1020 temp = force_reg (mode, op0);
1022 if (! all_one)
1024 temp = expand_binop (mode, and_optab, temp,
1025 mask_rtx (mode, bitnum, bitsize, 1),
1026 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1027 temp = force_reg (mode, temp);
1030 /* Now logical-or VALUE into OP0, unless it is zero. */
1032 if (! all_zero)
1034 temp = expand_binop (mode, ior_optab, temp, value,
1035 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1036 temp = force_reg (mode, temp);
1039 if (op0 != temp)
1041 op0 = copy_rtx (op0);
1042 emit_move_insn (op0, temp);
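/* For the assumed example above, the net effect is roughly
   op0 = (op0 & ~(0x7 << 4)) | 0x50; the AND is skipped when the stored
   value is all ones and the IOR when it is all zeros.  */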
1046 /* Store a bit field that is split across multiple accessible memory objects.
1048 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1049 BITSIZE is the field width; BITPOS the position of its first bit
1050 (within the word).
1051 VALUE is the value to store.
1053 This does not yet handle fields wider than BITS_PER_WORD. */
1055 static void
1056 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1057 unsigned HOST_WIDE_INT bitpos,
1058 unsigned HOST_WIDE_INT bitregion_start,
1059 unsigned HOST_WIDE_INT bitregion_end,
1060 rtx value)
1062 unsigned int unit;
1063 unsigned int bitsdone = 0;
1065 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1066 much at a time. */
1067 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1068 unit = BITS_PER_WORD;
1069 else
1070 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1072 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1073 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1074 that VALUE might be a floating-point constant. */
1075 if (CONSTANT_P (value) && !CONST_INT_P (value))
1077 rtx word = gen_lowpart_common (word_mode, value);
1079 if (word && (value != word))
1080 value = word;
1081 else
1082 value = gen_lowpart_common (word_mode,
1083 force_reg (GET_MODE (value) != VOIDmode
1084 ? GET_MODE (value)
1085 : word_mode, value));
1088 while (bitsdone < bitsize)
1090 unsigned HOST_WIDE_INT thissize;
1091 rtx part, word;
1092 unsigned HOST_WIDE_INT thispos;
1093 unsigned HOST_WIDE_INT offset;
1095 offset = (bitpos + bitsdone) / unit;
1096 thispos = (bitpos + bitsdone) % unit;
1098 /* When region of bytes we can touch is restricted, decrease
1099 UNIT close to the end of the region as needed. */
1100 if (bitregion_end
1101 && unit > BITS_PER_UNIT
1102 && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
1104 unit = unit / 2;
1105 continue;
1108 /* THISSIZE must not overrun a word boundary. Otherwise,
1109 store_fixed_bit_field will call us again, and we will mutually
1110 recurse forever. */
1111 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1112 thissize = MIN (thissize, unit - thispos);
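/* Illustrative iteration (values assumed): for a 10-bit field at BITPOS 28
   with UNIT == 32, the first pass stores 4 bits at offset 0, bit 28, and
   the second pass stores the remaining 6 bits at offset 1, bit 0.  */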
1114 if (BYTES_BIG_ENDIAN)
1116 /* Fetch successively less significant portions. */
1117 if (CONST_INT_P (value))
1118 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1119 >> (bitsize - bitsdone - thissize))
1120 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1121 else
1123 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1124 /* The args are chosen so that the last part includes the
1125 lsb. Give extract_bit_field the value it needs (with
1126 endianness compensation) to fetch the piece we want. */
1127 part = extract_fixed_bit_field (word_mode, value, thissize,
1128 total_bits - bitsize + bitsdone,
1129 NULL_RTX, 1, false);
1132 else
1134 /* Fetch successively more significant portions. */
1135 if (CONST_INT_P (value))
1136 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1137 >> bitsdone)
1138 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1139 else
1140 part = extract_fixed_bit_field (word_mode, value, thissize,
1141 bitsdone, NULL_RTX, 1, false);
1144 /* If OP0 is a register, then handle OFFSET here.
1146 When handling multiword bitfields, extract_bit_field may pass
1147 down a word_mode SUBREG of a larger REG for a bitfield that actually
1148 crosses a word boundary. Thus, for a SUBREG, we must find
1149 the current word starting from the base register. */
1150 if (GET_CODE (op0) == SUBREG)
1152 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1153 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1154 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1155 word = word_offset ? const0_rtx : op0;
1156 else
1157 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1158 GET_MODE (SUBREG_REG (op0)));
1159 offset = 0;
1161 else if (REG_P (op0))
1163 enum machine_mode op0_mode = GET_MODE (op0);
1164 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1165 word = offset ? const0_rtx : op0;
1166 else
1167 word = operand_subword_force (op0, offset, GET_MODE (op0));
1168 offset = 0;
1170 else
1171 word = op0;
1173 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1174 it is just an out-of-bounds access. Ignore it. */
1175 if (word != const0_rtx)
1176 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1177 bitregion_start, bitregion_end, part);
1178 bitsdone += thissize;
1182 /* A subroutine of extract_bit_field_1 that converts return value X
1183 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1184 to extract_bit_field. */
1186 static rtx
1187 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1188 enum machine_mode tmode, bool unsignedp)
1190 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1191 return x;
1193 /* If the x mode is not a scalar integral, first convert to the
1194 integer mode of that size and then access it as a floating-point
1195 value via a SUBREG. */
1196 if (!SCALAR_INT_MODE_P (tmode))
1198 enum machine_mode smode;
1200 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1201 x = convert_to_mode (smode, x, unsignedp);
1202 x = force_reg (smode, x);
1203 return gen_lowpart (tmode, x);
1206 return convert_to_mode (tmode, x, unsignedp);
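/* E.g. (an assumed case) a value destined for SFmode is first converted to
   the integer mode of the same width (typically SImode) and then
   reinterpreted with gen_lowpart, since converting directly would change
   the bit pattern.  */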
1209 /* Try to use an ext(z)v pattern to extract a field from OP0.
1210 Return the extracted value on success, otherwise return null.
1211 EXT_MODE is the mode of the extraction and the other arguments
1212 are as for extract_bit_field. */
1214 static rtx
1215 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1216 unsigned HOST_WIDE_INT bitsize,
1217 unsigned HOST_WIDE_INT bitnum,
1218 int unsignedp, rtx target,
1219 enum machine_mode mode, enum machine_mode tmode)
1221 struct expand_operand ops[4];
1222 rtx spec_target = target;
1223 rtx spec_target_subreg = 0;
1224 enum machine_mode ext_mode = extv->field_mode;
1225 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1227 if (bitsize == 0 || unit < bitsize)
1228 return NULL_RTX;
1230 if (MEM_P (op0))
1231 /* Get a reference to the first byte of the field. */
1232 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1233 &bitnum);
1234 else
1236 /* Convert from counting within OP0 to counting in EXT_MODE. */
1237 if (BYTES_BIG_ENDIAN)
1238 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1240 /* If op0 is a register, we need it in EXT_MODE to make it
1241 acceptable to the format of ext(z)v. */
1242 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1243 return NULL_RTX;
1244 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1245 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1248 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1249 "backwards" from the size of the unit we are extracting from.
1250 Otherwise, we count bits from the most significant on a
1251 BYTES/BITS_BIG_ENDIAN machine. */
1253 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1254 bitnum = unit - bitsize - bitnum;
1256 if (target == 0)
1257 target = spec_target = gen_reg_rtx (tmode);
1259 if (GET_MODE (target) != ext_mode)
1261 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1262 between the mode of the extraction (word_mode) and the target
1263 mode. Instead, create a temporary and use convert_move to set
1264 the target. */
1265 if (REG_P (target)
1266 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1268 target = gen_lowpart (ext_mode, target);
1269 if (GET_MODE_PRECISION (ext_mode)
1270 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1271 spec_target_subreg = target;
1273 else
1274 target = gen_reg_rtx (ext_mode);
1277 create_output_operand (&ops[0], target, ext_mode);
1278 create_fixed_operand (&ops[1], op0);
1279 create_integer_operand (&ops[2], bitsize);
1280 create_integer_operand (&ops[3], bitnum);
1281 if (maybe_expand_insn (extv->icode, 4, ops))
1283 target = ops[0].value;
1284 if (target == spec_target)
1285 return target;
1286 if (target == spec_target_subreg)
1287 return spec_target;
1288 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1290 return NULL_RTX;
1293 /* A subroutine of extract_bit_field, with the same arguments.
1294 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1295 if we can find no other means of implementing the operation.
1296 If FALLBACK_P is false, return NULL instead. */
1298 static rtx
1299 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1300 unsigned HOST_WIDE_INT bitnum,
1301 int unsignedp, bool packedp, rtx target,
1302 enum machine_mode mode, enum machine_mode tmode,
1303 bool fallback_p)
1305 rtx op0 = str_rtx;
1306 enum machine_mode int_mode;
1307 enum machine_mode mode1;
1309 if (tmode == VOIDmode)
1310 tmode = mode;
1312 while (GET_CODE (op0) == SUBREG)
1314 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1315 op0 = SUBREG_REG (op0);
1318 /* If we have an out-of-bounds access to a register, just return an
1319 uninitialized register of the required mode. This can occur if the
1320 source code contains an out-of-bounds access to a small array. */
1321 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1322 return gen_reg_rtx (tmode);
1324 if (REG_P (op0)
1325 && mode == GET_MODE (op0)
1326 && bitnum == 0
1327 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1329 /* We're trying to extract a full register from itself. */
1330 return op0;
1333 /* See if we can get a better vector mode before extracting. */
1334 if (VECTOR_MODE_P (GET_MODE (op0))
1335 && !MEM_P (op0)
1336 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1338 enum machine_mode new_mode;
1340 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1341 new_mode = MIN_MODE_VECTOR_FLOAT;
1342 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1343 new_mode = MIN_MODE_VECTOR_FRACT;
1344 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1345 new_mode = MIN_MODE_VECTOR_UFRACT;
1346 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1347 new_mode = MIN_MODE_VECTOR_ACCUM;
1348 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1349 new_mode = MIN_MODE_VECTOR_UACCUM;
1350 else
1351 new_mode = MIN_MODE_VECTOR_INT;
1353 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1354 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1355 && targetm.vector_mode_supported_p (new_mode))
1356 break;
1357 if (new_mode != VOIDmode)
1358 op0 = gen_lowpart (new_mode, op0);
1361 /* Use vec_extract patterns for extracting parts of vectors whenever
1362 available. */
1363 if (VECTOR_MODE_P (GET_MODE (op0))
1364 && !MEM_P (op0)
1365 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1366 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1367 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1369 struct expand_operand ops[3];
1370 enum machine_mode outermode = GET_MODE (op0);
1371 enum machine_mode innermode = GET_MODE_INNER (outermode);
1372 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1373 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1375 create_output_operand (&ops[0], target, innermode);
1376 create_input_operand (&ops[1], op0, outermode);
1377 create_integer_operand (&ops[2], pos);
1378 if (maybe_expand_insn (icode, 3, ops))
1380 target = ops[0].value;
1381 if (GET_MODE (target) != mode)
1382 return gen_lowpart (tmode, target);
1383 return target;
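/* For illustration (values assumed): extracting bits 32..63 of a V4SImode
   vector passes the divisibility check above and becomes a vec_extract of
   element POS == 1.  */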
1387 /* Make sure we are playing with integral modes. Pun with subregs
1388 if we aren't. */
1390 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1391 if (imode != GET_MODE (op0))
1393 if (MEM_P (op0))
1394 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1395 else if (imode != BLKmode)
1397 op0 = gen_lowpart (imode, op0);
1399 /* If we got a SUBREG, force it into a register since we
1400 aren't going to be able to do another SUBREG on it. */
1401 if (GET_CODE (op0) == SUBREG)
1402 op0 = force_reg (imode, op0);
1404 else if (REG_P (op0))
1406 rtx reg, subreg;
1407 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1408 MODE_INT);
1409 reg = gen_reg_rtx (imode);
1410 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1411 emit_move_insn (subreg, op0);
1412 op0 = reg;
1413 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1415 else
1417 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1418 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1419 emit_move_insn (mem, op0);
1420 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1425 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1426 If that's wrong, the solution is to test for it and set TARGET to 0
1427 if needed. */
1429 /* If the bitfield is volatile, we need to make sure the access
1430 remains on a type-aligned boundary. */
1431 if (GET_CODE (op0) == MEM
1432 && MEM_VOLATILE_P (op0)
1433 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1434 && flag_strict_volatile_bitfields > 0)
1435 goto no_subreg_mode_swap;
1437 /* Only scalar integer modes can be converted via subregs. There is an
1438 additional problem for FP modes here in that they can have a precision
1439 which is different from the size. mode_for_size uses precision, but
1440 we want a mode based on the size, so we must avoid calling it for FP
1441 modes. */
1442 mode1 = mode;
1443 if (SCALAR_INT_MODE_P (tmode))
1445 enum machine_mode try_mode = mode_for_size (bitsize,
1446 GET_MODE_CLASS (tmode), 0);
1447 if (try_mode != BLKmode)
1448 mode1 = try_mode;
1450 gcc_assert (mode1 != BLKmode);
1452 /* Extraction of a full MODE1 value can be done with a subreg as long
1453 as the least significant bit of the value is the least significant
1454 bit of either OP0 or a word of OP0. */
1455 if (!MEM_P (op0)
1456 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1457 && bitsize == GET_MODE_BITSIZE (mode1)
1458 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1460 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1461 bitnum / BITS_PER_UNIT);
1462 if (sub)
1463 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1466 /* Extraction of a full MODE1 value can be done with a load as long as
1467 the field is on a byte boundary and is sufficiently aligned. */
1468 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1470 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1471 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1474 no_subreg_mode_swap:
1476 /* Handle fields bigger than a word. */
1478 if (bitsize > BITS_PER_WORD)
1480 /* Here we transfer the words of the field
1481 in the order least significant first.
1482 This is because the most significant word is the one which may
1483 be less than full. */
1485 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1486 unsigned int i;
1487 rtx last;
1489 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1490 target = gen_reg_rtx (mode);
1492 /* Indicate for flow that the entire target reg is being set. */
1493 emit_clobber (target);
1495 last = get_last_insn ();
1496 for (i = 0; i < nwords; i++)
1498 /* If I is 0, use the low-order word in both field and target;
1499 if I is 1, use the next to lowest word; and so on. */
1500 /* Word number in TARGET to use. */
1501 unsigned int wordnum
1502 = (WORDS_BIG_ENDIAN
1503 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1504 : i);
1505 /* Offset from start of field in OP0. */
1506 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1507 ? MAX (0, ((int) bitsize - ((int) i + 1)
1508 * (int) BITS_PER_WORD))
1509 : (int) i * BITS_PER_WORD);
1510 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1511 rtx result_part
1512 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1513 bitsize - i * BITS_PER_WORD),
1514 bitnum + bit_offset, 1, false, target_part,
1515 mode, word_mode, fallback_p);
1517 gcc_assert (target_part);
1518 if (!result_part)
1520 delete_insns_since (last);
1521 return NULL;
1524 if (result_part != target_part)
1525 emit_move_insn (target_part, result_part);
1528 if (unsignedp)
1530 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1531 need to be zero'd out. */
1532 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1534 unsigned int i, total_words;
1536 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1537 for (i = nwords; i < total_words; i++)
1538 emit_move_insn
1539 (operand_subword (target,
1540 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1541 1, VOIDmode),
1542 const0_rtx);
1544 return target;
1547 /* Signed bit field: sign-extend with two arithmetic shifts. */
1548 target = expand_shift (LSHIFT_EXPR, mode, target,
1549 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1550 return expand_shift (RSHIFT_EXPR, mode, target,
1551 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
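/* E.g. (assumed sizes) on a target with 32-bit words, a 40-bit signed field
   extracted into DImode uses shift counts of 64 - 40 == 24: shifting left
   and then arithmetically right by 24 replicates the field's sign bit
   through the upper bits.  */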
1554 /* If OP0 is a multi-word register, narrow it to the affected word.
1555 If the region spans two words, defer to extract_split_bit_field. */
1556 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1558 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1559 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1560 bitnum %= BITS_PER_WORD;
1561 if (bitnum + bitsize > BITS_PER_WORD)
1563 if (!fallback_p)
1564 return NULL_RTX;
1565 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1566 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1570 /* From here on we know the desired field is smaller than a word.
1571 If OP0 is a register, it too fits within a word. */
1572 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1573 extraction_insn extv;
1574 if (!MEM_P (op0)
1575 && get_best_reg_extraction_insn (&extv, pattern, bitnum + bitsize,
1576 tmode))
1578 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1579 unsignedp, target, mode,
1580 tmode);
1581 if (result)
1582 return result;
1585 /* If OP0 is a memory, try copying it to a register and seeing if a
1586 cheap register alternative is available. */
1587 if (MEM_P (op0))
1589 /* Do not use extv/extzv for volatile bitfields when
1590 -fstrict-volatile-bitfields is in effect. */
1591 if (!(MEM_VOLATILE_P (op0) && flag_strict_volatile_bitfields > 0)
1592 && get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1593 tmode))
1595 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1596 bitnum, unsignedp,
1597 target, mode,
1598 tmode);
1599 if (result)
1600 return result;
1603 rtx last = get_last_insn ();
1605 /* Try loading part of OP0 into a register and extracting the
1606 bitfield from that. */
1607 unsigned HOST_WIDE_INT bitpos;
1608 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1609 0, 0, tmode, &bitpos);
1610 if (xop0)
1612 xop0 = copy_to_reg (xop0);
1613 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1614 unsignedp, packedp, target,
1615 mode, tmode, false);
1616 if (result)
1617 return result;
1618 delete_insns_since (last);
1622 if (!fallback_p)
1623 return NULL;
1625 /* Find a correspondingly-sized integer field, so we can apply
1626 shifts and masks to it. */
1627 int_mode = int_mode_for_mode (tmode);
1628 if (int_mode == BLKmode)
1629 int_mode = int_mode_for_mode (mode);
1630 /* Should probably push op0 out to memory and then do a load. */
1631 gcc_assert (int_mode != BLKmode);
1633 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1634 target, unsignedp, packedp);
1635 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1638 /* Generate code to extract a byte-field from STR_RTX
1639 containing BITSIZE bits, starting at BITNUM,
1640 and put it in TARGET if possible (if TARGET is nonzero).
1641 Regardless of TARGET, we return the rtx for where the value is placed.
1643 STR_RTX is the structure containing the byte (a REG or MEM).
1644 UNSIGNEDP is nonzero if this is an unsigned bit field.
1645 PACKEDP is nonzero if the field has the packed attribute.
1646 MODE is the natural mode of the field value once extracted.
1647 TMODE is the mode the caller would like the value to have;
1648 but the value may be returned with type MODE instead.
1650 If a TARGET is specified and we can store in it at no extra cost,
1651 we do so, and return TARGET.
1652 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1653 if they are equally easy. */
1655 rtx
1656 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1657 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1658 rtx target, enum machine_mode mode, enum machine_mode tmode)
1660 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1661 target, mode, tmode, true);
1664 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1665 from bit BITNUM of OP0.
1667 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1668 PACKEDP is true if the field has the packed attribute.
1670 If TARGET is nonzero, attempts to store the value there
1671 and return TARGET, but this is not guaranteed.
1672 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1674 static rtx
1675 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1676 unsigned HOST_WIDE_INT bitsize,
1677 unsigned HOST_WIDE_INT bitnum, rtx target,
1678 int unsignedp, bool packedp)
1680 enum machine_mode mode;
1682 if (MEM_P (op0))
1684 /* Get the proper mode to use for this field. We want a mode that
1685 includes the entire field. If such a mode would be larger than
1686 a word, we won't be doing the extraction the normal way. */
1688 if (MEM_VOLATILE_P (op0)
1689 && flag_strict_volatile_bitfields > 0)
1691 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1692 mode = GET_MODE (op0);
1693 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1694 mode = GET_MODE (target);
1695 else
1696 mode = tmode;
1698 else
1699 mode = get_best_mode (bitsize, bitnum, 0, 0,
1700 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1702 if (mode == VOIDmode)
1703 /* The only way this should occur is if the field spans word
1704 boundaries. */
1705 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1707 unsigned int total_bits = GET_MODE_BITSIZE (mode);
1708 HOST_WIDE_INT bit_offset = bitnum - bitnum % total_bits;
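/* Illustrative numbers (assumed): with TOTAL_BITS == 32 and BITNUM == 70,
   BIT_OFFSET becomes 70 - 70 % 32 == 64 bits, i.e. an eight-byte advance,
   leaving BITNUM == 6 within the new unit once the address is adjusted
   below.  */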
1710 /* If we're accessing a volatile MEM, we can't apply BIT_OFFSET
1711 if it results in a multi-word access where we otherwise wouldn't
1712 have one. So, check for that case here. */
1713 if (MEM_P (op0)
1714 && MEM_VOLATILE_P (op0)
1715 && flag_strict_volatile_bitfields > 0
1716 && bitnum % BITS_PER_UNIT + bitsize <= total_bits
1717 && bitnum % GET_MODE_BITSIZE (mode) + bitsize > total_bits)
1719 if (STRICT_ALIGNMENT)
1721 static bool informed_about_misalignment = false;
1723 if (packedp)
1725 if (bitsize == total_bits)
1726 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1727 "multiple accesses to volatile structure"
1728 " member because of packed attribute");
1729 else
1730 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1731 "multiple accesses to volatile structure"
1732 " bitfield because of packed attribute");
1734 return extract_split_bit_field (op0, bitsize, bitnum,
1735 unsignedp);
1738 if (bitsize == total_bits)
1739 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1740 "mis-aligned access used for structure member");
1741 else
1742 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1743 "mis-aligned access used for structure bitfield");
1745 if (! informed_about_misalignment)
1747 informed_about_misalignment = true;
1748 inform (input_location,
1749 "when a volatile object spans multiple type-sized"
1750 " locations, the compiler must choose between using"
1751 " a single mis-aligned access to preserve the"
1752 " volatility, or using multiple aligned accesses"
1753 " to avoid runtime faults; this code may fail at"
1754 " runtime if the hardware does not allow this"
1755 " access");
1758 bit_offset = bitnum - bitnum % BITS_PER_UNIT;
1760 op0 = adjust_bitfield_address (op0, mode, bit_offset / BITS_PER_UNIT);
1761 bitnum -= bit_offset;
1764 mode = GET_MODE (op0);
1765 gcc_assert (SCALAR_INT_MODE_P (mode));
1767 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1768 for invalid input, such as extract equivalent of f5 from
1769 gcc.dg/pr48335-2.c. */
1771 if (BYTES_BIG_ENDIAN)
1772 /* BITNUM is the distance between our msb and that of OP0.
1773 Convert it to the distance from the lsb. */
1774 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1776 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1777 We have reduced the big-endian case to the little-endian case. */
1779 if (unsignedp)
1781 if (bitnum)
1783 /* If the field does not already start at the lsb,
1784 shift it so it does. */
1785 /* Maybe propagate the target for the shift. */
1786 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1787 if (tmode != mode)
1788 subtarget = 0;
1789 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1791 /* Convert the value to the desired mode. */
1792 if (mode != tmode)
1793 op0 = convert_to_mode (tmode, op0, 1);
1795 /* Unless the msb of the field used to be the msb when we shifted,
1796 mask out the upper bits. */
1798 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1799 return expand_binop (GET_MODE (op0), and_optab, op0,
1800 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1801 target, 1, OPTAB_LIB_WIDEN);
1802 return op0;
1805 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1806 then arithmetic-shift its lsb to the lsb of the word. */
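/* Illustrative example only: a 3-bit signed field starting at bit 2 of an
   8-bit value is shifted left by 8 - (3 + 2) = 3 so its msb lands in
   bit 7, then arithmetic-shifted right by 8 - 3 = 5.  */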
1807 op0 = force_reg (mode, op0);
1809 /* Find the narrowest integer mode that contains the field. */
1811 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1812 mode = GET_MODE_WIDER_MODE (mode))
1813 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1815 op0 = convert_to_mode (mode, op0, 0);
1816 break;
1819 if (mode != tmode)
1820 target = 0;
1822 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1824 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1825 /* Maybe propagate the target for the shift. */
1826 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1827 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1830 return expand_shift (RSHIFT_EXPR, mode, op0,
1831 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1834 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1835 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1836 complement of that if COMPLEMENT. The mask is truncated if
1837 necessary to the width of mode MODE. The mask is zero-extended if
1838 BITSIZE+BITPOS is too small for MODE. */
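/* Example (illustrative values): mask_rtx (SImode, 8, 4, 0) yields the
   constant 0xf00, and with COMPLEMENT nonzero it yields 0xfffff0ff.  */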
1840 static rtx
1841 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1843 double_int mask;
1845 mask = double_int::mask (bitsize);
1846 mask = mask.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1848 if (complement)
1849 mask = ~mask;
1851 return immed_double_int_const (mask, mode);
1854 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1855 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1857 static rtx
1858 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1860 double_int val;
1862 val = double_int::from_uhwi (INTVAL (value)).zext (bitsize);
1863 val = val.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1865 return immed_double_int_const (val, mode);
1868 /* Extract a bit field that is split across two words
1869 and return an RTX for the result.
1871 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1872 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1873 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
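/* Rough example: on a 32-bit-word target, a 16-bit field starting at
   bit 24 is assembled from bits 24..31 of the first word and bits 0..7
   of the second word, each part shifted into place and then OR'ed.  */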
1875 static rtx
1876 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1877 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1879 unsigned int unit;
1880 unsigned int bitsdone = 0;
1881 rtx result = NULL_RTX;
1882 int first = 1;
1884 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1885 much at a time. */
1886 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1887 unit = BITS_PER_WORD;
1888 else
1889 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1891 while (bitsdone < bitsize)
1893 unsigned HOST_WIDE_INT thissize;
1894 rtx part, word;
1895 unsigned HOST_WIDE_INT thispos;
1896 unsigned HOST_WIDE_INT offset;
1898 offset = (bitpos + bitsdone) / unit;
1899 thispos = (bitpos + bitsdone) % unit;
1901 /* THISSIZE must not overrun a word boundary. Otherwise,
1902 extract_fixed_bit_field will call us again, and we will mutually
1903 recurse forever. */
1904 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1905 thissize = MIN (thissize, unit - thispos);
1907 /* If OP0 is a register, then handle OFFSET here.
1909 When handling multiword bitfields, extract_bit_field may pass
1910 down a word_mode SUBREG of a larger REG for a bitfield that actually
1911 crosses a word boundary. Thus, for a SUBREG, we must find
1912 the current word starting from the base register. */
1913 if (GET_CODE (op0) == SUBREG)
1915 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1916 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1917 GET_MODE (SUBREG_REG (op0)));
1918 offset = 0;
1920 else if (REG_P (op0))
1922 word = operand_subword_force (op0, offset, GET_MODE (op0));
1923 offset = 0;
1925 else
1926 word = op0;
1928 /* Extract the parts in bit-counting order,
1929 whose meaning is determined by BYTES_BIG_ENDIAN.
1930 OFFSET is in UNITs, and UNIT is in bits. */
1931 part = extract_fixed_bit_field (word_mode, word, thissize,
1932 offset * unit + thispos, 0, 1, false);
1933 bitsdone += thissize;
1935 /* Shift this part into place for the result. */
1936 if (BYTES_BIG_ENDIAN)
1938 if (bitsize != bitsdone)
1939 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1940 bitsize - bitsdone, 0, 1);
1942 else
1944 if (bitsdone != thissize)
1945 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1946 bitsdone - thissize, 0, 1);
1949 if (first)
1950 result = part;
1951 else
1952 /* Combine the parts with bitwise or. This works
1953 because we extracted each part as an unsigned bit field. */
1954 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1955 OPTAB_LIB_WIDEN);
1957 first = 0;
1960 /* Unsigned bit field: we are done. */
1961 if (unsignedp)
1962 return result;
1963 /* Signed bit field: sign-extend with two arithmetic shifts. */
1964 result = expand_shift (LSHIFT_EXPR, word_mode, result,
1965 BITS_PER_WORD - bitsize, NULL_RTX, 0);
1966 return expand_shift (RSHIFT_EXPR, word_mode, result,
1967 BITS_PER_WORD - bitsize, NULL_RTX, 0);
1970 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1971 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
1972 MODE, fill the upper bits with zeros. Fail if the layout of either
1973 mode is unknown (as for CC modes) or if the extraction would involve
1974 unprofitable mode punning. Return the value on success, otherwise
1975 return null.
1977 This is different from gen_lowpart* in these respects:
1979 - the returned value must always be considered an rvalue
1981 - when MODE is wider than SRC_MODE, the extraction involves
1982 a zero extension
1984 - when MODE is smaller than SRC_MODE, the extraction involves
1985 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
1987 In other words, this routine performs a computation, whereas the
1988 gen_lowpart* routines are conceptually lvalue or rvalue subreg
1989 operations. */
1992 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
1994 enum machine_mode int_mode, src_int_mode;
1996 if (mode == src_mode)
1997 return src;
1999 if (CONSTANT_P (src))
2001 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2002 fails, it will happily create (subreg (symbol_ref)) or similar
2003 invalid SUBREGs. */
2004 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2005 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2006 if (ret)
2007 return ret;
2009 if (GET_MODE (src) == VOIDmode
2010 || !validate_subreg (mode, src_mode, src, byte))
2011 return NULL_RTX;
2013 src = force_reg (GET_MODE (src), src);
2014 return gen_rtx_SUBREG (mode, src, byte);
2017 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2018 return NULL_RTX;
2020 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2021 && MODES_TIEABLE_P (mode, src_mode))
2023 rtx x = gen_lowpart_common (mode, src);
2024 if (x)
2025 return x;
2028 src_int_mode = int_mode_for_mode (src_mode);
2029 int_mode = int_mode_for_mode (mode);
2030 if (src_int_mode == BLKmode || int_mode == BLKmode)
2031 return NULL_RTX;
2033 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2034 return NULL_RTX;
2035 if (!MODES_TIEABLE_P (int_mode, mode))
2036 return NULL_RTX;
2038 src = gen_lowpart (src_int_mode, src);
2039 src = convert_modes (int_mode, src_int_mode, src, true);
2040 src = gen_lowpart (mode, src);
2041 return src;
2044 /* Add INC into TARGET. */
2046 void
2047 expand_inc (rtx target, rtx inc)
2049 rtx value = expand_binop (GET_MODE (target), add_optab,
2050 target, inc,
2051 target, 0, OPTAB_LIB_WIDEN);
2052 if (value != target)
2053 emit_move_insn (target, value);
2056 /* Subtract DEC from TARGET. */
2058 void
2059 expand_dec (rtx target, rtx dec)
2061 rtx value = expand_binop (GET_MODE (target), sub_optab,
2062 target, dec,
2063 target, 0, OPTAB_LIB_WIDEN);
2064 if (value != target)
2065 emit_move_insn (target, value);
2068 /* Output a shift instruction for expression code CODE,
2069 with SHIFTED being the rtx for the value to shift,
2070 and AMOUNT the rtx for the amount to shift by.
2071 Store the result in the rtx TARGET, if that is convenient.
2072 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2073 Return the rtx for where the value is. */
2075 static rtx
2076 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2077 rtx amount, rtx target, int unsignedp)
2079 rtx op1, temp = 0;
2080 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2081 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2082 optab lshift_optab = ashl_optab;
2083 optab rshift_arith_optab = ashr_optab;
2084 optab rshift_uns_optab = lshr_optab;
2085 optab lrotate_optab = rotl_optab;
2086 optab rrotate_optab = rotr_optab;
2087 enum machine_mode op1_mode;
2088 int attempt;
2089 bool speed = optimize_insn_for_speed_p ();
2091 op1 = amount;
2092 op1_mode = GET_MODE (op1);
2094 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2095 shift amount is a vector, use the vector/vector shift patterns. */
2096 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2098 lshift_optab = vashl_optab;
2099 rshift_arith_optab = vashr_optab;
2100 rshift_uns_optab = vlshr_optab;
2101 lrotate_optab = vrotl_optab;
2102 rrotate_optab = vrotr_optab;
2105 /* Previously detected shift-counts computed by NEGATE_EXPR
2106 and shifted in the other direction; but that does not work
2107 on all machines. */
2109 if (SHIFT_COUNT_TRUNCATED)
2111 if (CONST_INT_P (op1)
2112 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2113 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2114 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2115 % GET_MODE_BITSIZE (mode));
2116 else if (GET_CODE (op1) == SUBREG
2117 && subreg_lowpart_p (op1)
2118 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1)))
2119 && INTEGRAL_MODE_P (GET_MODE (op1)))
2120 op1 = SUBREG_REG (op1);
2123 if (op1 == const0_rtx)
2124 return shifted;
2126 /* Check whether it's cheaper to implement a left shift by a constant
2127 bit count by a sequence of additions. */
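/* Sketch with made-up costs: if an addition costs 1 and a shift by 3
   costs 4, then x << 3 is cheaper as three successive doublings
   (x + x, then doubling the result twice more).  */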
2128 if (code == LSHIFT_EXPR
2129 && CONST_INT_P (op1)
2130 && INTVAL (op1) > 0
2131 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2132 && INTVAL (op1) < MAX_BITS_PER_WORD
2133 && (shift_cost (speed, mode, INTVAL (op1))
2134 > INTVAL (op1) * add_cost (speed, mode))
2135 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2137 int i;
2138 for (i = 0; i < INTVAL (op1); i++)
2140 temp = force_reg (mode, shifted);
2141 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2142 unsignedp, OPTAB_LIB_WIDEN);
2144 return shifted;
2147 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2149 enum optab_methods methods;
2151 if (attempt == 0)
2152 methods = OPTAB_DIRECT;
2153 else if (attempt == 1)
2154 methods = OPTAB_WIDEN;
2155 else
2156 methods = OPTAB_LIB_WIDEN;
2158 if (rotate)
2160 /* Widening does not work for rotation. */
2161 if (methods == OPTAB_WIDEN)
2162 continue;
2163 else if (methods == OPTAB_LIB_WIDEN)
2165 /* If we have been unable to open-code this by a rotation,
2166 do it as the IOR of two shifts. I.e., to rotate A
2167 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2168 where C is the bitsize of A.
2170 It is theoretically possible that the target machine might
2171 not be able to perform either shift and hence we would
2172 be making two libcalls rather than just the one for the
2173 shift (similarly if IOR could not be done). We will allow
2174 this extremely unlikely lossage to avoid complicating the
2175 code below. */
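/* Example with illustrative numbers: rotating A left by 3 in a 32-bit
   mode becomes (A << 3) | ((unsigned) A >> 29).  */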
2177 rtx subtarget = target == shifted ? 0 : target;
2178 rtx new_amount, other_amount;
2179 rtx temp1;
2181 new_amount = op1;
2182 if (CONST_INT_P (op1))
2183 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2184 - INTVAL (op1));
2185 else
2186 other_amount
2187 = simplify_gen_binary (MINUS, GET_MODE (op1),
2188 GEN_INT (GET_MODE_PRECISION (mode)),
2189 op1);
2191 shifted = force_reg (mode, shifted);
2193 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2194 mode, shifted, new_amount, 0, 1);
2195 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2196 mode, shifted, other_amount,
2197 subtarget, 1);
2198 return expand_binop (mode, ior_optab, temp, temp1, target,
2199 unsignedp, methods);
2202 temp = expand_binop (mode,
2203 left ? lrotate_optab : rrotate_optab,
2204 shifted, op1, target, unsignedp, methods);
2206 else if (unsignedp)
2207 temp = expand_binop (mode,
2208 left ? lshift_optab : rshift_uns_optab,
2209 shifted, op1, target, unsignedp, methods);
2211 /* Do arithmetic shifts.
2212 Also, if we are going to widen the operand, we can just as well
2213 use an arithmetic right-shift instead of a logical one. */
2214 if (temp == 0 && ! rotate
2215 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2217 enum optab_methods methods1 = methods;
2219 /* If trying to widen a log shift to an arithmetic shift,
2220 don't accept an arithmetic shift of the same size. */
2221 if (unsignedp)
2222 methods1 = OPTAB_MUST_WIDEN;
2224 /* Arithmetic shift */
2226 temp = expand_binop (mode,
2227 left ? lshift_optab : rshift_arith_optab,
2228 shifted, op1, target, unsignedp, methods1);
2231 /* We used to try extzv here for logical right shifts, but that was
2232 only useful for one machine, the VAX, and caused poor code
2233 generation there for lshrdi3, so the code was deleted and a
2234 define_expand for lshrsi3 was added to vax.md. */
2237 gcc_assert (temp);
2238 return temp;
2241 /* Output a shift instruction for expression code CODE,
2242 with SHIFTED being the rtx for the value to shift,
2243 and AMOUNT the amount to shift by.
2244 Store the result in the rtx TARGET, if that is convenient.
2245 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2246 Return the rtx for where the value is. */
2249 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2250 int amount, rtx target, int unsignedp)
2252 return expand_shift_1 (code, mode,
2253 shifted, GEN_INT (amount), target, unsignedp);
2256 /* Output a shift instruction for expression code CODE,
2257 with SHIFTED being the rtx for the value to shift,
2258 and AMOUNT the tree for the amount to shift by.
2259 Store the result in the rtx TARGET, if that is convenient.
2260 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2261 Return the rtx for where the value is. */
2264 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2265 tree amount, rtx target, int unsignedp)
2267 return expand_shift_1 (code, mode,
2268 shifted, expand_normal (amount), target, unsignedp);
2272 /* Indicates the type of fixup needed after a constant multiplication.
2273 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2274 the result should be negated, and ADD_VARIANT means that the
2275 multiplicand should be added to the result. */
2276 enum mult_variant {basic_variant, negate_variant, add_variant};
2278 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2279 const struct mult_cost *, enum machine_mode mode);
2280 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2281 struct algorithm *, enum mult_variant *, int);
2282 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2283 const struct algorithm *, enum mult_variant);
2284 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2285 static rtx extract_high_half (enum machine_mode, rtx);
2286 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2287 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2288 int, int);
2289 /* Compute and return the best algorithm for multiplying by T.
2290 The algorithm must cost less than COST_LIMIT.
2291 If retval.cost >= COST_LIMIT, no algorithm was found and all
2292 other fields of the returned struct are undefined.
2293 MODE is the machine mode of the multiplication. */
2295 static void
2296 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2297 const struct mult_cost *cost_limit, enum machine_mode mode)
2299 int m;
2300 struct algorithm *alg_in, *best_alg;
2301 struct mult_cost best_cost;
2302 struct mult_cost new_limit;
2303 int op_cost, op_latency;
2304 unsigned HOST_WIDE_INT orig_t = t;
2305 unsigned HOST_WIDE_INT q;
2306 int maxm, hash_index;
2307 bool cache_hit = false;
2308 enum alg_code cache_alg = alg_zero;
2309 bool speed = optimize_insn_for_speed_p ();
2310 enum machine_mode imode;
2311 struct alg_hash_entry *entry_ptr;
2313 /* Indicate that no algorithm is yet found. If no algorithm
2314 is found, this value will be returned and indicate failure. */
2315 alg_out->cost.cost = cost_limit->cost + 1;
2316 alg_out->cost.latency = cost_limit->latency + 1;
2318 if (cost_limit->cost < 0
2319 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2320 return;
2322 /* Be prepared for vector modes. */
2323 imode = GET_MODE_INNER (mode);
2324 if (imode == VOIDmode)
2325 imode = mode;
2327 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2329 /* Restrict the bits of "t" to the multiplication's mode. */
2330 t &= GET_MODE_MASK (imode);
2332 /* t == 1 can be done in zero cost. */
2333 if (t == 1)
2335 alg_out->ops = 1;
2336 alg_out->cost.cost = 0;
2337 alg_out->cost.latency = 0;
2338 alg_out->op[0] = alg_m;
2339 return;
2342 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2343 fail now. */
2344 if (t == 0)
2346 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2347 return;
2348 else
2350 alg_out->ops = 1;
2351 alg_out->cost.cost = zero_cost (speed);
2352 alg_out->cost.latency = zero_cost (speed);
2353 alg_out->op[0] = alg_zero;
2354 return;
2358 /* We'll be needing a couple extra algorithm structures now. */
2360 alg_in = XALLOCA (struct algorithm);
2361 best_alg = XALLOCA (struct algorithm);
2362 best_cost = *cost_limit;
2364 /* Compute the hash index. */
2365 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2367 /* See if we already know what to do for T. */
2368 entry_ptr = alg_hash_entry_ptr (hash_index);
2369 if (entry_ptr->t == t
2370 && entry_ptr->mode == mode
2372 && entry_ptr->speed == speed
2373 && entry_ptr->alg != alg_unknown)
2375 cache_alg = entry_ptr->alg;
2377 if (cache_alg == alg_impossible)
2379 /* The cache tells us that it's impossible to synthesize
2380 multiplication by T within entry_ptr->cost. */
2381 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2382 /* COST_LIMIT is at least as restrictive as the one
2383 recorded in the hash table, in which case we have no
2384 hope of synthesizing a multiplication. Just
2385 return. */
2386 return;
2388 /* If we get here, COST_LIMIT is less restrictive than the
2389 one recorded in the hash table, so we may be able to
2390 synthesize a multiplication. Proceed as if we didn't
2391 have the cache entry. */
2393 else
2395 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2396 /* The cached algorithm shows that this multiplication
2397 requires more cost than COST_LIMIT. Just return. This
2398 way, we don't clobber this cache entry with
2399 alg_impossible but retain useful information. */
2400 return;
2402 cache_hit = true;
2404 switch (cache_alg)
2406 case alg_shift:
2407 goto do_alg_shift;
2409 case alg_add_t_m2:
2410 case alg_sub_t_m2:
2411 goto do_alg_addsub_t_m2;
2413 case alg_add_factor:
2414 case alg_sub_factor:
2415 goto do_alg_addsub_factor;
2417 case alg_add_t2_m:
2418 goto do_alg_add_t2_m;
2420 case alg_sub_t2_m:
2421 goto do_alg_sub_t2_m;
2423 default:
2424 gcc_unreachable ();
2429 /* If we have a group of zero bits at the low-order part of T, try
2430 multiplying by the remaining bits and then doing a shift. */
2432 if ((t & 1) == 0)
2434 do_alg_shift:
2435 m = floor_log2 (t & -t); /* m = number of low zero bits */
2436 if (m < maxm)
2438 q = t >> m;
2439 /* The function expand_shift will choose between a shift and
2440 a sequence of additions, so the observed cost is given as
2441 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2442 op_cost = m * add_cost (speed, mode);
2443 if (shift_cost (speed, mode, m) < op_cost)
2444 op_cost = shift_cost (speed, mode, m);
2445 new_limit.cost = best_cost.cost - op_cost;
2446 new_limit.latency = best_cost.latency - op_cost;
2447 synth_mult (alg_in, q, &new_limit, mode);
2449 alg_in->cost.cost += op_cost;
2450 alg_in->cost.latency += op_cost;
2451 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2453 struct algorithm *x;
2454 best_cost = alg_in->cost;
2455 x = alg_in, alg_in = best_alg, best_alg = x;
2456 best_alg->log[best_alg->ops] = m;
2457 best_alg->op[best_alg->ops] = alg_shift;
2460 /* See if treating ORIG_T as a signed number yields a better
2461 sequence. Try this sequence only for a negative ORIG_T
2462 as it would be useless for a non-negative ORIG_T. */
2463 if ((HOST_WIDE_INT) orig_t < 0)
2465 /* Shift ORIG_T as follows because a right shift of a
2466 negative-valued signed type is implementation
2467 defined. */
2468 q = ~(~orig_t >> m);
2469 /* The function expand_shift will choose between a shift
2470 and a sequence of additions, so the observed cost is
2471 given as MIN (m * add_cost(speed, mode),
2472 shift_cost(speed, mode, m)). */
2473 op_cost = m * add_cost (speed, mode);
2474 if (shift_cost (speed, mode, m) < op_cost)
2475 op_cost = shift_cost (speed, mode, m);
2476 new_limit.cost = best_cost.cost - op_cost;
2477 new_limit.latency = best_cost.latency - op_cost;
2478 synth_mult (alg_in, q, &new_limit, mode);
2480 alg_in->cost.cost += op_cost;
2481 alg_in->cost.latency += op_cost;
2482 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2484 struct algorithm *x;
2485 best_cost = alg_in->cost;
2486 x = alg_in, alg_in = best_alg, best_alg = x;
2487 best_alg->log[best_alg->ops] = m;
2488 best_alg->op[best_alg->ops] = alg_shift;
2492 if (cache_hit)
2493 goto done;
2496 /* If we have an odd number, add or subtract one. */
2497 if ((t & 1) != 0)
2499 unsigned HOST_WIDE_INT w;
2501 do_alg_addsub_t_m2:
2502 for (w = 1; (w & t) != 0; w <<= 1)
2504 /* If T was -1, then W will be zero after the loop. This is another
2505 case where T ends with ...111. Handling this by multiplying by (T + 1)
2506 and subtracting 1 produces slightly better code and selects an
2507 algorithm much faster than treating it like the ...0111 case
2508 below. */
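/* For instance (example value only), T == 7 is ...111, so it is handled
   by recursing on T + 1 == 8 and appending one subtraction,
   i.e. a * 7 == a * 8 - a.  */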
2509 if (w == 0
2510 || (w > 2
2511 /* Reject the case where t is 3.
2512 Thus we prefer addition in that case. */
2513 && t != 3))
2515 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2517 op_cost = add_cost (speed, mode);
2518 new_limit.cost = best_cost.cost - op_cost;
2519 new_limit.latency = best_cost.latency - op_cost;
2520 synth_mult (alg_in, t + 1, &new_limit, mode);
2522 alg_in->cost.cost += op_cost;
2523 alg_in->cost.latency += op_cost;
2524 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2526 struct algorithm *x;
2527 best_cost = alg_in->cost;
2528 x = alg_in, alg_in = best_alg, best_alg = x;
2529 best_alg->log[best_alg->ops] = 0;
2530 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2533 else
2535 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2537 op_cost = add_cost (speed, mode);
2538 new_limit.cost = best_cost.cost - op_cost;
2539 new_limit.latency = best_cost.latency - op_cost;
2540 synth_mult (alg_in, t - 1, &new_limit, mode);
2542 alg_in->cost.cost += op_cost;
2543 alg_in->cost.latency += op_cost;
2544 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2546 struct algorithm *x;
2547 best_cost = alg_in->cost;
2548 x = alg_in, alg_in = best_alg, best_alg = x;
2549 best_alg->log[best_alg->ops] = 0;
2550 best_alg->op[best_alg->ops] = alg_add_t_m2;
2554 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2555 quickly with a - a * n for some appropriate constant n. */
2556 m = exact_log2 (-orig_t + 1);
2557 if (m >= 0 && m < maxm)
2559 op_cost = shiftsub1_cost (speed, mode, m);
2560 new_limit.cost = best_cost.cost - op_cost;
2561 new_limit.latency = best_cost.latency - op_cost;
2562 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2563 &new_limit, mode);
2565 alg_in->cost.cost += op_cost;
2566 alg_in->cost.latency += op_cost;
2567 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2569 struct algorithm *x;
2570 best_cost = alg_in->cost;
2571 x = alg_in, alg_in = best_alg, best_alg = x;
2572 best_alg->log[best_alg->ops] = m;
2573 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2577 if (cache_hit)
2578 goto done;
2581 /* Look for factors of t of the form
2582 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2583 If we find such a factor, we can multiply by t using an algorithm that
2584 multiplies by q, shift the result by m and add/subtract it to itself.
2586 We search for large factors first and loop down, even if large factors
2587 are less probable than small; if we find a large factor we will find a
2588 good sequence quickly, and therefore be able to prune (by decreasing
2589 COST_LIMIT) the search. */
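/* Worked example (illustrative only): t = 45 factors as 9 * (2**2 + 1),
   so a*45 can be done as b = a*9 followed by (b << 2) + b, and a*9 in
   turn as (a << 3) + a.  */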
2591 do_alg_addsub_factor:
2592 for (m = floor_log2 (t - 1); m >= 2; m--)
2594 unsigned HOST_WIDE_INT d;
2596 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2597 if (t % d == 0 && t > d && m < maxm
2598 && (!cache_hit || cache_alg == alg_add_factor))
2600 /* If the target has a cheap shift-and-add instruction use
2601 that in preference to a shift insn followed by an add insn.
2602 Assume that the shift-and-add is "atomic" with a latency
2603 equal to its cost, otherwise assume that on superscalar
2604 hardware the shift may be executed concurrently with the
2605 earlier steps in the algorithm. */
2606 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2607 if (shiftadd_cost (speed, mode, m) < op_cost)
2609 op_cost = shiftadd_cost (speed, mode, m);
2610 op_latency = op_cost;
2612 else
2613 op_latency = add_cost (speed, mode);
2615 new_limit.cost = best_cost.cost - op_cost;
2616 new_limit.latency = best_cost.latency - op_latency;
2617 synth_mult (alg_in, t / d, &new_limit, mode);
2619 alg_in->cost.cost += op_cost;
2620 alg_in->cost.latency += op_latency;
2621 if (alg_in->cost.latency < op_cost)
2622 alg_in->cost.latency = op_cost;
2623 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2625 struct algorithm *x;
2626 best_cost = alg_in->cost;
2627 x = alg_in, alg_in = best_alg, best_alg = x;
2628 best_alg->log[best_alg->ops] = m;
2629 best_alg->op[best_alg->ops] = alg_add_factor;
2631 /* Other factors will have been taken care of in the recursion. */
2632 break;
2635 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2636 if (t % d == 0 && t > d && m < maxm
2637 && (!cache_hit || cache_alg == alg_sub_factor))
2639 /* If the target has a cheap shift-and-subtract insn use
2640 that in preference to a shift insn followed by a sub insn.
2641 Assume that the shift-and-sub is "atomic" with a latency
2642 equal to its cost, otherwise assume that on superscalar
2643 hardware the shift may be executed concurrently with the
2644 earlier steps in the algorithm. */
2645 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2646 if (shiftsub0_cost (speed, mode, m) < op_cost)
2648 op_cost = shiftsub0_cost (speed, mode, m);
2649 op_latency = op_cost;
2651 else
2652 op_latency = add_cost (speed, mode);
2654 new_limit.cost = best_cost.cost - op_cost;
2655 new_limit.latency = best_cost.latency - op_latency;
2656 synth_mult (alg_in, t / d, &new_limit, mode);
2658 alg_in->cost.cost += op_cost;
2659 alg_in->cost.latency += op_latency;
2660 if (alg_in->cost.latency < op_cost)
2661 alg_in->cost.latency = op_cost;
2662 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2664 struct algorithm *x;
2665 best_cost = alg_in->cost;
2666 x = alg_in, alg_in = best_alg, best_alg = x;
2667 best_alg->log[best_alg->ops] = m;
2668 best_alg->op[best_alg->ops] = alg_sub_factor;
2670 break;
2673 if (cache_hit)
2674 goto done;
2676 /* Try shift-and-add (load effective address) instructions,
2677 i.e. do a*3, a*5, a*9. */
2678 if ((t & 1) != 0)
2680 do_alg_add_t2_m:
2681 q = t - 1;
2682 q = q & -q;
2683 m = exact_log2 (q);
2684 if (m >= 0 && m < maxm)
2686 op_cost = shiftadd_cost (speed, mode, m);
2687 new_limit.cost = best_cost.cost - op_cost;
2688 new_limit.latency = best_cost.latency - op_cost;
2689 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2691 alg_in->cost.cost += op_cost;
2692 alg_in->cost.latency += op_cost;
2693 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2695 struct algorithm *x;
2696 best_cost = alg_in->cost;
2697 x = alg_in, alg_in = best_alg, best_alg = x;
2698 best_alg->log[best_alg->ops] = m;
2699 best_alg->op[best_alg->ops] = alg_add_t2_m;
2702 if (cache_hit)
2703 goto done;
2705 do_alg_sub_t2_m:
2706 q = t + 1;
2707 q = q & -q;
2708 m = exact_log2 (q);
2709 if (m >= 0 && m < maxm)
2711 op_cost = shiftsub0_cost (speed, mode, m);
2712 new_limit.cost = best_cost.cost - op_cost;
2713 new_limit.latency = best_cost.latency - op_cost;
2714 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2716 alg_in->cost.cost += op_cost;
2717 alg_in->cost.latency += op_cost;
2718 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2720 struct algorithm *x;
2721 best_cost = alg_in->cost;
2722 x = alg_in, alg_in = best_alg, best_alg = x;
2723 best_alg->log[best_alg->ops] = m;
2724 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2727 if (cache_hit)
2728 goto done;
2731 done:
2732 /* If best_cost has not decreased, we have not found any algorithm. */
2733 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2735 /* We failed to find an algorithm. Record alg_impossible for
2736 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2737 we are asked to find an algorithm for T within the same or
2738 lower COST_LIMIT, we can immediately return to the
2739 caller. */
2740 entry_ptr->t = t;
2741 entry_ptr->mode = mode;
2742 entry_ptr->speed = speed;
2743 entry_ptr->alg = alg_impossible;
2744 entry_ptr->cost = *cost_limit;
2745 return;
2748 /* Cache the result. */
2749 if (!cache_hit)
2751 entry_ptr->t = t;
2752 entry_ptr->mode = mode;
2753 entry_ptr->speed = speed;
2754 entry_ptr->alg = best_alg->op[best_alg->ops];
2755 entry_ptr->cost.cost = best_cost.cost;
2756 entry_ptr->cost.latency = best_cost.latency;
2759 /* If the sequence is too long for `struct algorithm' to record,
2760 make this search fail. */
2761 if (best_alg->ops == MAX_BITS_PER_WORD)
2762 return;
2764 /* Copy the algorithm from temporary space to the space at alg_out.
2765 We avoid using structure assignment because the majority of
2766 best_alg is normally undefined, and this is a critical function. */
2767 alg_out->ops = best_alg->ops + 1;
2768 alg_out->cost = best_cost;
2769 memcpy (alg_out->op, best_alg->op,
2770 alg_out->ops * sizeof *alg_out->op);
2771 memcpy (alg_out->log, best_alg->log,
2772 alg_out->ops * sizeof *alg_out->log);
2775 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2776 Try three variations:
2778 - a shift/add sequence based on VAL itself
2779 - a shift/add sequence based on -VAL, followed by a negation
2780 - a shift/add sequence based on VAL - 1, followed by an addition.
2782 Return true if the cheapest of these cost less than MULT_COST,
2783 describing the algorithm in *ALG and final fixup in *VARIANT. */
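/* For example (values chosen only for illustration): multiplying by -3
   may be cheapest as the shift/add sequence for 3 followed by a negation
   (negate_variant), and multiplying by 7 may be cheapest as the sequence
   for 6 followed by one addition of the multiplicand (add_variant).  */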
2785 static bool
2786 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2787 struct algorithm *alg, enum mult_variant *variant,
2788 int mult_cost)
2790 struct algorithm alg2;
2791 struct mult_cost limit;
2792 int op_cost;
2793 bool speed = optimize_insn_for_speed_p ();
2795 /* Fail quickly for impossible bounds. */
2796 if (mult_cost < 0)
2797 return false;
2799 /* Ensure that mult_cost provides a reasonable upper bound.
2800 Any constant multiplication can be performed with less
2801 than 2 * bits additions. */
2802 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2803 if (mult_cost > op_cost)
2804 mult_cost = op_cost;
2806 *variant = basic_variant;
2807 limit.cost = mult_cost;
2808 limit.latency = mult_cost;
2809 synth_mult (alg, val, &limit, mode);
2811 /* This works only if the inverted value actually fits in an
2812 `unsigned int' */
2813 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2815 op_cost = neg_cost(speed, mode);
2816 if (MULT_COST_LESS (&alg->cost, mult_cost))
2818 limit.cost = alg->cost.cost - op_cost;
2819 limit.latency = alg->cost.latency - op_cost;
2821 else
2823 limit.cost = mult_cost - op_cost;
2824 limit.latency = mult_cost - op_cost;
2827 synth_mult (&alg2, -val, &limit, mode);
2828 alg2.cost.cost += op_cost;
2829 alg2.cost.latency += op_cost;
2830 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2831 *alg = alg2, *variant = negate_variant;
2834 /* This proves very useful for division-by-constant. */
2835 op_cost = add_cost (speed, mode);
2836 if (MULT_COST_LESS (&alg->cost, mult_cost))
2838 limit.cost = alg->cost.cost - op_cost;
2839 limit.latency = alg->cost.latency - op_cost;
2841 else
2843 limit.cost = mult_cost - op_cost;
2844 limit.latency = mult_cost - op_cost;
2847 synth_mult (&alg2, val - 1, &limit, mode);
2848 alg2.cost.cost += op_cost;
2849 alg2.cost.latency += op_cost;
2850 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2851 *alg = alg2, *variant = add_variant;
2853 return MULT_COST_LESS (&alg->cost, mult_cost);
2856 /* A subroutine of expand_mult, used for constant multiplications.
2857 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2858 convenient. Use the shift/add sequence described by ALG and apply
2859 the final fixup specified by VARIANT. */
2861 static rtx
2862 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2863 rtx target, const struct algorithm *alg,
2864 enum mult_variant variant)
2866 HOST_WIDE_INT val_so_far;
2867 rtx insn, accum, tem;
2868 int opno;
2869 enum machine_mode nmode;
2871 /* Avoid referencing memory over and over and invalid sharing
2872 on SUBREGs. */
2873 op0 = force_reg (mode, op0);
2875 /* ACCUM starts out either as OP0 or as a zero, depending on
2876 the first operation. */
2878 if (alg->op[0] == alg_zero)
2880 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2881 val_so_far = 0;
2883 else if (alg->op[0] == alg_m)
2885 accum = copy_to_mode_reg (mode, op0);
2886 val_so_far = 1;
2888 else
2889 gcc_unreachable ();
2891 for (opno = 1; opno < alg->ops; opno++)
2893 int log = alg->log[opno];
2894 rtx shift_subtarget = optimize ? 0 : accum;
2895 rtx add_target
2896 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2897 && !optimize)
2898 ? target : 0;
2899 rtx accum_target = optimize ? 0 : accum;
2900 rtx accum_inner;
2902 switch (alg->op[opno])
2904 case alg_shift:
2905 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2906 /* REG_EQUAL note will be attached to the following insn. */
2907 emit_move_insn (accum, tem);
2908 val_so_far <<= log;
2909 break;
2911 case alg_add_t_m2:
2912 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2913 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2914 add_target ? add_target : accum_target);
2915 val_so_far += (HOST_WIDE_INT) 1 << log;
2916 break;
2918 case alg_sub_t_m2:
2919 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2920 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2921 add_target ? add_target : accum_target);
2922 val_so_far -= (HOST_WIDE_INT) 1 << log;
2923 break;
2925 case alg_add_t2_m:
2926 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2927 log, shift_subtarget, 0);
2928 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2929 add_target ? add_target : accum_target);
2930 val_so_far = (val_so_far << log) + 1;
2931 break;
2933 case alg_sub_t2_m:
2934 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2935 log, shift_subtarget, 0);
2936 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2937 add_target ? add_target : accum_target);
2938 val_so_far = (val_so_far << log) - 1;
2939 break;
2941 case alg_add_factor:
2942 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2943 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2944 add_target ? add_target : accum_target);
2945 val_so_far += val_so_far << log;
2946 break;
2948 case alg_sub_factor:
2949 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2950 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2951 (add_target
2952 ? add_target : (optimize ? 0 : tem)));
2953 val_so_far = (val_so_far << log) - val_so_far;
2954 break;
2956 default:
2957 gcc_unreachable ();
2960 if (SCALAR_INT_MODE_P (mode))
2962 /* Write a REG_EQUAL note on the last insn so that we can cse
2963 multiplication sequences. Note that if ACCUM is a SUBREG,
2964 we've set the inner register and must properly indicate that. */
2965 tem = op0, nmode = mode;
2966 accum_inner = accum;
2967 if (GET_CODE (accum) == SUBREG)
2969 accum_inner = SUBREG_REG (accum);
2970 nmode = GET_MODE (accum_inner);
2971 tem = gen_lowpart (nmode, op0);
2974 insn = get_last_insn ();
2975 set_dst_reg_note (insn, REG_EQUAL,
2976 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
2977 accum_inner);
2981 if (variant == negate_variant)
2983 val_so_far = -val_so_far;
2984 accum = expand_unop (mode, neg_optab, accum, target, 0);
2986 else if (variant == add_variant)
2988 val_so_far = val_so_far + 1;
2989 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2992 /* Compare only the bits of val and val_so_far that are significant
2993 in the result mode, to avoid sign-/zero-extension confusion. */
2994 nmode = GET_MODE_INNER (mode);
2995 if (nmode == VOIDmode)
2996 nmode = mode;
2997 val &= GET_MODE_MASK (nmode);
2998 val_so_far &= GET_MODE_MASK (nmode);
2999 gcc_assert (val == val_so_far);
3001 return accum;
3004 /* Perform a multiplication and return an rtx for the result.
3005 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3006 TARGET is a suggestion for where to store the result (an rtx).
3008 We check specially for a constant integer as OP1.
3009 If you want this check for OP0 as well, then before calling
3010 you should swap the two operands if OP0 would be constant. */
3013 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3014 int unsignedp)
3016 enum mult_variant variant;
3017 struct algorithm algorithm;
3018 rtx scalar_op1;
3019 int max_cost;
3020 bool speed = optimize_insn_for_speed_p ();
3021 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3023 if (CONSTANT_P (op0))
3025 rtx temp = op0;
3026 op0 = op1;
3027 op1 = temp;
3030 /* For vectors, there are several simplifications that can be made if
3031 all elements of the vector constant are identical. */
3032 scalar_op1 = op1;
3033 if (GET_CODE (op1) == CONST_VECTOR)
3035 int i, n = CONST_VECTOR_NUNITS (op1);
3036 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3037 for (i = 1; i < n; ++i)
3038 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3039 goto skip_scalar;
3042 if (INTEGRAL_MODE_P (mode))
3044 rtx fake_reg;
3045 HOST_WIDE_INT coeff;
3046 bool is_neg;
3047 int mode_bitsize;
3049 if (op1 == CONST0_RTX (mode))
3050 return op1;
3051 if (op1 == CONST1_RTX (mode))
3052 return op0;
3053 if (op1 == CONSTM1_RTX (mode))
3054 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3055 op0, target, 0);
3057 if (do_trapv)
3058 goto skip_synth;
3060 /* These are the operations that are potentially turned into
3061 a sequence of shifts and additions. */
3062 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3064 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3065 less than or equal in size to `unsigned int' this doesn't matter.
3066 If the mode is larger than `unsigned int', then synth_mult works
3067 only if the constant value exactly fits in an `unsigned int' without
3068 any truncation. This means that multiplying by negative values does
3069 not work; results are off by 2^32 on a 32 bit machine. */
3071 if (CONST_INT_P (scalar_op1))
3073 coeff = INTVAL (scalar_op1);
3074 is_neg = coeff < 0;
3076 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3078 /* If we are multiplying in DImode, it may still be a win
3079 to try to work with shifts and adds. */
3080 if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3081 && CONST_DOUBLE_LOW (scalar_op1) > 0)
3083 coeff = CONST_DOUBLE_LOW (scalar_op1);
3084 is_neg = false;
3086 else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3088 coeff = CONST_DOUBLE_HIGH (scalar_op1);
3089 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3091 int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3092 if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3093 || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3094 return expand_shift (LSHIFT_EXPR, mode, op0,
3095 shift, target, unsignedp);
3097 goto skip_synth;
3099 else
3100 goto skip_synth;
3102 else
3103 goto skip_synth;
3105 /* We used to test optimize here, on the grounds that it's better to
3106 produce a smaller program when -O is not used. But this causes
3107 such a terrible slowdown sometimes that it seems better to always
3108 use synth_mult. */
3110 /* Special case powers of two. */
3111 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3112 return expand_shift (LSHIFT_EXPR, mode, op0,
3113 floor_log2 (coeff), target, unsignedp);
3115 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3117 /* Attempt to handle multiplication of DImode values by negative
3118 coefficients, by performing the multiplication by a positive
3119 multiplier and then inverting the result. */
3120 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3122 /* It's safe to use -coeff even for INT_MIN, as the
3123 result is interpreted as an unsigned coefficient.
3124 Exclude cost of op0 from max_cost to match the cost
3125 calculation of the synth_mult. */
3126 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3127 - neg_cost(speed, mode));
3128 if (max_cost > 0
3129 && choose_mult_variant (mode, -coeff, &algorithm,
3130 &variant, max_cost))
3132 rtx temp = expand_mult_const (mode, op0, -coeff, NULL_RTX,
3133 &algorithm, variant);
3134 return expand_unop (mode, neg_optab, temp, target, 0);
3136 goto skip_synth;
3139 /* Exclude cost of op0 from max_cost to match the cost
3140 calculation of the synth_mult. */
3141 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3142 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3143 return expand_mult_const (mode, op0, coeff, target,
3144 &algorithm, variant);
3146 skip_synth:
3148 /* Expand x*2.0 as x+x. */
3149 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3151 REAL_VALUE_TYPE d;
3152 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3154 if (REAL_VALUES_EQUAL (d, dconst2))
3156 op0 = force_reg (GET_MODE (op0), op0);
3157 return expand_binop (mode, add_optab, op0, op0,
3158 target, unsignedp, OPTAB_LIB_WIDEN);
3161 skip_scalar:
3163 /* This used to use umul_optab if unsigned, but for non-widening multiply
3164 there is no difference between signed and unsigned. */
3165 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3166 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3167 gcc_assert (op0);
3168 return op0;
3171 /* Return a cost estimate for multiplying a register by the given
3172 COEFFicient in the given MODE and SPEED. */
3175 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3177 int max_cost;
3178 struct algorithm algorithm;
3179 enum mult_variant variant;
3181 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3182 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3183 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3184 return algorithm.cost.cost;
3185 else
3186 return max_cost;
3189 /* Perform a widening multiplication and return an rtx for the result.
3190 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3191 TARGET is a suggestion for where to store the result (an rtx).
3192 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3193 or smul_widen_optab.
3195 We check specially for a constant integer as OP1, comparing the
3196 cost of a widening multiply against the cost of a sequence of shifts
3197 and adds. */
3200 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3201 int unsignedp, optab this_optab)
3203 bool speed = optimize_insn_for_speed_p ();
3204 rtx cop1;
3206 if (CONST_INT_P (op1)
3207 && GET_MODE (op0) != VOIDmode
3208 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3209 this_optab == umul_widen_optab))
3210 && CONST_INT_P (cop1)
3211 && (INTVAL (cop1) >= 0
3212 || HWI_COMPUTABLE_MODE_P (mode)))
3214 HOST_WIDE_INT coeff = INTVAL (cop1);
3215 int max_cost;
3216 enum mult_variant variant;
3217 struct algorithm algorithm;
3219 /* Special case powers of two. */
3220 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3222 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3223 return expand_shift (LSHIFT_EXPR, mode, op0,
3224 floor_log2 (coeff), target, unsignedp);
3227 /* Exclude cost of op0 from max_cost to match the cost
3228 calculation of the synth_mult. */
3229 max_cost = mul_widen_cost (speed, mode);
3230 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3231 max_cost))
3233 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3234 return expand_mult_const (mode, op0, coeff, target,
3235 &algorithm, variant);
3238 return expand_binop (mode, this_optab, op0, op1, target,
3239 unsignedp, OPTAB_LIB_WIDEN);
3242 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3243 replace division by D, and put the least significant N bits of the result
3244 in *MULTIPLIER_PTR and return the most significant bit.
3246 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3247 needed precision is in PRECISION (should be <= N).
3249 PRECISION should be as small as possible so this function can choose
3250 multiplier more freely.
3252 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3253 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3255 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3256 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
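/* Illustrative example, assuming N == PRECISION == 32: for D == 3 this
   chooses multiplier 0xAAAAAAAB with *POST_SHIFT_PTR == 1 and returns 0,
   so an unsigned 32-bit x/3 == (x * 0xAAAAAAAB) >> 32 >> 1.  */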
3258 unsigned HOST_WIDE_INT
3259 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3260 unsigned HOST_WIDE_INT *multiplier_ptr,
3261 int *post_shift_ptr, int *lgup_ptr)
3263 double_int mhigh, mlow;
3264 int lgup, post_shift;
3265 int pow, pow2;
3267 /* lgup = ceil(log2(divisor)); */
3268 lgup = ceil_log2 (d);
3270 gcc_assert (lgup <= n);
3272 pow = n + lgup;
3273 pow2 = n + lgup - precision;
3275 /* We could handle this with some effort, but this case is much
3276 better handled directly with a scc insn, so rely on caller using
3277 that. */
3278 gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3280 /* mlow = 2^(N + lgup)/d */
3281 double_int val = double_int_zero.set_bit (pow);
3282 mlow = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3284 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3285 val |= double_int_zero.set_bit (pow2);
3286 mhigh = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3288 gcc_assert (!mhigh.high || val.high - d < d);
3289 gcc_assert (mhigh.high <= 1 && mlow.high <= 1);
3290 /* Assert that mlow < mhigh. */
3291 gcc_assert (mlow.ult (mhigh));
3293 /* If precision == N, then mlow, mhigh exceed 2^N
3294 (but they do not exceed 2^(N+1)). */
3296 /* Reduce to lowest terms. */
3297 for (post_shift = lgup; post_shift > 0; post_shift--)
3299 int shft = HOST_BITS_PER_WIDE_INT - 1;
3300 unsigned HOST_WIDE_INT ml_lo = (mlow.high << shft) | (mlow.low >> 1);
3301 unsigned HOST_WIDE_INT mh_lo = (mhigh.high << shft) | (mhigh.low >> 1);
3302 if (ml_lo >= mh_lo)
3303 break;
3305 mlow = double_int::from_uhwi (ml_lo);
3306 mhigh = double_int::from_uhwi (mh_lo);
3309 *post_shift_ptr = post_shift;
3310 *lgup_ptr = lgup;
3311 if (n < HOST_BITS_PER_WIDE_INT)
3313 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3314 *multiplier_ptr = mhigh.low & mask;
3315 return mhigh.low >= mask;
3317 else
3319 *multiplier_ptr = mhigh.low;
3320 return mhigh.high;
3324 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3325 congruent to 1 (mod 2**N). */
3327 static unsigned HOST_WIDE_INT
3328 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3330 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3332 /* The algorithm notes that the choice y = x satisfies
3333 x*y == 1 mod 2^3, since x is assumed odd.
3334 Each iteration doubles the number of bits of significance in y. */
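/* Small example (illustrative): with x == 3 and n == 8 the result is
   171, since 3 * 171 == 513 == 2 * 256 + 1 == 1 (mod 2**8).  */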
3336 unsigned HOST_WIDE_INT mask;
3337 unsigned HOST_WIDE_INT y = x;
3338 int nbit = 3;
3340 mask = (n == HOST_BITS_PER_WIDE_INT
3341 ? ~(unsigned HOST_WIDE_INT) 0
3342 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3344 while (nbit < n)
3346 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3347 nbit *= 2;
3349 return y;
3352 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3353 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3354 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3355 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3356 become signed.
3358 The result is put in TARGET if that is convenient.
3360 MODE is the mode of operation. */
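/* Sketch of the identity this relies on (working modulo 2**N, where N is
   the mode width and X, Y are the unsigned readings of OP0, OP1):
   signed_high (OP0 * OP1) == unsigned_high (X * Y)
   - (OP0 < 0 ? Y : 0) - (OP1 < 0 ? X : 0),
   and the unsigned direction adds those terms back instead.  */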
3363 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3364 rtx op1, rtx target, int unsignedp)
3366 rtx tem;
3367 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3369 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3370 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3371 tem = expand_and (mode, tem, op1, NULL_RTX);
3372 adj_operand
3373 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3374 adj_operand);
3376 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3377 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3378 tem = expand_and (mode, tem, op0, NULL_RTX);
3379 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3380 target);
3382 return target;
3385 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3387 static rtx
3388 extract_high_half (enum machine_mode mode, rtx op)
3390 enum machine_mode wider_mode;
3392 if (mode == word_mode)
3393 return gen_highpart (mode, op);
3395 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3397 wider_mode = GET_MODE_WIDER_MODE (mode);
3398 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3399 GET_MODE_BITSIZE (mode), 0, 1);
3400 return convert_modes (mode, wider_mode, op, 0);
3403 /* Like expmed_mult_highpart, but only consider using a multiplication
3404 optab. OP1 is an rtx for the constant operand. */
3406 static rtx
3407 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3408 rtx target, int unsignedp, int max_cost)
3410 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3411 enum machine_mode wider_mode;
3412 optab moptab;
3413 rtx tem;
3414 int size;
3415 bool speed = optimize_insn_for_speed_p ();
3417 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3419 wider_mode = GET_MODE_WIDER_MODE (mode);
3420 size = GET_MODE_BITSIZE (mode);
3422 /* Firstly, try using a multiplication insn that only generates the needed
3423 high part of the product, and in the sign flavor of unsignedp. */
3424 if (mul_highpart_cost (speed, mode) < max_cost)
3426 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3427 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3428 unsignedp, OPTAB_DIRECT);
3429 if (tem)
3430 return tem;
3433 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3434 Need to adjust the result after the multiplication. */
3435 if (size - 1 < BITS_PER_WORD
3436 && (mul_highpart_cost (speed, mode)
3437 + 2 * shift_cost (speed, mode, size-1)
3438 + 4 * add_cost (speed, mode) < max_cost))
3440 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3441 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3442 unsignedp, OPTAB_DIRECT);
3443 if (tem)
3444 /* We used the wrong signedness. Adjust the result. */
3445 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3446 tem, unsignedp);
3449 /* Try widening multiplication. */
3450 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3451 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3452 && mul_widen_cost (speed, wider_mode) < max_cost)
3454 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3455 unsignedp, OPTAB_WIDEN);
3456 if (tem)
3457 return extract_high_half (mode, tem);
3460 /* Try widening the mode and perform a non-widening multiplication. */
3461 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3462 && size - 1 < BITS_PER_WORD
3463 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3464 < max_cost))
3466 rtx insns, wop0, wop1;
3468 /* We need to widen the operands, for example to ensure the
3469 constant multiplier is correctly sign or zero extended.
3470 Use a sequence to clean-up any instructions emitted by
3471 the conversions if things don't work out. */
3472 start_sequence ();
3473 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3474 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3475 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3476 unsignedp, OPTAB_WIDEN);
3477 insns = get_insns ();
3478 end_sequence ();
3480 if (tem)
3482 emit_insn (insns);
3483 return extract_high_half (mode, tem);
3487 /* Try widening multiplication of opposite signedness, and adjust. */
3488 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3489 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3490 && size - 1 < BITS_PER_WORD
3491 && (mul_widen_cost (speed, wider_mode)
3492 + 2 * shift_cost (speed, mode, size-1)
3493 + 4 * add_cost (speed, mode) < max_cost))
3495 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3496 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3497 if (tem != 0)
3499 tem = extract_high_half (mode, tem);
3500 /* We used the wrong signedness. Adjust the result. */
3501 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3502 target, unsignedp);
3506 return 0;
3509 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3510 putting the high half of the result in TARGET if that is convenient,
3511    and return where the result is.  If the operation cannot be performed,
3512 0 is returned.
3514 MODE is the mode of operation and result.
3516 UNSIGNEDP nonzero means unsigned multiply.
3518 MAX_COST is the total allowed cost for the expanded RTL. */
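/* Informally, and only as a sketch: for a 32-bit MODE the value computed
   here is the upper half of the full 64-bit product, roughly

     result == (op0 * op1) >> 32,   computed in 64 bits,

   with the operands sign- or zero-extended according to UNSIGNEDP.  */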
3520 static rtx
3521 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3522 rtx target, int unsignedp, int max_cost)
3524 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3525 unsigned HOST_WIDE_INT cnst1;
3526 int extra_cost;
3527 bool sign_adjust = false;
3528 enum mult_variant variant;
3529 struct algorithm alg;
3530 rtx tem;
3531 bool speed = optimize_insn_for_speed_p ();
3533 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3534   /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
3535 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3537 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3539 /* We can't optimize modes wider than BITS_PER_WORD.
3540 ??? We might be able to perform double-word arithmetic if
3541      mode == word_mode; however, all the cost calculations in
3542 synth_mult etc. assume single-word operations. */
3543 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3544 return expmed_mult_highpart_optab (mode, op0, op1, target,
3545 unsignedp, max_cost);
3547 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3549 /* Check whether we try to multiply by a negative constant. */
3550 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3552 sign_adjust = true;
3553 extra_cost += add_cost (speed, mode);
3556 /* See whether shift/add multiplication is cheap enough. */
3557 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3558 max_cost - extra_cost))
3560 /* See whether the specialized multiplication optabs are
3561 cheaper than the shift/add version. */
3562 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3563 alg.cost.cost + extra_cost);
3564 if (tem)
3565 return tem;
3567 tem = convert_to_mode (wider_mode, op0, unsignedp);
3568 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3569 tem = extract_high_half (mode, tem);
3571 /* Adjust result for signedness. */
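/* Why the subtraction below is enough: CNST1 is OP1 reduced modulo
   2**size, so when the original constant was negative the product formed
   above actually used CNST1 + 2**size; its high half is therefore too
   large by exactly OP0 (modulo the mode), i.e. roughly
     high (op0 * c) == high (op0 * (c + 2**size)) - op0.  */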
3572 if (sign_adjust)
3573 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3575 return tem;
3577 return expmed_mult_highpart_optab (mode, op0, op1, target,
3578 unsignedp, max_cost);
3582 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
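/* Informally: the result follows the truncating (C-style) convention and
   so has the sign of OP0; e.g. with D == 4, 7 % 4 == 3 and -7 % 4 == -3.  */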
3584 static rtx
3585 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3587 unsigned HOST_WIDE_INT masklow, maskhigh;
3588 rtx result, temp, shift, label;
3589 int logd;
3591 logd = floor_log2 (d);
3592 result = gen_reg_rtx (mode);
3594 /* Avoid conditional branches when they're expensive. */
3595 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3596 && optimize_insn_for_speed_p ())
3598 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3599 mode, 0, -1);
3600 if (signmask)
3602 signmask = force_reg (mode, signmask);
3603 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3604 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3606 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3607 which instruction sequence to use. If logical right shifts
3608          are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3609          use an LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
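/* A sketch of the two sequences, writing S for the 0/-1 sign mask
   computed above, M for D - 1 and BITS for the mode bitsize:

     expensive shifts:  ((((OP0 ^ S) - S) & M) ^ S) - S
     cheap shifts:      let S2 = S >> (BITS - LOGD)   (logical shift),
                        then compute ((OP0 + S2) & M) - S2

   e.g. OP0 == -7, D == 4: both yield -3.  */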
3611 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3612 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3613 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3614 > COSTS_N_INSNS (2)))
3616 temp = expand_binop (mode, xor_optab, op0, signmask,
3617 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3618 temp = expand_binop (mode, sub_optab, temp, signmask,
3619 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3620 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3621 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3622 temp = expand_binop (mode, xor_optab, temp, signmask,
3623 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3624 temp = expand_binop (mode, sub_optab, temp, signmask,
3625 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3627 else
3629 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3630 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3631 signmask = force_reg (mode, signmask);
3633 temp = expand_binop (mode, add_optab, op0, signmask,
3634 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3635 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3636 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3637 temp = expand_binop (mode, sub_optab, temp, signmask,
3638 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3640 return temp;
3644 /* Mask contains the mode's signbit and the significant bits of the
3645 modulus. By including the signbit in the operation, many targets
3646 can avoid an explicit compare operation in the following comparison
3647 against zero. */
3649 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3650 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3652 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3653 maskhigh = -1;
3655 else
3656 maskhigh = (HOST_WIDE_INT) -1
3657 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3659 temp = expand_binop (mode, and_optab, op0,
3660 immed_double_const (masklow, maskhigh, mode),
3661 result, 1, OPTAB_LIB_WIDEN);
3662 if (temp != result)
3663 emit_move_insn (result, temp);
3665 label = gen_label_rtx ();
3666 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3668 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3669 0, OPTAB_LIB_WIDEN);
3670 masklow = (HOST_WIDE_INT) -1 << logd;
3671 maskhigh = -1;
3672 temp = expand_binop (mode, ior_optab, temp,
3673 immed_double_const (masklow, maskhigh, mode),
3674 result, 1, OPTAB_LIB_WIDEN);
3675 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3676 0, OPTAB_LIB_WIDEN);
3677 if (temp != result)
3678 emit_move_insn (result, temp);
3679 emit_label (label);
3680 return result;
3683 /* Expand signed division of OP0 by a power of two D in mode MODE.
3684 This routine is only called for positive values of D. */
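/* Informally: this is the truncating (round-towards-zero) quotient.
   The branch-free variants below bias negative dividends by D - 1 before
   the arithmetic shift; e.g. with D == 4,
     7 / 4  ==  7 >> 2        ==  1
    -7 / 4  == (-7 + 3) >> 2  == -1   (a bare shift would give -2).  */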
3686 static rtx
3687 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3689 rtx temp, label;
3690 int logd;
3692 logd = floor_log2 (d);
3694 if (d == 2
3695 && BRANCH_COST (optimize_insn_for_speed_p (),
3696 false) >= 1)
3698 temp = gen_reg_rtx (mode);
3699 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3700 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3701 0, OPTAB_LIB_WIDEN);
3702 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3705 #ifdef HAVE_conditional_move
3706 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3707 >= 2)
3709 rtx temp2;
3711 /* ??? emit_conditional_move forces a stack adjustment via
3712 compare_from_rtx so, if the sequence is discarded, it will
3713 be lost. Do it now instead. */
3714 do_pending_stack_adjust ();
3716 start_sequence ();
3717 temp2 = copy_to_mode_reg (mode, op0);
3718 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3719 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3720 temp = force_reg (mode, temp);
3722 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3723 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3724 mode, temp, temp2, mode, 0);
3725 if (temp2)
3727 rtx seq = get_insns ();
3728 end_sequence ();
3729 emit_insn (seq);
3730 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3732 end_sequence ();
3734 #endif
3736 if (BRANCH_COST (optimize_insn_for_speed_p (),
3737 false) >= 2)
3739 int ushift = GET_MODE_BITSIZE (mode) - logd;
3741 temp = gen_reg_rtx (mode);
3742 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3743 if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3744 > COSTS_N_INSNS (1))
3745 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3746 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3747 else
3748 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3749 ushift, NULL_RTX, 1);
3750 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3751 0, OPTAB_LIB_WIDEN);
3752 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3755 label = gen_label_rtx ();
3756 temp = copy_to_mode_reg (mode, op0);
3757 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3758 expand_inc (temp, GEN_INT (d - 1));
3759 emit_label (label);
3760 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3763 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3764 if that is convenient, and returning where the result is.
3765 You may request either the quotient or the remainder as the result;
3766 specify REM_FLAG nonzero to get the remainder.
3768 CODE is the expression code for which kind of division this is;
3769 it controls how rounding is done. MODE is the machine mode to use.
3770 UNSIGNEDP nonzero means do unsigned division. */
3772 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3773 and then correct it by or'ing in missing high bits
3774 if result of ANDI is nonzero.
3775 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3776 This could optimize to a bfexts instruction.
3777 But C doesn't use these operations, so their optimizations are
3778 left for later. */
3779 /* ??? For modulo, we don't actually need the highpart of the first product,
3780 the low part will do nicely. And for small divisors, the second multiply
3781 can also be a low-part only multiply or even be completely left out.
3782 E.g. to calculate the remainder of a division by 3 with a 32 bit
3783 multiply, multiply with 0x55555556 and extract the upper two bits;
3784 the result is exact for inputs up to 0x1fffffff.
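   (As a quick check of that claim: for x == 7 the low 32 bits of
   7 * 0x55555556 are 0x5555555A, whose top two bits are 01, i.e. 1 == 7 % 3.)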
3785 The input range can be reduced by using cross-sum rules.
3786 For odd divisors >= 3, the following table gives right shift counts
3787 so that if a number is shifted by an integer multiple of the given
3788 amount, the remainder stays the same:
3789 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3790 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3791 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3792 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3793 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3795 Cross-sum rules for even numbers can be derived by leaving as many bits
3796 to the right alone as the divisor has zeros to the right.
3797 E.g. if x is an unsigned 32 bit number:
3798 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3802 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3803 rtx op0, rtx op1, rtx target, int unsignedp)
3805 enum machine_mode compute_mode;
3806 rtx tquotient;
3807 rtx quotient = 0, remainder = 0;
3808 rtx last;
3809 int size;
3810 rtx insn;
3811 optab optab1, optab2;
3812 int op1_is_constant, op1_is_pow2 = 0;
3813 int max_cost, extra_cost;
3814 static HOST_WIDE_INT last_div_const = 0;
3815 static HOST_WIDE_INT ext_op1;
3816 bool speed = optimize_insn_for_speed_p ();
3818 op1_is_constant = CONST_INT_P (op1);
3819 if (op1_is_constant)
3821 ext_op1 = INTVAL (op1);
3822 if (unsignedp)
3823 ext_op1 &= GET_MODE_MASK (mode);
3824 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3825 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3829 This is the structure of expand_divmod:
3831 First comes code to fix up the operands so we can perform the operations
3832 correctly and efficiently.
3834 Second comes a switch statement with code specific for each rounding mode.
3835 For some special operands this code emits all RTL for the desired
3836 operation, for other cases, it generates only a quotient and stores it in
3837 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3838 to indicate that it has not done anything.
3840 Last comes code that finishes the operation. If QUOTIENT is set and
3841 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3842 QUOTIENT is not set, it is computed using trunc rounding.
3844 We try to generate special code for division and remainder when OP1 is a
3845 constant. If |OP1| = 2**n we can use shifts and some other fast
3846 operations. For other values of OP1, we compute a carefully selected
3847 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3848 by m.
3850 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3851 half of the product. Different strategies for generating the product are
3852 implemented in expmed_mult_highpart.
3854 If what we actually want is the remainder, we generate that by another
3855 by-constant multiplication and a subtraction. */
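/* One concrete instance of the reciprocal idea (a sketch only, not tied
   to any particular target): for a 32-bit mode, unsigned division by 3
   can be done as the high part of a widening multiply,

     x / 3 == (32x32->64 product of x and 0xAAAAAAAB) >> 33,

   which is exact for every 32-bit x.  The code below picks such a
   multiplier with choose_multiplier and emits the high-part multiply
   through expmed_mult_highpart.  */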
3857 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3858 code below will malfunction if we are, so check here and handle
3859 the special case if so. */
3860 if (op1 == const1_rtx)
3861 return rem_flag ? const0_rtx : op0;
3863 /* When dividing by -1, we could get an overflow.
3864 negv_optab can handle overflows. */
3865 if (! unsignedp && op1 == constm1_rtx)
3867 if (rem_flag)
3868 return const0_rtx;
3869 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3870 ? negv_optab : neg_optab, op0, target, 0);
3873 if (target
3874 /* Don't use the function value register as a target
3875 since we have to read it as well as write it,
3876 and function-inlining gets confused by this. */
3877 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3878 /* Don't clobber an operand while doing a multi-step calculation. */
3879 || ((rem_flag || op1_is_constant)
3880 && (reg_mentioned_p (target, op0)
3881 || (MEM_P (op0) && MEM_P (target))))
3882 || reg_mentioned_p (target, op1)
3883 || (MEM_P (op1) && MEM_P (target))))
3884 target = 0;
3886 /* Get the mode in which to perform this computation. Normally it will
3887 be MODE, but sometimes we can't do the desired operation in MODE.
3888 If so, pick a wider mode in which we can do the operation. Convert
3889 to that mode at the start to avoid repeated conversions.
3891 First see what operations we need. These depend on the expression
3892 we are evaluating. (We assume that divxx3 insns exist under the
3893      same conditions as modxx3 insns, and that these insns don't normally
3894 fail. If these assumptions are not correct, we may generate less
3895 efficient code in some cases.)
3897 Then see if we find a mode in which we can open-code that operation
3898 (either a division, modulus, or shift). Finally, check for the smallest
3899 mode for which we can do the operation with a library call. */
3901 /* We might want to refine this now that we have division-by-constant
3902 optimization. Since expmed_mult_highpart tries so many variants, it is
3903 not straightforward to generalize this. Maybe we should make an array
3904 of possible modes in init_expmed? Save this for GCC 2.7. */
3906 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3907 ? (unsignedp ? lshr_optab : ashr_optab)
3908 : (unsignedp ? udiv_optab : sdiv_optab));
3909 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3910 ? optab1
3911 : (unsignedp ? udivmod_optab : sdivmod_optab));
3913 for (compute_mode = mode; compute_mode != VOIDmode;
3914 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3915 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3916 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3917 break;
3919 if (compute_mode == VOIDmode)
3920 for (compute_mode = mode; compute_mode != VOIDmode;
3921 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3922 if (optab_libfunc (optab1, compute_mode)
3923 || optab_libfunc (optab2, compute_mode))
3924 break;
3926 /* If we still couldn't find a mode, use MODE, but expand_binop will
3927 probably die. */
3928 if (compute_mode == VOIDmode)
3929 compute_mode = mode;
3931 if (target && GET_MODE (target) == compute_mode)
3932 tquotient = target;
3933 else
3934 tquotient = gen_reg_rtx (compute_mode);
3936 size = GET_MODE_BITSIZE (compute_mode);
3937 #if 0
3938 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3939 (mode), and thereby get better code when OP1 is a constant. Do that
3940 later. It will require going over all usages of SIZE below. */
3941 size = GET_MODE_BITSIZE (mode);
3942 #endif
3944 /* Only deduct something for a REM if the last divide done was
3945 for a different constant. Then set the constant of the last
3946 divide. */
3947 max_cost = (unsignedp
3948 ? udiv_cost (speed, compute_mode)
3949 : sdiv_cost (speed, compute_mode));
3950 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3951 && INTVAL (op1) == last_div_const))
3952 max_cost -= (mul_cost (speed, compute_mode)
3953 + add_cost (speed, compute_mode));
3955 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3957 /* Now convert to the best mode to use. */
3958 if (compute_mode != mode)
3960 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3961 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3963 /* convert_modes may have placed op1 into a register, so we
3964 must recompute the following. */
3965 op1_is_constant = CONST_INT_P (op1);
3966 op1_is_pow2 = (op1_is_constant
3967 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3968 || (! unsignedp
3969 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3972 /* If one of the operands is a volatile MEM, copy it into a register. */
3974 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3975 op0 = force_reg (compute_mode, op0);
3976 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3977 op1 = force_reg (compute_mode, op1);
3979 /* If we need the remainder or if OP1 is constant, we need to
3980 put OP0 in a register in case it has any queued subexpressions. */
3981 if (rem_flag || op1_is_constant)
3982 op0 = force_reg (compute_mode, op0);
3984 last = get_last_insn ();
3986 /* Promote floor rounding to trunc rounding for unsigned operations. */
3987 if (unsignedp)
3989 if (code == FLOOR_DIV_EXPR)
3990 code = TRUNC_DIV_EXPR;
3991 if (code == FLOOR_MOD_EXPR)
3992 code = TRUNC_MOD_EXPR;
3993 if (code == EXACT_DIV_EXPR && op1_is_pow2)
3994 code = TRUNC_DIV_EXPR;
3997 if (op1 != const0_rtx)
3998 switch (code)
4000 case TRUNC_MOD_EXPR:
4001 case TRUNC_DIV_EXPR:
4002 if (op1_is_constant)
4004 if (unsignedp)
4006 unsigned HOST_WIDE_INT mh, ml;
4007 int pre_shift, post_shift;
4008 int dummy;
4009 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4010 & GET_MODE_MASK (compute_mode));
4012 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4014 pre_shift = floor_log2 (d);
4015 if (rem_flag)
4017 remainder
4018 = expand_binop (compute_mode, and_optab, op0,
4019 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4020 remainder, 1,
4021 OPTAB_LIB_WIDEN);
4022 if (remainder)
4023 return gen_lowpart (mode, remainder);
4025 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4026 pre_shift, tquotient, 1);
4028 else if (size <= HOST_BITS_PER_WIDE_INT)
4030 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4032 /* Most significant bit of divisor is set; emit an scc
4033 insn. */
4034 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4035 compute_mode, 1, 1);
4037 else
4039 /* Find a suitable multiplier and right shift count
4040 instead of multiplying with D. */
4042 mh = choose_multiplier (d, size, size,
4043 &ml, &post_shift, &dummy);
4045 /* If the suggested multiplier is more than SIZE bits,
4046 we can do better for even divisors, using an
4047 initial right shift. */
4048 if (mh != 0 && (d & 1) == 0)
4050 pre_shift = floor_log2 (d & -d);
4051 mh = choose_multiplier (d >> pre_shift, size,
4052 size - pre_shift,
4053 &ml, &post_shift, &dummy);
4054 gcc_assert (!mh);
4056 else
4057 pre_shift = 0;
4059 if (mh != 0)
4061 rtx t1, t2, t3, t4;
4063 if (post_shift - 1 >= BITS_PER_WORD)
4064 goto fail1;
4066 extra_cost
4067 = (shift_cost (speed, compute_mode, post_shift - 1)
4068 + shift_cost (speed, compute_mode, 1)
4069 + 2 * add_cost (speed, compute_mode));
4070 t1 = expmed_mult_highpart (compute_mode, op0,
4071 GEN_INT (ml),
4072 NULL_RTX, 1,
4073 max_cost - extra_cost);
4074 if (t1 == 0)
4075 goto fail1;
4076 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4077 op0, t1),
4078 NULL_RTX);
4079 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4080 t2, 1, NULL_RTX, 1);
4081 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4082 t1, t3),
4083 NULL_RTX);
4084 quotient = expand_shift
4085 (RSHIFT_EXPR, compute_mode, t4,
4086 post_shift - 1, tquotient, 1);
4088 else
4090 rtx t1, t2;
4092 if (pre_shift >= BITS_PER_WORD
4093 || post_shift >= BITS_PER_WORD)
4094 goto fail1;
4096 t1 = expand_shift
4097 (RSHIFT_EXPR, compute_mode, op0,
4098 pre_shift, NULL_RTX, 1);
4099 extra_cost
4100 = (shift_cost (speed, compute_mode, pre_shift)
4101 + shift_cost (speed, compute_mode, post_shift));
4102 t2 = expmed_mult_highpart (compute_mode, t1,
4103 GEN_INT (ml),
4104 NULL_RTX, 1,
4105 max_cost - extra_cost);
4106 if (t2 == 0)
4107 goto fail1;
4108 quotient = expand_shift
4109 (RSHIFT_EXPR, compute_mode, t2,
4110 post_shift, tquotient, 1);
4114 else /* Too wide mode to use tricky code */
4115 break;
4117 insn = get_last_insn ();
4118 if (insn != last)
4119 set_dst_reg_note (insn, REG_EQUAL,
4120 gen_rtx_UDIV (compute_mode, op0, op1),
4121 quotient);
4123 else /* TRUNC_DIV, signed */
4125 unsigned HOST_WIDE_INT ml;
4126 int lgup, post_shift;
4127 rtx mlr;
4128 HOST_WIDE_INT d = INTVAL (op1);
4129 unsigned HOST_WIDE_INT abs_d;
4131 /* Since d might be INT_MIN, we have to cast to
4132 unsigned HOST_WIDE_INT before negating to avoid
4133 undefined signed overflow. */
4134 abs_d = (d >= 0
4135 ? (unsigned HOST_WIDE_INT) d
4136 : - (unsigned HOST_WIDE_INT) d);
4138 /* n rem d = n rem -d */
4139 if (rem_flag && d < 0)
4141 d = abs_d;
4142 op1 = gen_int_mode (abs_d, compute_mode);
4145 if (d == 1)
4146 quotient = op0;
4147 else if (d == -1)
4148 quotient = expand_unop (compute_mode, neg_optab, op0,
4149 tquotient, 0);
4150 else if (HOST_BITS_PER_WIDE_INT >= size
4151 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4153 /* This case is not handled correctly below. */
4154 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4155 compute_mode, 1, 1);
4156 if (quotient == 0)
4157 goto fail1;
4159 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4160 && (rem_flag
4161 ? smod_pow2_cheap (speed, compute_mode)
4162 : sdiv_pow2_cheap (speed, compute_mode))
4163                     /* We assume that the cheap metric is true if the
4164 optab has an expander for this mode. */
4165 && ((optab_handler ((rem_flag ? smod_optab
4166 : sdiv_optab),
4167 compute_mode)
4168 != CODE_FOR_nothing)
4169 || (optab_handler (sdivmod_optab,
4170 compute_mode)
4171 != CODE_FOR_nothing)))
4173 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4175 if (rem_flag)
4177 remainder = expand_smod_pow2 (compute_mode, op0, d);
4178 if (remainder)
4179 return gen_lowpart (mode, remainder);
4182 if (sdiv_pow2_cheap (speed, compute_mode)
4183 && ((optab_handler (sdiv_optab, compute_mode)
4184 != CODE_FOR_nothing)
4185 || (optab_handler (sdivmod_optab, compute_mode)
4186 != CODE_FOR_nothing)))
4187 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4188 compute_mode, op0,
4189 gen_int_mode (abs_d,
4190 compute_mode),
4191 NULL_RTX, 0);
4192 else
4193 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4195 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4196 negate the quotient. */
4197 if (d < 0)
4199 insn = get_last_insn ();
4200 if (insn != last
4201 && abs_d < ((unsigned HOST_WIDE_INT) 1
4202 << (HOST_BITS_PER_WIDE_INT - 1)))
4203 set_dst_reg_note (insn, REG_EQUAL,
4204 gen_rtx_DIV (compute_mode, op0,
4205 gen_int_mode
4206 (abs_d,
4207 compute_mode)),
4208 quotient);
4210 quotient = expand_unop (compute_mode, neg_optab,
4211 quotient, quotient, 0);
4214 else if (size <= HOST_BITS_PER_WIDE_INT)
4216 choose_multiplier (abs_d, size, size - 1,
4217 &ml, &post_shift, &lgup);
4218 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4220 rtx t1, t2, t3;
4222 if (post_shift >= BITS_PER_WORD
4223 || size - 1 >= BITS_PER_WORD)
4224 goto fail1;
4226 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4227 + shift_cost (speed, compute_mode, size - 1)
4228 + add_cost (speed, compute_mode));
4229 t1 = expmed_mult_highpart (compute_mode, op0,
4230 GEN_INT (ml), NULL_RTX, 0,
4231 max_cost - extra_cost);
4232 if (t1 == 0)
4233 goto fail1;
4234 t2 = expand_shift
4235 (RSHIFT_EXPR, compute_mode, t1,
4236 post_shift, NULL_RTX, 0);
4237 t3 = expand_shift
4238 (RSHIFT_EXPR, compute_mode, op0,
4239 size - 1, NULL_RTX, 0);
4240 if (d < 0)
4241 quotient
4242 = force_operand (gen_rtx_MINUS (compute_mode,
4243 t3, t2),
4244 tquotient);
4245 else
4246 quotient
4247 = force_operand (gen_rtx_MINUS (compute_mode,
4248 t2, t3),
4249 tquotient);
4251 else
4253 rtx t1, t2, t3, t4;
4255 if (post_shift >= BITS_PER_WORD
4256 || size - 1 >= BITS_PER_WORD)
4257 goto fail1;
4259 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4260 mlr = gen_int_mode (ml, compute_mode);
4261 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4262 + shift_cost (speed, compute_mode, size - 1)
4263 + 2 * add_cost (speed, compute_mode));
4264 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4265 NULL_RTX, 0,
4266 max_cost - extra_cost);
4267 if (t1 == 0)
4268 goto fail1;
4269 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4270 t1, op0),
4271 NULL_RTX);
4272 t3 = expand_shift
4273 (RSHIFT_EXPR, compute_mode, t2,
4274 post_shift, NULL_RTX, 0);
4275 t4 = expand_shift
4276 (RSHIFT_EXPR, compute_mode, op0,
4277 size - 1, NULL_RTX, 0);
4278 if (d < 0)
4279 quotient
4280 = force_operand (gen_rtx_MINUS (compute_mode,
4281 t4, t3),
4282 tquotient);
4283 else
4284 quotient
4285 = force_operand (gen_rtx_MINUS (compute_mode,
4286 t3, t4),
4287 tquotient);
4290 else /* Too wide mode to use tricky code */
4291 break;
4293 insn = get_last_insn ();
4294 if (insn != last)
4295 set_dst_reg_note (insn, REG_EQUAL,
4296 gen_rtx_DIV (compute_mode, op0, op1),
4297 quotient);
4299 break;
4301 fail1:
4302 delete_insns_since (last);
4303 break;
4305 case FLOOR_DIV_EXPR:
4306 case FLOOR_MOD_EXPR:
4307 /* We will come here only for signed operations. */
4308 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4310 unsigned HOST_WIDE_INT mh, ml;
4311 int pre_shift, lgup, post_shift;
4312 HOST_WIDE_INT d = INTVAL (op1);
4314 if (d > 0)
4316 /* We could just as easily deal with negative constants here,
4317 but it does not seem worth the trouble for GCC 2.6. */
4318 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4320 pre_shift = floor_log2 (d);
4321 if (rem_flag)
4323 remainder = expand_binop (compute_mode, and_optab, op0,
4324 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4325 remainder, 0, OPTAB_LIB_WIDEN);
4326 if (remainder)
4327 return gen_lowpart (mode, remainder);
4329 quotient = expand_shift
4330 (RSHIFT_EXPR, compute_mode, op0,
4331 pre_shift, tquotient, 0);
4333 else
4335 rtx t1, t2, t3, t4;
4337 mh = choose_multiplier (d, size, size - 1,
4338 &ml, &post_shift, &lgup);
4339 gcc_assert (!mh);
4341 if (post_shift < BITS_PER_WORD
4342 && size - 1 < BITS_PER_WORD)
4344 t1 = expand_shift
4345 (RSHIFT_EXPR, compute_mode, op0,
4346 size - 1, NULL_RTX, 0);
4347 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4348 NULL_RTX, 0, OPTAB_WIDEN);
4349 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4350 + shift_cost (speed, compute_mode, size - 1)
4351 + 2 * add_cost (speed, compute_mode));
4352 t3 = expmed_mult_highpart (compute_mode, t2,
4353 GEN_INT (ml), NULL_RTX, 1,
4354 max_cost - extra_cost);
4355 if (t3 != 0)
4357 t4 = expand_shift
4358 (RSHIFT_EXPR, compute_mode, t3,
4359 post_shift, NULL_RTX, 1);
4360 quotient = expand_binop (compute_mode, xor_optab,
4361 t4, t1, tquotient, 0,
4362 OPTAB_WIDEN);
4367 else
4369 rtx nsign, t1, t2, t3, t4;
4370 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4371 op0, constm1_rtx), NULL_RTX);
4372 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4373 0, OPTAB_WIDEN);
4374 nsign = expand_shift
4375 (RSHIFT_EXPR, compute_mode, t2,
4376 size - 1, NULL_RTX, 0);
4377 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4378 NULL_RTX);
4379 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4380 NULL_RTX, 0);
4381 if (t4)
4383 rtx t5;
4384 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4385 NULL_RTX, 0);
4386 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4387 t4, t5),
4388 tquotient);
4393 if (quotient != 0)
4394 break;
4395 delete_insns_since (last);
4397 /* Try using an instruction that produces both the quotient and
4398 remainder, using truncation. We can easily compensate the quotient
4399 or remainder to get floor rounding, once we have the remainder.
4400 Notice that we compute also the final remainder value here,
4401 and return the result right away. */
4402 if (target == 0 || GET_MODE (target) != compute_mode)
4403 target = gen_reg_rtx (compute_mode);
4405 if (rem_flag)
4407 remainder
4408 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4409 quotient = gen_reg_rtx (compute_mode);
4411 else
4413 quotient
4414 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4415 remainder = gen_reg_rtx (compute_mode);
4418 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4419 quotient, remainder, 0))
4421 /* This could be computed with a branch-less sequence.
4422 Save that for later. */
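/* What the adjustment below does: after a truncating divmod, floor
   rounding differs only when the remainder is nonzero and OP0 and OP1
   have opposite signs; then the quotient is decremented and OP1 added to
   the remainder.  E.g. -7 floor-div 2: trunc gives q == -3, r == -1;
   the signs differ, so the result is q == -4, r == 1.  */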
4423 rtx tem;
4424 rtx label = gen_label_rtx ();
4425 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4426 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4427 NULL_RTX, 0, OPTAB_WIDEN);
4428 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4429 expand_dec (quotient, const1_rtx);
4430 expand_inc (remainder, op1);
4431 emit_label (label);
4432 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4435 /* No luck with division elimination or divmod. Have to do it
4436 by conditionally adjusting op0 *and* the result. */
4438 rtx label1, label2, label3, label4, label5;
4439 rtx adjusted_op0;
4440 rtx tem;
4442 quotient = gen_reg_rtx (compute_mode);
4443 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4444 label1 = gen_label_rtx ();
4445 label2 = gen_label_rtx ();
4446 label3 = gen_label_rtx ();
4447 label4 = gen_label_rtx ();
4448 label5 = gen_label_rtx ();
4449 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4450 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4451 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4452 quotient, 0, OPTAB_LIB_WIDEN);
4453 if (tem != quotient)
4454 emit_move_insn (quotient, tem);
4455 emit_jump_insn (gen_jump (label5));
4456 emit_barrier ();
4457 emit_label (label1);
4458 expand_inc (adjusted_op0, const1_rtx);
4459 emit_jump_insn (gen_jump (label4));
4460 emit_barrier ();
4461 emit_label (label2);
4462 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4463 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4464 quotient, 0, OPTAB_LIB_WIDEN);
4465 if (tem != quotient)
4466 emit_move_insn (quotient, tem);
4467 emit_jump_insn (gen_jump (label5));
4468 emit_barrier ();
4469 emit_label (label3);
4470 expand_dec (adjusted_op0, const1_rtx);
4471 emit_label (label4);
4472 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4473 quotient, 0, OPTAB_LIB_WIDEN);
4474 if (tem != quotient)
4475 emit_move_insn (quotient, tem);
4476 expand_dec (quotient, const1_rtx);
4477 emit_label (label5);
4479 break;
4481 case CEIL_DIV_EXPR:
4482 case CEIL_MOD_EXPR:
4483 if (unsignedp)
4485 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4487 rtx t1, t2, t3;
4488 unsigned HOST_WIDE_INT d = INTVAL (op1);
4489 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4490 floor_log2 (d), tquotient, 1);
4491 t2 = expand_binop (compute_mode, and_optab, op0,
4492 GEN_INT (d - 1),
4493 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4494 t3 = gen_reg_rtx (compute_mode);
4495 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4496 compute_mode, 1, 1);
4497 if (t3 == 0)
4499 rtx lab;
4500 lab = gen_label_rtx ();
4501 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4502 expand_inc (t1, const1_rtx);
4503 emit_label (lab);
4504 quotient = t1;
4506 else
4507 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4508 t1, t3),
4509 tquotient);
4510 break;
4513 /* Try using an instruction that produces both the quotient and
4514 remainder, using truncation. We can easily compensate the
4515 quotient or remainder to get ceiling rounding, once we have the
4516 remainder. Notice that we compute also the final remainder
4517 value here, and return the result right away. */
4518 if (target == 0 || GET_MODE (target) != compute_mode)
4519 target = gen_reg_rtx (compute_mode);
4521 if (rem_flag)
4523 remainder = (REG_P (target)
4524 ? target : gen_reg_rtx (compute_mode));
4525 quotient = gen_reg_rtx (compute_mode);
4527 else
4529 quotient = (REG_P (target)
4530 ? target : gen_reg_rtx (compute_mode));
4531 remainder = gen_reg_rtx (compute_mode);
4534 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4535 remainder, 1))
4537 /* This could be computed with a branch-less sequence.
4538 Save that for later. */
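/* Here the fix-up is simpler: for unsigned operands, ceiling rounding
   differs from truncation exactly when the remainder is nonzero, in
   which case the quotient is incremented and OP1 subtracted from the
   remainder.  E.g. 7 ceil-div 2: trunc gives q == 3, r == 1, so the
   result is q == 4 with remainder 7 - 4*2.  */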
4539 rtx label = gen_label_rtx ();
4540 do_cmp_and_jump (remainder, const0_rtx, EQ,
4541 compute_mode, label);
4542 expand_inc (quotient, const1_rtx);
4543 expand_dec (remainder, op1);
4544 emit_label (label);
4545 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4548 /* No luck with division elimination or divmod. Have to do it
4549 by conditionally adjusting op0 *and* the result. */
4551 rtx label1, label2;
4552 rtx adjusted_op0, tem;
4554 quotient = gen_reg_rtx (compute_mode);
4555 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4556 label1 = gen_label_rtx ();
4557 label2 = gen_label_rtx ();
4558 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4559 compute_mode, label1);
4560 emit_move_insn (quotient, const0_rtx);
4561 emit_jump_insn (gen_jump (label2));
4562 emit_barrier ();
4563 emit_label (label1);
4564 expand_dec (adjusted_op0, const1_rtx);
4565 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4566 quotient, 1, OPTAB_LIB_WIDEN);
4567 if (tem != quotient)
4568 emit_move_insn (quotient, tem);
4569 expand_inc (quotient, const1_rtx);
4570 emit_label (label2);
4573 else /* signed */
4575 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4576 && INTVAL (op1) >= 0)
4578 /* This is extremely similar to the code for the unsigned case
4579 above. For 2.7 we should merge these variants, but for
4580 2.6.1 I don't want to touch the code for unsigned since that
4581            gets used in C.  The signed case will only be used by other
4582 languages (Ada). */
4584 rtx t1, t2, t3;
4585 unsigned HOST_WIDE_INT d = INTVAL (op1);
4586 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4587 floor_log2 (d), tquotient, 0);
4588 t2 = expand_binop (compute_mode, and_optab, op0,
4589 GEN_INT (d - 1),
4590 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4591 t3 = gen_reg_rtx (compute_mode);
4592 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4593 compute_mode, 1, 1);
4594 if (t3 == 0)
4596 rtx lab;
4597 lab = gen_label_rtx ();
4598 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4599 expand_inc (t1, const1_rtx);
4600 emit_label (lab);
4601 quotient = t1;
4603 else
4604 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4605 t1, t3),
4606 tquotient);
4607 break;
4610 /* Try using an instruction that produces both the quotient and
4611 remainder, using truncation. We can easily compensate the
4612 quotient or remainder to get ceiling rounding, once we have the
4613 remainder. Notice that we compute also the final remainder
4614 value here, and return the result right away. */
4615 if (target == 0 || GET_MODE (target) != compute_mode)
4616 target = gen_reg_rtx (compute_mode);
4617 if (rem_flag)
4619            remainder = (REG_P (target)
4620 ? target : gen_reg_rtx (compute_mode));
4621 quotient = gen_reg_rtx (compute_mode);
4623 else
4625 quotient = (REG_P (target)
4626 ? target : gen_reg_rtx (compute_mode));
4627 remainder = gen_reg_rtx (compute_mode);
4630 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4631 remainder, 0))
4633 /* This could be computed with a branch-less sequence.
4634 Save that for later. */
4635 rtx tem;
4636 rtx label = gen_label_rtx ();
4637 do_cmp_and_jump (remainder, const0_rtx, EQ,
4638 compute_mode, label);
4639 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4640 NULL_RTX, 0, OPTAB_WIDEN);
4641 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4642 expand_inc (quotient, const1_rtx);
4643 expand_dec (remainder, op1);
4644 emit_label (label);
4645 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4648 /* No luck with division elimination or divmod. Have to do it
4649 by conditionally adjusting op0 *and* the result. */
4651 rtx label1, label2, label3, label4, label5;
4652 rtx adjusted_op0;
4653 rtx tem;
4655 quotient = gen_reg_rtx (compute_mode);
4656 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4657 label1 = gen_label_rtx ();
4658 label2 = gen_label_rtx ();
4659 label3 = gen_label_rtx ();
4660 label4 = gen_label_rtx ();
4661 label5 = gen_label_rtx ();
4662 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4663 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4664 compute_mode, label1);
4665 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4666 quotient, 0, OPTAB_LIB_WIDEN);
4667 if (tem != quotient)
4668 emit_move_insn (quotient, tem);
4669 emit_jump_insn (gen_jump (label5));
4670 emit_barrier ();
4671 emit_label (label1);
4672 expand_dec (adjusted_op0, const1_rtx);
4673 emit_jump_insn (gen_jump (label4));
4674 emit_barrier ();
4675 emit_label (label2);
4676 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4677 compute_mode, label3);
4678 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4679 quotient, 0, OPTAB_LIB_WIDEN);
4680 if (tem != quotient)
4681 emit_move_insn (quotient, tem);
4682 emit_jump_insn (gen_jump (label5));
4683 emit_barrier ();
4684 emit_label (label3);
4685 expand_inc (adjusted_op0, const1_rtx);
4686 emit_label (label4);
4687 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4688 quotient, 0, OPTAB_LIB_WIDEN);
4689 if (tem != quotient)
4690 emit_move_insn (quotient, tem);
4691 expand_inc (quotient, const1_rtx);
4692 emit_label (label5);
4695 break;
4697 case EXACT_DIV_EXPR:
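/* When the division is known to be exact, a multiplicative inverse modulo
   2**size replaces the division.  Sketch for a 32-bit mode and D == 3:
   3 * 0xAAAAAAAB == 1 (mod 2**32), so for any x divisible by 3,
   x / 3 == (x * 0xAAAAAAAB) mod 2**32, e.g. 15 * 0xAAAAAAAB mod 2**32 == 5.
   Even divisors are first reduced by the right shift computed below.  */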
4698 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4700 HOST_WIDE_INT d = INTVAL (op1);
4701 unsigned HOST_WIDE_INT ml;
4702 int pre_shift;
4703 rtx t1;
4705 pre_shift = floor_log2 (d & -d);
4706 ml = invert_mod2n (d >> pre_shift, size);
4707 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4708 pre_shift, NULL_RTX, unsignedp);
4709 quotient = expand_mult (compute_mode, t1,
4710 gen_int_mode (ml, compute_mode),
4711 NULL_RTX, 1);
4713 insn = get_last_insn ();
4714 set_dst_reg_note (insn, REG_EQUAL,
4715 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4716 compute_mode, op0, op1),
4717 quotient);
4719 break;
4721 case ROUND_DIV_EXPR:
4722 case ROUND_MOD_EXPR:
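/* Rounds to nearest, with halves rounded away from zero: both branches
   first form a truncating quotient and remainder and then adjust by one
   when 2*|remainder| >= |OP1|; e.g. 7 round-div 2 == 4 and
   -7 round-div 2 == -4.  */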
4723 if (unsignedp)
4725 rtx tem;
4726 rtx label;
4727 label = gen_label_rtx ();
4728 quotient = gen_reg_rtx (compute_mode);
4729 remainder = gen_reg_rtx (compute_mode);
4730 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4732 rtx tem;
4733 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4734 quotient, 1, OPTAB_LIB_WIDEN);
4735 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4736 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4737 remainder, 1, OPTAB_LIB_WIDEN);
4739 tem = plus_constant (compute_mode, op1, -1);
4740 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4741 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4742 expand_inc (quotient, const1_rtx);
4743 expand_dec (remainder, op1);
4744 emit_label (label);
4746 else
4748 rtx abs_rem, abs_op1, tem, mask;
4749 rtx label;
4750 label = gen_label_rtx ();
4751 quotient = gen_reg_rtx (compute_mode);
4752 remainder = gen_reg_rtx (compute_mode);
4753 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4755 rtx tem;
4756 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4757 quotient, 0, OPTAB_LIB_WIDEN);
4758 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4759 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4760 remainder, 0, OPTAB_LIB_WIDEN);
4762 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4763 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4764 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4765 1, NULL_RTX, 1);
4766 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4767 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4768 NULL_RTX, 0, OPTAB_WIDEN);
4769 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4770 size - 1, NULL_RTX, 0);
4771 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4772 NULL_RTX, 0, OPTAB_WIDEN);
4773 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4774 NULL_RTX, 0, OPTAB_WIDEN);
4775 expand_inc (quotient, tem);
4776 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4777 NULL_RTX, 0, OPTAB_WIDEN);
4778 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4779 NULL_RTX, 0, OPTAB_WIDEN);
4780 expand_dec (remainder, tem);
4781 emit_label (label);
4783 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4785 default:
4786 gcc_unreachable ();
4789 if (quotient == 0)
4791 if (target && GET_MODE (target) != compute_mode)
4792 target = 0;
4794 if (rem_flag)
4796 /* Try to produce the remainder without producing the quotient.
4797 If we seem to have a divmod pattern that does not require widening,
4798 don't try widening here. We should really have a WIDEN argument
4799 to expand_twoval_binop, since what we'd really like to do here is
4800 1) try a mod insn in compute_mode
4801 2) try a divmod insn in compute_mode
4802 3) try a div insn in compute_mode and multiply-subtract to get
4803 remainder
4804 4) try the same things with widening allowed. */
4805 remainder
4806 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4807 op0, op1, target,
4808 unsignedp,
4809 ((optab_handler (optab2, compute_mode)
4810 != CODE_FOR_nothing)
4811 ? OPTAB_DIRECT : OPTAB_WIDEN));
4812 if (remainder == 0)
4814 /* No luck there. Can we do remainder and divide at once
4815 without a library call? */
4816 remainder = gen_reg_rtx (compute_mode);
4817 if (! expand_twoval_binop ((unsignedp
4818 ? udivmod_optab
4819 : sdivmod_optab),
4820 op0, op1,
4821 NULL_RTX, remainder, unsignedp))
4822 remainder = 0;
4825 if (remainder)
4826 return gen_lowpart (mode, remainder);
4829 /* Produce the quotient. Try a quotient insn, but not a library call.
4830 If we have a divmod in this mode, use it in preference to widening
4831 the div (for this test we assume it will not fail). Note that optab2
4832 is set to the one of the two optabs that the call below will use. */
4833 quotient
4834 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4835 op0, op1, rem_flag ? NULL_RTX : target,
4836 unsignedp,
4837 ((optab_handler (optab2, compute_mode)
4838 != CODE_FOR_nothing)
4839 ? OPTAB_DIRECT : OPTAB_WIDEN));
4841 if (quotient == 0)
4843 /* No luck there. Try a quotient-and-remainder insn,
4844 keeping the quotient alone. */
4845 quotient = gen_reg_rtx (compute_mode);
4846 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4847 op0, op1,
4848 quotient, NULL_RTX, unsignedp))
4850 quotient = 0;
4851 if (! rem_flag)
4852 /* Still no luck. If we are not computing the remainder,
4853 use a library call for the quotient. */
4854 quotient = sign_expand_binop (compute_mode,
4855 udiv_optab, sdiv_optab,
4856 op0, op1, target,
4857 unsignedp, OPTAB_LIB_WIDEN);
4862 if (rem_flag)
4864 if (target && GET_MODE (target) != compute_mode)
4865 target = 0;
4867 if (quotient == 0)
4869 /* No divide instruction either. Use library for remainder. */
4870 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4871 op0, op1, target,
4872 unsignedp, OPTAB_LIB_WIDEN);
4873 /* No remainder function. Try a quotient-and-remainder
4874 function, keeping the remainder. */
4875 if (!remainder)
4877 remainder = gen_reg_rtx (compute_mode);
4878 if (!expand_twoval_binop_libfunc
4879 (unsignedp ? udivmod_optab : sdivmod_optab,
4880 op0, op1,
4881 NULL_RTX, remainder,
4882 unsignedp ? UMOD : MOD))
4883 remainder = NULL_RTX;
4886 else
4888 /* We divided. Now finish doing X - Y * (X / Y). */
4889 remainder = expand_mult (compute_mode, quotient, op1,
4890 NULL_RTX, unsignedp);
4891 remainder = expand_binop (compute_mode, sub_optab, op0,
4892 remainder, target, unsignedp,
4893 OPTAB_LIB_WIDEN);
4897 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4900 /* Return a tree node with data type TYPE, describing the value of X.
4901    Usually this is a VAR_DECL, if there is no obvious better choice.
4902    X may be an expression; however, we only support those expressions
4903 generated by loop.c. */
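/* A sketch only: given the rtx (plus:SI (reg:SI 60) (const_int 4)) and a
   32-bit integer TYPE, the result is a PLUS_EXPR of a VAR_DECL standing
   for the register (with its rtl set to that REG) and the integer
   constant 4.  */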
4905 tree
4906 make_tree (tree type, rtx x)
4908 tree t;
4910 switch (GET_CODE (x))
4912 case CONST_INT:
4914 HOST_WIDE_INT hi = 0;
4916 if (INTVAL (x) < 0
4917 && !(TYPE_UNSIGNED (type)
4918 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4919 < HOST_BITS_PER_WIDE_INT)))
4920 hi = -1;
4922 t = build_int_cst_wide (type, INTVAL (x), hi);
4924 return t;
4927 case CONST_DOUBLE:
4928 if (GET_MODE (x) == VOIDmode)
4929 t = build_int_cst_wide (type,
4930 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4931 else
4933 REAL_VALUE_TYPE d;
4935 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4936 t = build_real (type, d);
4939 return t;
4941 case CONST_VECTOR:
4943 int units = CONST_VECTOR_NUNITS (x);
4944 tree itype = TREE_TYPE (type);
4945 tree *elts;
4946 int i;
4948 /* Build a tree with vector elements. */
4949 elts = XALLOCAVEC (tree, units);
4950 for (i = units - 1; i >= 0; --i)
4952 rtx elt = CONST_VECTOR_ELT (x, i);
4953 elts[i] = make_tree (itype, elt);
4956 return build_vector (type, elts);
4959 case PLUS:
4960 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4961 make_tree (type, XEXP (x, 1)));
4963 case MINUS:
4964 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4965 make_tree (type, XEXP (x, 1)));
4967 case NEG:
4968 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4970 case MULT:
4971 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4972 make_tree (type, XEXP (x, 1)));
4974 case ASHIFT:
4975 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4976 make_tree (type, XEXP (x, 1)));
4978 case LSHIFTRT:
4979 t = unsigned_type_for (type);
4980 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4981 make_tree (t, XEXP (x, 0)),
4982 make_tree (type, XEXP (x, 1))));
4984 case ASHIFTRT:
4985 t = signed_type_for (type);
4986 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4987 make_tree (t, XEXP (x, 0)),
4988 make_tree (type, XEXP (x, 1))));
4990 case DIV:
4991 if (TREE_CODE (type) != REAL_TYPE)
4992 t = signed_type_for (type);
4993 else
4994 t = type;
4996 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4997 make_tree (t, XEXP (x, 0)),
4998 make_tree (t, XEXP (x, 1))));
4999 case UDIV:
5000 t = unsigned_type_for (type);
5001 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5002 make_tree (t, XEXP (x, 0)),
5003 make_tree (t, XEXP (x, 1))));
5005 case SIGN_EXTEND:
5006 case ZERO_EXTEND:
5007 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5008 GET_CODE (x) == ZERO_EXTEND);
5009 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5011 case CONST:
5012 return make_tree (type, XEXP (x, 0));
5014 case SYMBOL_REF:
5015 t = SYMBOL_REF_DECL (x);
5016 if (t)
5017 return fold_convert (type, build_fold_addr_expr (t));
5018 /* else fall through. */
5020 default:
5021 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5023 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5024 address mode to pointer mode. */
5025 if (POINTER_TYPE_P (type))
5026 x = convert_memory_address_addr_space
5027 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5029 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5030 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5031 t->decl_with_rtl.rtl = x;
5033 return t;
5037 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5038 and returning TARGET.
5040 If TARGET is 0, a pseudo-register or constant is returned. */
5043 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5045 rtx tem = 0;
5047 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5048 tem = simplify_binary_operation (AND, mode, op0, op1);
5049 if (tem == 0)
5050 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5052 if (target == 0)
5053 target = tem;
5054 else if (tem != target)
5055 emit_move_insn (target, tem);
5056 return target;
5059 /* Helper function for emit_store_flag. */
5060 static rtx
5061 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5062 enum machine_mode mode, enum machine_mode compare_mode,
5063 int unsignedp, rtx x, rtx y, int normalizep,
5064 enum machine_mode target_mode)
5066 struct expand_operand ops[4];
5067 rtx op0, last, comparison, subtarget;
5068 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5070 last = get_last_insn ();
5071 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5072 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5073 if (!x || !y)
5075 delete_insns_since (last);
5076 return NULL_RTX;
5079 if (target_mode == VOIDmode)
5080 target_mode = result_mode;
5081 if (!target)
5082 target = gen_reg_rtx (target_mode);
5084 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5086 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5087 create_fixed_operand (&ops[1], comparison);
5088 create_fixed_operand (&ops[2], x);
5089 create_fixed_operand (&ops[3], y);
5090 if (!maybe_expand_insn (icode, 4, ops))
5092 delete_insns_since (last);
5093 return NULL_RTX;
5095 subtarget = ops[0].value;
5097 /* If we are converting to a wider mode, first convert to
5098 TARGET_MODE, then normalize. This produces better combining
5099 opportunities on machines that have a SIGN_EXTRACT when we are
5100 testing a single bit. This mostly benefits the 68k.
5102 If STORE_FLAG_VALUE does not have the sign bit set when
5103 interpreted in MODE, we can do this conversion as unsigned, which
5104 is usually more efficient. */
5105 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5107 convert_move (target, subtarget,
5108 val_signbit_known_clear_p (result_mode,
5109 STORE_FLAG_VALUE));
5110 op0 = target;
5111 result_mode = target_mode;
5113 else
5114 op0 = subtarget;
5116 /* If we want to keep subexpressions around, don't reuse our last
5117 target. */
5118 if (optimize)
5119 subtarget = 0;
5121 /* Now normalize to the proper value in MODE. Sometimes we don't
5122 have to do anything. */
5123 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5125 /* STORE_FLAG_VALUE might be the most negative number, so write
5126      the comparison this way to avoid a compile-time warning.  */
5127 else if (- normalizep == STORE_FLAG_VALUE)
5128 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5130 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5131 it hard to use a value of just the sign bit due to ANSI integer
5132 constant typing rules. */
5133 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5134 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5135 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5136 normalizep == 1);
5137 else
5139 gcc_assert (STORE_FLAG_VALUE & 1);
5141 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5142 if (normalizep == -1)
5143 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5146 /* If we were converting to a smaller mode, do the conversion now. */
5147 if (target_mode != result_mode)
5149 convert_move (target, op0, 0);
5150 return target;
5152 else
5153 return op0;
5157 /* A subroutine of emit_store_flag only including "tricks" that do not
5158 need a recursive call. These are kept separate to avoid infinite
5159 loops. */
5161 static rtx
5162 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5163 enum machine_mode mode, int unsignedp, int normalizep,
5164 enum machine_mode target_mode)
5166 rtx subtarget;
5167 enum insn_code icode;
5168 enum machine_mode compare_mode;
5169 enum mode_class mclass;
5170 enum rtx_code scode;
5171 rtx tem;
5173 if (unsignedp)
5174 code = unsigned_condition (code);
5175 scode = swap_condition (code);
5177 /* If one operand is constant, make it the second one. Only do this
5178 if the other operand is not constant as well. */
5180 if (swap_commutative_operands_p (op0, op1))
5182 tem = op0;
5183 op0 = op1;
5184 op1 = tem;
5185 code = swap_condition (code);
5188 if (mode == VOIDmode)
5189 mode = GET_MODE (op0);
5191 /* For some comparisons with 1 and -1, we can convert this to
5192 comparisons with zero. This will often produce more opportunities for
5193 store-flag insns. */
5195 switch (code)
5197 case LT:
5198 if (op1 == const1_rtx)
5199 op1 = const0_rtx, code = LE;
5200 break;
5201 case LE:
5202 if (op1 == constm1_rtx)
5203 op1 = const0_rtx, code = LT;
5204 break;
5205 case GE:
5206 if (op1 == const1_rtx)
5207 op1 = const0_rtx, code = GT;
5208 break;
5209 case GT:
5210 if (op1 == constm1_rtx)
5211 op1 = const0_rtx, code = GE;
5212 break;
5213 case GEU:
5214 if (op1 == const1_rtx)
5215 op1 = const0_rtx, code = NE;
5216 break;
5217 case LTU:
5218 if (op1 == const1_rtx)
5219 op1 = const0_rtx, code = EQ;
5220 break;
5221 default:
5222 break;
5225 /* If we are comparing a double-word integer with zero or -1, we can
5226 convert the comparison into one involving a single word. */
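/* Roughly: (hi:lo) == 0 iff (hi | lo) == 0, (hi:lo) == -1 iff
   (hi & lo) == -1, and a signed test against zero only needs the sign
   bit of the high word; those are the three cases handled below.  */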
5227 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5228 && GET_MODE_CLASS (mode) == MODE_INT
5229 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5231 if ((code == EQ || code == NE)
5232 && (op1 == const0_rtx || op1 == constm1_rtx))
5234 rtx op00, op01;
5236 /* Do a logical OR or AND of the two words and compare the
5237 result. */
5238 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5239 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5240 tem = expand_binop (word_mode,
5241 op1 == const0_rtx ? ior_optab : and_optab,
5242 op00, op01, NULL_RTX, unsignedp,
5243 OPTAB_DIRECT);
5245 if (tem != 0)
5246 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5247 unsignedp, normalizep);
5249 else if ((code == LT || code == GE) && op1 == const0_rtx)
5251 rtx op0h;
5253 /* If testing the sign bit, can just test on high word. */
5254 op0h = simplify_gen_subreg (word_mode, op0, mode,
5255 subreg_highpart_offset (word_mode,
5256 mode));
5257 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5258 unsignedp, normalizep);
5260 else
5261 tem = NULL_RTX;
5263 if (tem)
5265 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5266 return tem;
5267 if (!target)
5268 target = gen_reg_rtx (target_mode);
5270 convert_move (target, tem,
5271 !val_signbit_known_set_p (word_mode,
5272 (normalizep ? normalizep
5273 : STORE_FLAG_VALUE)));
5274 return target;
5278 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5279 complement of A (for GE) and shifting the sign bit to the low bit. */
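/* E.g. in a 32-bit mode, (A < 0) as a 0/1 value is simply the logical
   shift (unsigned) A >> 31, and (A >= 0) is (unsigned) ~A >> 31; an
   arithmetic shift gives the 0/-1 flavor instead.  */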
5280 if (op1 == const0_rtx && (code == LT || code == GE)
5281 && GET_MODE_CLASS (mode) == MODE_INT
5282 && (normalizep || STORE_FLAG_VALUE == 1
5283 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5285 subtarget = target;
5287 if (!target)
5288 target_mode = mode;
5290 /* If the result is to be wider than OP0, it is best to convert it
5291 first. If it is to be narrower, it is *incorrect* to convert it
5292 first. */
5293 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5295 op0 = convert_modes (target_mode, mode, op0, 0);
5296 mode = target_mode;
5299 if (target_mode != mode)
5300 subtarget = 0;
5302 if (code == GE)
5303 op0 = expand_unop (mode, one_cmpl_optab, op0,
5304 ((STORE_FLAG_VALUE == 1 || normalizep)
5305 ? 0 : subtarget), 0);
5307 if (STORE_FLAG_VALUE == 1 || normalizep)
5308 /* If we are supposed to produce a 0/1 value, we want to do
5309 a logical shift from the sign bit to the low-order bit; for
5310 a -1/0 value, we do an arithmetic shift. */
5311 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5312 GET_MODE_BITSIZE (mode) - 1,
5313 subtarget, normalizep != -1);
5315 if (mode != target_mode)
5316 op0 = convert_modes (target_mode, mode, op0, 0);
5318 return op0;
5321 mclass = GET_MODE_CLASS (mode);
5322 for (compare_mode = mode; compare_mode != VOIDmode;
5323 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5325 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5326 icode = optab_handler (cstore_optab, optab_mode);
5327 if (icode != CODE_FOR_nothing)
5329 do_pending_stack_adjust ();
5330 tem = emit_cstore (target, icode, code, mode, compare_mode,
5331 unsignedp, op0, op1, normalizep, target_mode);
5332 if (tem)
5333 return tem;
5335 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5337 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5338 unsignedp, op1, op0, normalizep, target_mode);
5339 if (tem)
5340 return tem;
5342 break;
5346 return 0;
5349 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5350 and storing in TARGET. Normally return TARGET.
5351 Return 0 if that cannot be done.
5353 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5354 it is VOIDmode, they cannot both be CONST_INT.
5356 UNSIGNEDP is for the case where we have to widen the operands
5357 to perform the operation. It says to use zero-extension.
5359 NORMALIZEP is 1 if we should convert the result to be either zero
5360 or one. NORMALIZEP is -1 if we should convert the result to be
5361 either zero or -1. If NORMALIZEP is zero, the result will be left
5362 "raw" out of the scc insn. */
5364 rtx
5365 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5366 enum machine_mode mode, int unsignedp, int normalizep)
5368 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5369 enum rtx_code rcode;
5370 rtx subtarget;
5371 rtx tem, last, trueval;
5373 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5374 target_mode);
5375 if (tem)
5376 return tem;
5378 /* If we reached here, we can't do this with a scc insn, however there
5379 are some comparisons that can be done in other ways. Don't do any
5380 of these cases if branches are very cheap. */
5381 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5382 return 0;
5384 /* See what we need to return. We can only return a 1, -1, or the
5385 sign bit. */
5387 if (normalizep == 0)
5389 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5390 normalizep = STORE_FLAG_VALUE;
5392 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5393 ;
5394 else
5395 return 0;
5398 last = get_last_insn ();
5400 /* If optimizing, use different pseudo registers for each insn, instead
5401 of reusing the same pseudo. This leads to better CSE, but slows
5402 down the compiler, since there are more pseudos */
5403 subtarget = (!optimize
5404 && (target_mode == mode)) ? target : NULL_RTX;
5405 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5407 /* For floating-point comparisons, try the reverse comparison or try
5408 changing the "orderedness" of the comparison. */
5409 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5411 enum rtx_code first_code;
5412 bool and_them;
5414 rcode = reverse_condition_maybe_unordered (code);
5415 if (can_compare_p (rcode, mode, ccp_store_flag)
5416 && (code == ORDERED || code == UNORDERED
5417 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5418 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5420 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5421 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5423 /* For the reverse comparison, use either an addition or a XOR. */
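/* E.g. with STORE_FLAG_VALUE == 1 and NORMALIZEP == -1, the reversed
   comparison yields 0/1 and adding -1 turns that into the desired -1/0
   for the original condition; with NORMALIZEP == 1 instead, XORing the
   0/1 result with TRUEVAL (1) simply inverts it.  */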
5424 if (want_add
5425 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5426 optimize_insn_for_speed_p ()) == 0)
5428 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5429 STORE_FLAG_VALUE, target_mode);
5430 if (tem)
5431 return expand_binop (target_mode, add_optab, tem,
5432 GEN_INT (normalizep),
5433 target, 0, OPTAB_WIDEN);
5435 else if (!want_add
5436 && rtx_cost (trueval, XOR, 1,
5437 optimize_insn_for_speed_p ()) == 0)
5439 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5440 normalizep, target_mode);
5441 if (tem)
5442 return expand_binop (target_mode, xor_optab, tem, trueval,
5443 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5447 delete_insns_since (last);
5449 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5450 if (code == ORDERED || code == UNORDERED)
5451 return 0;
5453 and_them = split_comparison (code, mode, &first_code, &code);
5455 /* If there are no NaNs, the first comparison should always fall through.
5456 Effectively change the comparison to the other one. */
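/* E.g. an ordered comparison splits into "ORDERED && <unordered form>";
   when NaNs cannot occur the ORDERED half is always true, so only the
   second half needs to be emitted.  */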
5457 if (!HONOR_NANS (mode))
5459 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5460 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5461 target_mode);
5464 #ifdef HAVE_conditional_move
5465 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5466 conditional move. */
5467 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5468 normalizep, target_mode);
5469 if (tem == 0)
5470 return 0;
5472 if (and_them)
5473 tem = emit_conditional_move (target, code, op0, op1, mode,
5474 tem, const0_rtx, GET_MODE (tem), 0);
5475 else
5476 tem = emit_conditional_move (target, code, op0, op1, mode,
5477 trueval, tem, GET_MODE (tem), 0);
5479 if (tem == 0)
5480 delete_insns_since (last);
5481 return tem;
5482 #else
5483 return 0;
5484 #endif
5487 /* The remaining tricks only apply to integer comparisons. */
5489 if (GET_MODE_CLASS (mode) != MODE_INT)
5490 return 0;
5492 /* If this is an equality comparison of integers, we can try to exclusive-or
5493 (or subtract) the two operands and use a recursive call to try the
5494 comparison with zero. Don't do any of these cases if branches are
5495 very cheap. */
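/* For example, "A == B" becomes "(A ^ B) == 0" and "A != B" becomes
   "(A ^ B) != 0"; "A - B" serves equally well, since only equality with
   zero is tested and wraparound therefore does not matter.  */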
5497 if ((code == EQ || code == NE) && op1 != const0_rtx)
5499 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5500 OPTAB_WIDEN);
5502 if (tem == 0)
5503 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5504 OPTAB_WIDEN);
5505 if (tem != 0)
5506 tem = emit_store_flag (target, code, tem, const0_rtx,
5507 mode, unsignedp, normalizep);
5508 if (tem != 0)
5509 return tem;
5511 delete_insns_since (last);
5514 /* For integer comparisons, try the reverse comparison. However, for
5515 small X, if we would have to extend it anyway, implementing "X != 0"
5516 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
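/* As an illustration (assuming the "small X" above means a zero-extended
   subword value), -(int)X is negative exactly when X is nonzero, so one
   negation plus one arithmetic shift produces the -1/0 result directly.  */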
5517 rcode = reverse_condition (code);
5518 if (can_compare_p (rcode, mode, ccp_store_flag)
5519 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5520 && code == NE
5521 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5522 && op1 == const0_rtx))
5524 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5525 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5527 /* Again, for the reverse comparison, use either an addition or a XOR. */
5528 if (want_add
5529 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5530 optimize_insn_for_speed_p ()) == 0)
5532 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5533 STORE_FLAG_VALUE, target_mode);
5534 if (tem != 0)
5535 tem = expand_binop (target_mode, add_optab, tem,
5536 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5538 else if (!want_add
5539 && rtx_cost (trueval, XOR, 1,
5540 optimize_insn_for_speed_p ()) == 0)
5542 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5543 normalizep, target_mode);
5544 if (tem != 0)
5545 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5546 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5549 if (tem != 0)
5550 return tem;
5551 delete_insns_since (last);
5554 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5555 the constant zero. Reject all other comparisons at this point. Only
5556 do LE and GT if branches are expensive since they are expensive on
5557 2-operand machines. */
5559 if (op1 != const0_rtx
5560 || (code != EQ && code != NE
5561 && (BRANCH_COST (optimize_insn_for_speed_p (),
5562 false) <= 1 || (code != LE && code != GT))))
5563 return 0;
5565 /* Try to put the result of the comparison in the sign bit. Assume we can't
5566 do the necessary operation below. */
5568 tem = 0;
5570 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5571 the sign bit set. */
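/* Worked example with 32-bit values:  A = 5 gives 5 | 4 = 5 (sign clear),
   A = 0 gives 0 | -1 = -1 (sign set), and A = -3 gives -3 | -4 = -3
   (sign set).  */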
5573 if (code == LE)
5575 /* This is destructive, so SUBTARGET can't be OP0. */
5576 if (rtx_equal_p (subtarget, op0))
5577 subtarget = 0;
5579 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5580 OPTAB_WIDEN);
5581 if (tem)
5582 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5583 OPTAB_WIDEN);
5586 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5587 number of bits in the mode of OP0, minus one. */
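/* Worked example with BITS = 31:  A = 5 gives (5 >> 31) - 5 = -5 (sign
   set), A = 0 gives 0 - 0 = 0 (sign clear), and A = -3 gives -1 - (-3)
   = 2 (sign clear), so the sign bit ends up set exactly when A > 0.  */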
5589 if (code == GT)
5591 if (rtx_equal_p (subtarget, op0))
5592 subtarget = 0;
5594 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5595 GET_MODE_BITSIZE (mode) - 1,
5596 subtarget, 0);
5597 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5598 OPTAB_WIDEN);
5601 if (code == EQ || code == NE)
5603 /* For EQ or NE, one way to do the comparison is to apply an operation
5604 that converts the operand into a positive number if it is nonzero
5605 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5606 for NE we negate. This puts the result in the sign bit. Then we
5607 normalize with a shift, if needed.
5609 Two operations that can do the above actions are ABS and FFS, so try
5610 them. If that doesn't work, and MODE is smaller than a full word,
5611 we can use zero-extension to the wider mode (an unsigned conversion)
5612 as the operation. */
5614 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5615 that is compensated by the subsequent overflow when subtracting
5616 one / negating. */
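/* For example, using ABS:  A = 0 gives abs (A) - 1 = -1 (sign set) for EQ
   and -abs (A) = 0 (sign clear) for NE; A = 7 gives 6 (clear) for EQ and
   -7 (set) for NE; A = INT_MIN wraps to INT_MAX (clear) for EQ and stays
   INT_MIN (set) for NE.  */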
5618 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5619 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5620 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5621 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5622 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5624 tem = convert_modes (word_mode, mode, op0, 1);
5625 mode = word_mode;
5628 if (tem != 0)
5630 if (code == EQ)
5631 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5632 0, OPTAB_WIDEN);
5633 else
5634 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5637 /* If we couldn't do it that way, for NE we can "or" the two's complement
5638 of the value with itself. For EQ, we take the one's complement of
5639 that "or", which is an extra insn, so we only handle EQ if branches
5640 are expensive. */
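/* For example, A = 5 gives -5 | 5 = -1 (sign set, so A != 0), while A = 0
   gives 0 | 0 = 0 (sign clear); for EQ the subsequent one's complement
   flips the sign bit.  */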
5642 if (tem == 0
5643 && (code == NE
5644 || BRANCH_COST (optimize_insn_for_speed_p (),
5645 false) > 1))
5647 if (rtx_equal_p (subtarget, op0))
5648 subtarget = 0;
5650 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5651 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5652 OPTAB_WIDEN);
5654 if (tem && code == EQ)
5655 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5659 if (tem && normalizep)
5660 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5661 GET_MODE_BITSIZE (mode) - 1,
5662 subtarget, normalizep == 1);
5664 if (tem)
5666 if (!target)
5667 ;
5668 else if (GET_MODE (tem) != target_mode)
5670 convert_move (target, tem, 0);
5671 tem = target;
5673 else if (!subtarget)
5675 emit_move_insn (target, tem);
5676 tem = target;
5679 else
5680 delete_insns_since (last);
5682 return tem;
5685 /* Like emit_store_flag, but always succeeds. */
5687 rtx
5688 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5689 enum machine_mode mode, int unsignedp, int normalizep)
5691 rtx tem, label;
5692 rtx trueval, falseval;
5694 /* First see if emit_store_flag can do the job. */
5695 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5696 if (tem != 0)
5697 return tem;
5699 if (!target)
5700 target = gen_reg_rtx (word_mode);
5702 /* If this failed, we have to do this with set/compare/jump/set code.
5703 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
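/* That is, the sequence emitted below is roughly
     if (target == 0) goto L;  target = TRUEVAL;  L:
   leaving TARGET untouched when it is already zero.  */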
5704 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5705 if (code == NE
5706 && GET_MODE_CLASS (mode) == MODE_INT
5707 && REG_P (target)
5708 && op0 == target
5709 && op1 == const0_rtx)
5711 label = gen_label_rtx ();
5712 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5713 mode, NULL_RTX, NULL_RTX, label, -1);
5714 emit_move_insn (target, trueval);
5715 emit_label (label);
5716 return target;
5719 if (!REG_P (target)
5720 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5721 target = gen_reg_rtx (GET_MODE (target));
5723 /* Jump in the right direction if the target cannot implement CODE
5724 but can jump on its reverse condition. */
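/* E.g. for "A < B" on a target that can only branch on "A >= B", store 0
   first and branch over the store of the "true" value when "A >= B"
   holds.  */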
5725 falseval = const0_rtx;
5726 if (! can_compare_p (code, mode, ccp_jump)
5727 && (! FLOAT_MODE_P (mode)
5728 || code == ORDERED || code == UNORDERED
5729 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5730 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5732 enum rtx_code rcode;
5733 if (FLOAT_MODE_P (mode))
5734 rcode = reverse_condition_maybe_unordered (code);
5735 else
5736 rcode = reverse_condition (code);
5738 /* Canonicalize to UNORDERED for the libcall. */
5739 if (can_compare_p (rcode, mode, ccp_jump)
5740 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5742 falseval = trueval;
5743 trueval = const0_rtx;
5744 code = rcode;
5748 emit_move_insn (target, trueval);
5749 label = gen_label_rtx ();
5750 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5751 NULL_RTX, label, -1);
5753 emit_move_insn (target, falseval);
5754 emit_label (label);
5756 return target;
5759 /* Perform possibly multi-word comparison and conditional jump to LABEL
5760 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5761 now a thin wrapper around do_compare_rtx_and_jump. */
5763 static void
5764 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5765 rtx label)
5767 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5768 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5769 NULL_RTX, NULL_RTX, label, -1);