gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
   5    2011
   6    Free Software Foundation, Inc.
   7
   8 This file is part of GCC.
   9
  10 GCC is free software; you can redistribute it and/or modify it under
  11 the terms of the GNU General Public License as published by the Free
  12 Software Foundation; either version 3, or (at your option) any later
  13 version.
  14
  15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  17 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  18 for more details.
  19
  20 You should have received a copy of the GNU General Public License
  21 along with GCC; see the file COPYING3.  If not see
  22 <http://www.gnu.org/licenses/>.  */
  23
  24
  25 #include "config.h"
  26 #include "system.h"
  27 #include "coretypes.h"
  28 #include "tm.h"
  29 #include "diagnostic-core.h"
  30 #include "rtl.h"
  31 #include "tree.h"
  32 #include "tm_p.h"
  33 #include "flags.h"
  34 #include "insn-config.h"
  35 #include "expr.h"
  36 #include "optabs.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39 #include "df.h"
  40 #include "target.h"
  41 #include "expmed.h"
  42
  43 struct target_expmed default_target_expmed;
     /* THIS_TARGET_EXPMED is only defined when SWITCHABLE_TARGET is nonzero;
        it is initialized to point at DEFAULT_TARGET_EXPMED above.  */
  44 #if SWITCHABLE_TARGET
  45 struct target_expmed *this_target_expmed = &default_target_expmed;
  46 #endif
  47
  48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  49                                    unsigned HOST_WIDE_INT,
  50                                    unsigned HOST_WIDE_INT, rtx);
  51 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  52                                    unsigned HOST_WIDE_INT, rtx);
  53 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  54                                     unsigned HOST_WIDE_INT,
  55                                     unsigned HOST_WIDE_INT,
  56                                     unsigned HOST_WIDE_INT, rtx, int, bool);
  57 static rtx mask_rtx (enum machine_mode, int, int, int);
  58 static rtx lshift_value (enum machine_mode, rtx, int, int);
  59 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  60                                     unsigned HOST_WIDE_INT, int);
  61 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  62 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  63 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  64
  65 /* Test whether a value is zero or a power of two.  X is evaluated
        twice, so it should not have side effects.  */
  66 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
  67
  68 #ifndef SLOW_UNALIGNED_ACCESS
     /* Fallback used when SLOW_UNALIGNED_ACCESS is not already defined:
        the result is simply STRICT_ALIGNMENT, whatever MODE and ALIGN
        are.  */
  69 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  70 #endif
  71
  72
  73 /* Reduce conditional compilation elsewhere.  For each of insv, extv
        and extzv whose HAVE_foo macro is not already defined, define
        HAVE_foo as 0, CODE_FOR_foo as CODE_FOR_nothing and gen_foo as a
        macro yielding NULL_RTX, so that the code below can test HAVE_foo
        in an ordinary `if' instead of using #ifdef.  */
  74 #ifndef HAVE_insv
  75 #define HAVE_insv       0
  76 #define CODE_FOR_insv   CODE_FOR_nothing
  77 #define gen_insv(a,b,c,d) NULL_RTX
  78 #endif
  79 #ifndef HAVE_extv
  80 #define HAVE_extv       0
  81 #define CODE_FOR_extv   CODE_FOR_nothing
  82 #define gen_extv(a,b,c,d) NULL_RTX
  83 #endif
  84 #ifndef HAVE_extzv
  85 #define HAVE_extzv      0
  86 #define CODE_FOR_extzv  CODE_FOR_nothing
  87 #define gen_extzv(a,b,c,d) NULL_RTX
  88 #endif
  89
  90 void
  91 init_expmed (void)
  92 {
       /* Fill in the cost tables (zero_cost, add_cost, neg_cost, mul_cost,
          sdiv_cost, udiv_cost, sdiv_pow2_cheap, smod_pow2_cheap,
          mul_widen_cost, mul_highpart_cost, shift_cost, shiftadd_cost,
          shiftsub0_cost and shiftsub1_cost) by asking rtx_cost about
          scratch expressions built in ALL.  This is done for both values
          of SPEED and for every mode reached from the narrowest MODE_INT
          mode through GET_MODE_WIDER_MODE.  Takes no arguments and
          returns nothing.  */

       /* Scratch rtx nodes.  The rtunion members that follow most of them
          presumably reserve room for operands beyond what struct rtx_def
          itself holds -- NOTE(review): confirm against rtl.h.  */
  93   struct
  94   {
  95     struct rtx_def reg;         rtunion reg_fld[2];
  96     struct rtx_def plus;        rtunion plus_fld1;
  97     struct rtx_def neg;
  98     struct rtx_def mult;        rtunion mult_fld1;
  99     struct rtx_def sdiv;        rtunion sdiv_fld1;
 100     struct rtx_def udiv;        rtunion udiv_fld1;
 101     struct rtx_def zext;
 102     struct rtx_def sdiv_32;     rtunion sdiv_32_fld1;
 103     struct rtx_def smod_32;     rtunion smod_32_fld1;
 104     struct rtx_def wide_mult;   rtunion wide_mult_fld1;
 105     struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
 106     struct rtx_def wide_trunc;
 107     struct rtx_def shift;       rtunion shift_fld1;
 108     struct rtx_def shift_mult;  rtunion shift_mult_fld1;
 109     struct rtx_def shift_add;   rtunion shift_add_fld1;
 110     struct rtx_def shift_sub0;  rtunion shift_sub0_fld1;
 111     struct rtx_def shift_sub1;  rtunion shift_sub1_fld1;
 112   } all;
 113
 114   rtx pow2[MAX_BITS_PER_WORD];
 115   rtx cint[MAX_BITS_PER_WORD];
 116   int m, n;
 117   enum machine_mode mode, wider_mode;
 118   int speed;
 119
 120
       /* pow2[m] holds the constant 1 << m and cint[m] the constant m.
          Entry 0 of each array is left unset; nothing below reads it.  */
 121   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 122     {
 123       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 124       cint[m] = GEN_INT (m);
 125     }
 126   memset (&all, 0, sizeof all);
 127
       /* Build the expression skeletons once.  Their modes, and the
          constant operands of the shift expressions, are filled in
          inside the loops below.  */
 128   PUT_CODE (&all.reg, REG);
 129   /* Avoid using hard regs in ways which may be unsupported.  */
 130   SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
 131
 132   PUT_CODE (&all.plus, PLUS);
 133   XEXP (&all.plus, 0) = &all.reg;
 134   XEXP (&all.plus, 1) = &all.reg;
 135
 136   PUT_CODE (&all.neg, NEG);
 137   XEXP (&all.neg, 0) = &all.reg;
 138
 139   PUT_CODE (&all.mult, MULT);
 140   XEXP (&all.mult, 0) = &all.reg;
 141   XEXP (&all.mult, 1) = &all.reg;
 142
 143   PUT_CODE (&all.sdiv, DIV);
 144   XEXP (&all.sdiv, 0) = &all.reg;
 145   XEXP (&all.sdiv, 1) = &all.reg;
 146
 147   PUT_CODE (&all.udiv, UDIV);
 148   XEXP (&all.udiv, 0) = &all.reg;
 149   XEXP (&all.udiv, 1) = &all.reg;
 150
       /* cint[] has MAX_BITS_PER_WORD elements, so it only has an entry
          for 32 when MAX_BITS_PER_WORD exceeds 32; otherwise build the
          constant directly.  */
 151   PUT_CODE (&all.sdiv_32, DIV);
 152   XEXP (&all.sdiv_32, 0) = &all.reg;
 153   XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
 154
 155   PUT_CODE (&all.smod_32, MOD);
 156   XEXP (&all.smod_32, 0) = &all.reg;
 157   XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
 158
 159   PUT_CODE (&all.zext, ZERO_EXTEND);
 160   XEXP (&all.zext, 0) = &all.reg;
 161
 162   PUT_CODE (&all.wide_mult, MULT);
 163   XEXP (&all.wide_mult, 0) = &all.zext;
 164   XEXP (&all.wide_mult, 1) = &all.zext;
 165
 166   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 167   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 168
       /* wide_trunc is (truncate (lshiftrt (mult (zext reg) (zext reg)) N));
          its cost is what gets recorded as mul_highpart_cost below.  */
 169   PUT_CODE (&all.wide_trunc, TRUNCATE);
 170   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 171
 172   PUT_CODE (&all.shift, ASHIFT);
 173   XEXP (&all.shift, 0) = &all.reg;
 174
       /* shift_mult is a multiplication whose second operand is set to
          pow2[m] below; it is the shifted operand of the shift_add,
          shift_sub0 and shift_sub1 expressions.  */
 175   PUT_CODE (&all.shift_mult, MULT);
 176   XEXP (&all.shift_mult, 0) = &all.reg;
 177
 178   PUT_CODE (&all.shift_add, PLUS);
 179   XEXP (&all.shift_add, 0) = &all.shift_mult;
 180   XEXP (&all.shift_add, 1) = &all.reg;
 181
 182   PUT_CODE (&all.shift_sub0, MINUS);
 183   XEXP (&all.shift_sub0, 0) = &all.shift_mult;
 184   XEXP (&all.shift_sub0, 1) = &all.reg;
 185
 186   PUT_CODE (&all.shift_sub1, MINUS);
 187   XEXP (&all.shift_sub1, 0) = &all.reg;
 188   XEXP (&all.shift_sub1, 1) = &all.shift_mult;
 189
       /* SPEED is handed to rtx_cost and is the first index of every
          table.  NOTE(review): presumably 0 asks for size costs and 1 for
          speed costs -- confirm against rtx_cost.  */
 190   for (speed = 0; speed < 2; speed++)
 191     {
 192       crtl->maybe_hot_insn_p = speed;
 193       zero_cost[speed] = rtx_cost (const0_rtx, SET, speed);
 194
 195       for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 196            mode != VOIDmode;
 197            mode = GET_MODE_WIDER_MODE (mode))
 198         {
                /* Retarget the scratch expressions to MODE.  The zext,
                   wide_mult and wide_lshr nodes are handled separately
                   below because they use the wider mode.  */
 199           PUT_MODE (&all.reg, mode);
 200           PUT_MODE (&all.plus, mode);
 201           PUT_MODE (&all.neg, mode);
 202           PUT_MODE (&all.mult, mode);
 203           PUT_MODE (&all.sdiv, mode);
 204           PUT_MODE (&all.udiv, mode);
 205           PUT_MODE (&all.sdiv_32, mode);
 206           PUT_MODE (&all.smod_32, mode);
 207           PUT_MODE (&all.wide_trunc, mode);
 208           PUT_MODE (&all.shift, mode);
 209           PUT_MODE (&all.shift_mult, mode);
 210           PUT_MODE (&all.shift_add, mode);
 211           PUT_MODE (&all.shift_sub0, mode);
 212           PUT_MODE (&all.shift_sub1, mode);
 213
 214           add_cost[speed][mode] = rtx_cost (&all.plus, SET, speed);
 215           neg_cost[speed][mode] = rtx_cost (&all.neg, SET, speed);
 216           mul_cost[speed][mode] = rtx_cost (&all.mult, SET, speed);
 217           sdiv_cost[speed][mode] = rtx_cost (&all.sdiv, SET, speed);
 218           udiv_cost[speed][mode] = rtx_cost (&all.udiv, SET, speed);
 219
                /* Signed division (resp. modulus) by 32 counts as cheap
                   when it costs no more than two (resp. four) additions.  */
 220           sdiv_pow2_cheap[speed][mode] = (rtx_cost (&all.sdiv_32, SET, speed)
 221                                           <= 2 * add_cost[speed][mode]);
 222           smod_pow2_cheap[speed][mode] = (rtx_cost (&all.smod_32, SET, speed)
 223                                           <= 4 * add_cost[speed][mode]);
 224
                /* The widening and high-part multiply costs are only
                   computed when a wider mode exists.  Note that
                   mul_widen_cost is indexed by WIDER_MODE whereas
                   mul_highpart_cost is indexed by MODE.  */
 225           wider_mode = GET_MODE_WIDER_MODE (mode);
 226           if (wider_mode != VOIDmode)
 227             {
 228               PUT_MODE (&all.zext, wider_mode);
 229               PUT_MODE (&all.wide_mult, wider_mode);
 230               PUT_MODE (&all.wide_lshr, wider_mode);
 231               XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
 232
 233               mul_widen_cost[speed][wider_mode]
 234                 = rtx_cost (&all.wide_mult, SET, speed);
 235               mul_highpart_cost[speed][mode]
 236                 = rtx_cost (&all.wide_trunc, SET, speed);
 237             }
 238
                /* A shift by zero is recorded as free, and the shift-and-add
                   and shift-and-subtract forms with a zero shift are given
                   the cost of a plain addition.  */
 239           shift_cost[speed][mode][0] = 0;
 240           shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
 241             = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];
 242
                /* Limit the shift counts to those smaller than both the
                   bitsize of MODE and MAX_BITS_PER_WORD, the size of
                   pow2[] and cint[].  */
 243           n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
 244           for (m = 1; m < n; m++)
 245             {
 246               XEXP (&all.shift, 1) = cint[m];
 247               XEXP (&all.shift_mult, 1) = pow2[m];
 248
 249               shift_cost[speed][mode][m] = rtx_cost (&all.shift, SET, speed);
 250               shiftadd_cost[speed][mode][m] = rtx_cost (&all.shift_add, SET, speed);
 251               shiftsub0_cost[speed][mode][m] = rtx_cost (&all.shift_sub0, SET, speed);
 252               shiftsub1_cost[speed][mode][m] = rtx_cost (&all.shift_sub1, SET, speed);
 253             }
 254         }
 255     }
       /* Clear alg_hash if it has already been marked as used; otherwise
          just set the flag.  NOTE(review): presumably the cached entries
          depend on the costs computed above -- confirm.  */
 256   if (alg_hash_used_p)
 257     memset (alg_hash, 0, sizeof (alg_hash));
 258   else
 259     alg_hash_used_p = true;
       /* NOTE(review): presumably resets the profile state changed through
          crtl->maybe_hot_insn_p above -- confirm.  */
 260   default_rtl_profile ();
 261 }
 262
 263 /* Produce an rtx for the negation of X.  MODE is the mode intended
 264    for the result, which is useful when X is a CONST_INT.  The folded
 265    form is used when there is one; otherwise a NEG is expanded.  */
 266
 267 rtx
 268 negate_rtx (enum machine_mode mode, rtx x)
 269 {
 270   /* Try to simplify the negation before expanding through neg_optab.  */
 271   rtx folded = simplify_unary_operation (NEG, mode, x, mode);
 272
 273   if (folded != NULL_RTX)
 274     return folded;
 275   return expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 276 }
 277
 278 /* Query the insv/extv/extzv bit-field pattern selected by PATTERN.
 279    When HAVE_foo is false for that pattern the result is
 280    MAX_MACHINE_MODE.  Otherwise an OPNO of -1 merely asks whether the
 281    insn is available and yields VOIDmode, while any other OPNO yields
 282    the mode wanted for that operand, with word_mode standing in for an
 283    operand whose recorded mode is VOIDmode.  */
 284 enum machine_mode
 285 mode_for_extraction (enum extraction_pattern pattern, int opno)
 286 {
 287   const struct insn_data_d *data = NULL;
 288   enum machine_mode opmode;
 289
 290   switch (pattern)
 291     {
 292     case EP_insv:
 293       if (HAVE_insv)
 294         data = &insn_data[CODE_FOR_insv];
 295       break;
 296
 297     case EP_extv:
 298       if (HAVE_extv)
 299         data = &insn_data[CODE_FOR_extv];
 300       break;
 301
 302     case EP_extzv:
 303       if (HAVE_extzv)
 304         data = &insn_data[CODE_FOR_extzv];
 305       break;
 306
 307     default:
 308       gcc_unreachable ();
 309     }
 310
 311   /* DATA is still null when the selected pattern is unavailable.  */
 312   if (data == NULL)
 313     return MAX_MACHINE_MODE;
 314
 315   /* An OPNO of -1 only asks whether the pattern exists.  */
 316   if (opno == -1)
 317     return VOIDmode;
 318
 319   /* Substitute word_mode for an operand recorded as VOIDmode.  */
 320   opmode = data->operand[opno].mode;
 321   if (opmode == VOIDmode)
 322     opmode = word_mode;
 323
 324   return opmode;
 325 }
 326 \f
 327 /* A subroutine of store_bit_field, with the same arguments.  Return true
 328    if the operation could be implemented.
 329
 330    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 331    no other way of implementing the operation.  If FALLBACK_P is false,
 332    return false instead.

        STR_RTX is the object containing the field, BITSIZE the width of
        the field in bits, BITNUM the position of its first bit, FIELDMODE
        the machine mode of the field and VALUE the rtx to be stored.

        The strategies are tried in this order: a vec_set pattern, a plain
        move (through a SUBREG or an adjusted MEM if need be), a movstrict
        pattern, word-by-word recursion for fields wider than a word, the
        insv pattern, insv applied to a register copy of the containing
        memory unit, and finally store_fixed_bit_field.  */
 333
 334 static bool
 335 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 336                    unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 337                    rtx value, bool fallback_p)
 338 {
       /* UNIT is the granularity in which OFFSET is counted: bytes for a
          MEM, words otherwise.  */
 339   unsigned int unit
 340     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
 341   unsigned HOST_WIDE_INT offset, bitpos;
 342   rtx op0 = str_rtx;
 343   int byte_offset;
 344   rtx orig_value;
 345
       /* OP_MODE is the mode insv wants for operand 3, the value to insert,
          or MAX_MACHINE_MODE when HAVE_insv is false.  */
 346   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 347
       /* Strip any SUBREGs from OP0, folding their byte offsets into
          BITNUM.  */
 348   while (GET_CODE (op0) == SUBREG)
 349     {
 350       /* The following line once was done only if WORDS_BIG_ENDIAN,
 351          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 352          meaningful at a much higher level; when structures are copied
 353          between memory and regs, the higher-numbered regs
 354          always get higher addresses.  */
 355       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 356       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 357
 358       byte_offset = 0;
 359
 360       /* Paradoxical subregs need special handling on big endian machines.  */
 361       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 362         {
 363           int difference = inner_mode_size - outer_mode_size;
 364
 365           if (WORDS_BIG_ENDIAN)
 366             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 367           if (BYTES_BIG_ENDIAN)
 368             byte_offset += difference % UNITS_PER_WORD;
 369         }
 370       else
 371         byte_offset = SUBREG_BYTE (op0);
 372
 373       bitnum += byte_offset * BITS_PER_UNIT;
 374       op0 = SUBREG_REG (op0);
 375     }
 376
 377   /* No action is needed if the target is a register and if the field
 378      lies completely outside that register.  This can occur if the source
 379      code contains an out-of-bounds access to a small array.  */
 380   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 381     return true;
 382
 383   /* Use vec_set patterns for inserting parts of vectors whenever
 384      available.  */
 385   if (VECTOR_MODE_P (GET_MODE (op0))
 386       && !MEM_P (op0)
 387       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 388       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 389       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 390       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 391     {
 392       struct expand_operand ops[3];
 393       enum machine_mode outermode = GET_MODE (op0);
 394       enum machine_mode innermode = GET_MODE_INNER (outermode);
 395       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 396       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 397
 398       create_fixed_operand (&ops[0], op0);
 399       create_input_operand (&ops[1], value, innermode);
 400       create_integer_operand (&ops[2], pos);
 401       if (maybe_expand_insn (icode, 3, ops))
 402         return true;
 403     }
 404
 405   /* If the target is a register, overwriting the entire object, or storing
 406      a full-word or multi-word field can be done with just a SUBREG.
 407
 408      If the target is memory, storing any naturally aligned field can be
 409      done with a simple store.  For targets that support fast unaligned
 410      memory, any naturally sized, unit aligned field can be done directly.  */
 411
       /* OFFSET counts the whole UNITs that precede the field and BITPOS the
          bits left over.  BYTE_OFFSET is only used in the register case
          below, as the byte offset of the SUBREG.  */
 412   offset = bitnum / unit;
 413   bitpos = bitnum % unit;
 414   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 415                 + (offset * UNITS_PER_WORD);
 416
 417   if (bitpos == 0
 418       && bitsize == GET_MODE_BITSIZE (fieldmode)
 419       && (!MEM_P (op0)
 420           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 421               || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 422              && ((GET_MODE (op0) == fieldmode && byte_offset == 0)
 423                  || validate_subreg (fieldmode, GET_MODE (op0), op0,
 424                                      byte_offset)))
 425           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 426              || (offset * BITS_PER_UNIT % bitsize == 0
 427                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 428     {
 429       if (MEM_P (op0))
 430         op0 = adjust_address (op0, fieldmode, offset);
 431       else if (GET_MODE (op0) != fieldmode)
 432         op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 433                                    byte_offset);
 434       emit_move_insn (op0, value);
 435       return true;
 436     }
 437
 438   /* Make sure we are playing with integral modes.  Pun with subregs
 439      if we aren't.  This must come after the entire register case above,
 440      since that case is valid for any mode.  The following cases are only
 441      valid for integral modes.  */
 442   {
 443     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 444     if (imode != GET_MODE (op0))
 445       {
 446         if (MEM_P (op0))
 447           op0 = adjust_address (op0, imode, 0);
 448         else
 449           {
 450             gcc_assert (imode != BLKmode);
 451             op0 = gen_lowpart (imode, op0);
 452           }
 453       }
 454   }
 455
 456   /* We may be accessing data outside the field, which means
 457      we can alias adjacent data.  */
 458   if (MEM_P (op0))
 459     {
 460       op0 = shallow_copy_rtx (op0);
 461       set_mem_alias_set (op0, 0);
 462       set_mem_expr (op0, 0);
 463     }
 464
 465   /* If OP0 is a register, BITPOS must count within a word.
 466      But as we have it, it counts within whatever size OP0 now has.
 467      On a bigendian machine, these are not the same, so convert.  */
 468   if (BYTES_BIG_ENDIAN
 469       && !MEM_P (op0)
 470       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 471     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 472
 473   /* Storing an lsb-aligned field in a register
 474      can be done with a movestrict instruction.  */
 475
 476   if (!MEM_P (op0)
 477       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 478       && bitsize == GET_MODE_BITSIZE (fieldmode)
 479       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 480     {
 481       struct expand_operand ops[2];
 482       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 483       rtx arg0 = op0;
 484       unsigned HOST_WIDE_INT subreg_off;
 485
 486       if (GET_CODE (arg0) == SUBREG)
 487         {
 488           /* Else we've got some float mode source being extracted into
 489              a different float mode destination -- this combination of
 490              subregs results in Severe Tire Damage.  */
 491           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 492                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 493                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 494           arg0 = SUBREG_REG (arg0);
 495         }
 496
 497       subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 498                    + (offset * UNITS_PER_WORD);
 499       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 500         {
 501           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 502
 503           create_fixed_operand (&ops[0], arg0);
 504           /* Shrink the source operand to FIELDMODE.  */
 505           create_convert_operand_to (&ops[1], value, fieldmode, false);
 506           if (maybe_expand_insn (icode, 2, ops))
 507             return true;
 508         }
 509     }
 510
 511   /* Handle fields bigger than a word.  */
 512
 513   if (bitsize > BITS_PER_WORD)
 514     {
 515       /* Here we transfer the words of the field
 516          in the order least significant first.
 517          This is because the most significant word is the one which may
 518          be less than full.
 519          However, only do that if the value is not BLKmode.  */
 520
 521       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 522       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 523       unsigned int i;
 524       rtx last;
 525
 526       /* This is the mode we must force value to, so that there will be enough
 527          subwords to extract.  Note that fieldmode will often (always?) be
 528          VOIDmode, because that is what store_field uses to indicate that this
 529          is a bit field, but passing VOIDmode to operand_subword_force
 530          is not allowed.  */
 531       fieldmode = GET_MODE (value);
 532       if (fieldmode == VOIDmode)
 533         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 534
           /* Remember where we are so that everything emitted for the
              earlier words can be deleted if a later word fails.  */
 535       last = get_last_insn ();
 536       for (i = 0; i < nwords; i++)
 537         {
 538           /* If I is 0, use the low-order word in both field and target;
 539              if I is 1, use the next to lowest word; and so on.  */
 540           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 541           unsigned int bit_offset = (backwards
 542                                      ? MAX ((int) bitsize - ((int) i + 1)
 543                                             * BITS_PER_WORD,
 544                                             0)
 545                                      : (int) i * BITS_PER_WORD);
 546           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 547
               /* Store one word, or the final partial word, through a
                  recursive call that inherits FALLBACK_P.  */
 548           if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD,
 549                                             bitsize - i * BITS_PER_WORD),
 550                                   bitnum + bit_offset, word_mode,
 551                                   value_word, fallback_p))
 552             {
 553               delete_insns_since (last);
 554               return false;
 555             }
 556         }
 557       return true;
 558     }
 559
 560   /* From here on we can assume that the field to be stored in is
 561      a full-word (whatever type that is), since it is shorter than a word.  */
 562
 563   /* OFFSET is the number of words or bytes (UNIT says which)
 564      from STR_RTX to the first word or byte containing part of the field.  */
 565
 566   if (!MEM_P (op0))
 567     {
 568       if (offset != 0
 569           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 570         {
 571           if (!REG_P (op0))
 572             {
 573               /* Since this is a destination (lvalue), we can't copy
 574                  it to a pseudo.  We can remove a SUBREG that does not
 575                  change the size of the operand.  Such a SUBREG may
 576                  have been added above.  */
 577               gcc_assert (GET_CODE (op0) == SUBREG
 578                           && (GET_MODE_SIZE (GET_MODE (op0))
 579                               == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
 580               op0 = SUBREG_REG (op0);
 581             }
 582           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 583                                 op0, (offset * UNITS_PER_WORD));
 584         }
 585       offset = 0;
 586     }
 587
 588   /* If VALUE has a floating-point or complex mode, access it as an
 589      integer of the corresponding size.  This can occur on a machine
 590      with 64 bit registers that uses SFmode for float.  It can also
 591      occur for unaligned float or complex fields.  */
 592   orig_value = value;
 593   if (GET_MODE (value) != VOIDmode
 594       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 595       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 596     {
 597       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 598       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 599     }
 600
 601   /* Now OFFSET is nonzero only if OP0 is memory
 602      and is therefore always measured in bytes.  */
 603
 604   if (HAVE_insv
 605       && GET_MODE (value) != BLKmode
 606       && bitsize > 0
 607       && GET_MODE_BITSIZE (op_mode) >= bitsize
 608       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
 609             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
 610     {
 611       struct expand_operand ops[4];
 612       int xbitpos = bitpos;
 613       rtx value1;
 614       rtx xop0 = op0;
           /* LAST marks the point to roll back to if insv cannot be used.  */
 615       rtx last = get_last_insn ();
 616       bool copy_back = false;
 617
 618       /* Add OFFSET into OP0's address.  */
 619       if (MEM_P (xop0))
 620         xop0 = adjust_address (xop0, byte_mode, offset);
 621
 622       /* If xop0 is a register, we need it in OP_MODE
 623          to make it acceptable to the format of insv.  */
 624       if (GET_CODE (xop0) == SUBREG)
 625         /* We can't just change the mode, because this might clobber op0,
 626            and we will need the original value of op0 if insv fails.  */
 627         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 628       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 629         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 630
 631       /* If the destination is a paradoxical subreg such that we need a
 632          truncate to the inner mode, perform the insertion on a temporary and
 633          truncate the result to the original destination.  Note that we can't
 634          just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 635          X) 0)) is (reg:N X).  */
 636       if (GET_CODE (xop0) == SUBREG
 637           && REG_P (SUBREG_REG (xop0))
 638           && (!TRULY_NOOP_TRUNCATION
 639               (GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (xop0))),
 640                GET_MODE_BITSIZE (op_mode))))
 641         {
 642           rtx tem = gen_reg_rtx (op_mode);
 643           emit_move_insn (tem, xop0);
 644           xop0 = tem;
 645           copy_back = true;
 646         }
 647
 648       /* On big-endian machines, we count bits from the most significant.
 649          If the bit field insn does not, we must invert.  */
 650
 651       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 652         xbitpos = unit - bitsize - xbitpos;
 653
 654       /* We have been counting XBITPOS within UNIT.
 655          Count instead within the size of the register.  */
 656       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
 657         xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
 658
 659       unit = GET_MODE_BITSIZE (op_mode);
 660
 661       /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 662       value1 = value;
 663       if (GET_MODE (value) != op_mode)
 664         {
 665           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 666             {
 667               /* Optimization: Don't bother really extending VALUE
 668                  if it has all the bits we will actually use.  However,
 669                  if we must narrow it, be sure we do it correctly.  */
 670
 671               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 672                 {
 673                   rtx tmp;
 674
 675                   tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 676                   if (! tmp)
 677                     tmp = simplify_gen_subreg (op_mode,
 678                                                force_reg (GET_MODE (value),
 679                                                           value1),
 680                                                GET_MODE (value), 0);
 681                   value1 = tmp;
 682                 }
 683               else
 684                 value1 = gen_lowpart (op_mode, value1);
 685             }
 686           else if (CONST_INT_P (value))
 687             value1 = gen_int_mode (INTVAL (value), op_mode);
 688           else
 689             /* Parse phase is supposed to make VALUE's data type
 690                match that of the component reference, which is a type
 691                at least as wide as the field; so VALUE should have
 692                a mode that corresponds to that type.  */
 693             gcc_assert (CONSTANT_P (value));
 694         }
 695
           /* The insv operands are: destination, field width, field
              position and the value to insert.  */
 696       create_fixed_operand (&ops[0], xop0);
 697       create_integer_operand (&ops[1], bitsize);
 698       create_integer_operand (&ops[2], xbitpos);
 699       create_input_operand (&ops[3], value1, op_mode);
 700       if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
 701         {
 702           if (copy_back)
 703             convert_move (op0, xop0, true);
 704           return true;
 705         }
 706       delete_insns_since (last);
 707     }
 708
 709   /* If OP0 is a memory, try copying it to a register and seeing if a
 710      cheap register alternative is available.  */
 711   if (HAVE_insv && MEM_P (op0))
 712     {
 713       enum machine_mode bestmode;
 714
 715       /* Get the mode to use for inserting into this field.  If OP0 is
 716          BLKmode, get the smallest mode consistent with the alignment. If
 717          OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
 718          mode. Otherwise, use the smallest mode containing the field.  */
 719
 720       if (GET_MODE (op0) == BLKmode
 721           || (op_mode != MAX_MACHINE_MODE
 722               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
 723         bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
 724                                   (op_mode == MAX_MACHINE_MODE
 725                                    ? VOIDmode : op_mode),
 726                                   MEM_VOLATILE_P (op0));
 727       else
 728         bestmode = GET_MODE (op0);
 729
 730       if (bestmode != VOIDmode
 731           && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
 732           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 733                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 734         {
 735           rtx last, tempreg, xop0;
 736           unsigned HOST_WIDE_INT xoffset, xbitpos;
 737
 738           last = get_last_insn ();
 739
 740           /* Adjust address to point to the containing unit of
 741              that mode.  Compute the offset as a multiple of this unit,
 742              counting in bytes.  */
 743           unit = GET_MODE_BITSIZE (bestmode);
 744           xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 745           xbitpos = bitnum % unit;
 746           xop0 = adjust_address (op0, bestmode, xoffset);
 747
 748           /* Fetch that unit, store the bitfield in it, then store
 749              the unit.  */
 750           tempreg = copy_to_reg (xop0);
               /* FALLBACK_P is passed as false here, so a failure comes
                  back to us: the insns are deleted and control falls
                  through to the code below.  */
 751           if (store_bit_field_1 (tempreg, bitsize, xbitpos,
 752                                  fieldmode, orig_value, false))
 753             {
 754               emit_move_insn (xop0, tempreg);
 755               return true;
 756             }
 757           delete_insns_since (last);
 758         }
 759     }
 760
       /* Nothing above worked.  Give up unless the caller allowed the
          store_fixed_bit_field fallback.  */
 761   if (!fallback_p)
 762     return false;
 763
 764   store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 765   return true;
 766 }
 767
 768 /* Emit code that stores VALUE into the bit-field of STR_RTX which is
 769    BITSIZE bits wide and begins at bit BITNUM.  FIELDMODE is the machine
 770    mode of the field's FIELD_DECL node.  Failure is treated as a bug.  */
 771
 772 void
 773 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 774                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 775                  rtx value)
 776 {
 777   if (store_bit_field_1 (str_rtx, bitsize, bitnum, fieldmode, value, true))
 778     return;
 779   gcc_unreachable ();
 780 }
 781 \f
 782 /* Use shifts and boolean operations to store VALUE
 783    into a bit field of width BITSIZE
 784    in a memory location specified by OP0 except offset by OFFSET bytes.
 785      (OFFSET must be 0 if OP0 is a register.)
 786    The field starts at position BITPOS within the byte.
 787     (If OP0 is a register, it may be a full word or a narrower mode,
 788      but BITPOS still counts within a full word,
 789      which is significant on bigendian machines.)

        The emitted sequence copies the containing unit into a register,
        ANDs away the old field bits, IORs in VALUE shifted to the field
        position, and moves the result back into OP0.  The AND is omitted
        when VALUE is a constant with all field bits set, and the IOR is
        omitted when VALUE is a constant with all field bits clear.

        A field that does not fit in a single unit (a register field that
        runs past BITS_PER_WORD, or a memory field for which get_best_mode
        returns VOIDmode) is handed to store_split_bit_field instead.  */
 790
 791 static void
 792 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 793                        unsigned HOST_WIDE_INT bitsize,
 794                        unsigned HOST_WIDE_INT bitpos, rtx value)
 795 {
 796   enum machine_mode mode;
       /* Width in bits of the unit that holds the field: BITS_PER_WORD for
          the register case, the bit size of the chosen access mode for
          the memory case.  */
 797   unsigned int total_bits = BITS_PER_WORD;
 798   rtx temp;
       /* Set below when VALUE is a CONST_INT whose low BITSIZE bits are
          all zeros / all ones respectively.  */
 799   int all_zero = 0;
 800   int all_one = 0;
 801
 802   /* There is a case not handled here:
 803      a structure with a known alignment of just a halfword
 804      and a field split across two aligned halfwords within the structure.
 805      Or likewise a structure with a known alignment of just a byte
 806      and a field split across two bytes.
 807      Such cases are not supposed to be able to occur.  */
 808
 809   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 810     {
 811       gcc_assert (!offset);
 812       /* Special treatment for a bit field split across two registers.  */
 813       if (bitsize + bitpos > BITS_PER_WORD)
 814         {
 815           store_split_bit_field (op0, bitsize, bitpos, value);
 816           return;
 817         }
 818     }
 819   else
 820     {
 821       /* Get the proper mode to use for this field.  We want a mode that
 822          includes the entire field.  If such a mode would be larger than
 823          a word, we won't be doing the extraction the normal way.
 824          We don't want a mode bigger than the destination.  */
 825
           /* Start from OP0's own mode as the upper limit, replacing it by
              word_mode when it has no size or is wider than a word.  */
 826       mode = GET_MODE (op0);
 827       if (GET_MODE_BITSIZE (mode) == 0
 828           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 829         mode = word_mode;
 830
           /* A volatile MEM with a sized mode is accessed in exactly that
              mode when flag_strict_volatile_bitfields is positive;
              otherwise get_best_mode picks the access mode, with the MODE
              computed above as its limit.  */
 831       if (MEM_VOLATILE_P (op0)
 832           && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
 833           && flag_strict_volatile_bitfields > 0)
 834         mode = GET_MODE (op0);
 835       else
 836         mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 837                               MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 838
 839       if (mode == VOIDmode)
 840         {
 841           /* The only way this should occur is if the field spans word
 842              boundaries.  */
               /* OFFSET is folded into the bit position here, because
                  store_split_bit_field takes no separate byte offset.  */
 843           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 844                                  value);
 845           return;
 846         }
 847
 848       total_bits = GET_MODE_BITSIZE (mode);
 849
 850       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 851          be in the range 0 to total_bits-1, and put any excess bytes in
 852          OFFSET.  */
 853       if (bitpos >= total_bits)
 854         {
 855           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 856           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 857                      * BITS_PER_UNIT);
 858         }
 859
 860       /* Get ref to an aligned byte, halfword, or word containing the field.
 861          Adjust BITPOS to be position within a word,
 862          and OFFSET to be the offset of that word.
 863          Then alter OP0 to refer to that word.  */
 864       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 865       offset -= (offset % (total_bits / BITS_PER_UNIT));
 866       op0 = adjust_address (op0, mode, offset);
 867     }
 868
       /* From here on work in whatever mode OP0 now has; in the register
          case this is the first assignment to MODE.  */
 869   mode = GET_MODE (op0);
 870
 871   /* Now MODE is either some integral mode for a MEM as OP0,
 872      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 873      The bit field is contained entirely within OP0.
 874      BITPOS is the starting bit number within OP0.
 875      (OP0's mode may actually be narrower than MODE.)  */
 876
 877   if (BYTES_BIG_ENDIAN)
 878       /* BITPOS is the distance between our msb
 879          and that of the containing datum.
 880          Convert it to the distance from the lsb.  */
 881       bitpos = total_bits - bitsize - bitpos;
 882
 883   /* Now BITPOS is always the distance between our lsb
 884      and that of OP0.  */
 885
 886   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 887      we must first convert its mode to MODE.  */
 888
 889   if (CONST_INT_P (value))
 890     {
 891       HOST_WIDE_INT v = INTVAL (value);
 892
           /* Keep only the low BITSIZE bits of the constant.  The mask is
              built only when BITSIZE is smaller than a HOST_WIDE_INT, so
              the shift count stays in range.  */
 893       if (bitsize < HOST_BITS_PER_WIDE_INT)
 894         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 895
           /* Record the two constants that let one of the logical
              operations below be skipped.  */
 896       if (v == 0)
 897         all_zero = 1;
 898       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 899                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 900                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 901         all_one = 1;
 902
 903       value = lshift_value (mode, value, bitpos, bitsize);
 904     }
 905   else
 906     {
           /* VALUE needs masking to BITSIZE bits unless its own mode is
              exactly BITSIZE bits wide, or the field ends at the top bit
              of MODE so that the left shift below discards any excess
              high bits.  This is computed before VALUE is converted,
              because it looks at VALUE's original mode.  */
 907       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 908                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 909
 910       if (GET_MODE (value) != mode)
 911         value = convert_to_mode (mode, value, 1);
 912
 913       if (must_and)
 914         value = expand_binop (mode, and_optab, value,
 915                               mask_rtx (mode, 0, bitsize, 0),
 916                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 917       if (bitpos > 0)
 918         value = expand_shift (LSHIFT_EXPR, mode, value,
 919                               build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
 920     }
 921
 922   /* Now clear the chosen bits in OP0,
 923      except that if VALUE is -1 we need not bother.  */
 924   /* We keep the intermediates in registers to allow CSE to combine
 925      consecutive bitfield assignments.  */
 926
 927   temp = force_reg (mode, op0);
 928
 929   if (! all_one)
 930     {
 931       temp = expand_binop (mode, and_optab, temp,
 932                            mask_rtx (mode, bitpos, bitsize, 1),
 933                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
 934       temp = force_reg (mode, temp);
 935     }
 936
 937   /* Now logical-or VALUE into OP0, unless it is zero.  */
 938
 939   if (! all_zero)
 940     {
 941       temp = expand_binop (mode, ior_optab, temp, value,
 942                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
 943       temp = force_reg (mode, temp);
 944     }
 945
       /* Write the merged unit back, using a copy of the OP0 rtx as the
          destination of the move.  */
 946   if (op0 != temp)
 947     {
 948       op0 = copy_rtx (op0);
 949       emit_move_insn (op0, temp);
 950     }
 951 }
 952 \f
 953 /* Store a bit field that is split across multiple accessible memory objects.
 954
 955    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 956    BITSIZE is the field width; BITPOS the position of its first bit
 957    (within the word).
 958    VALUE is the value to store.
 959
 960    This does not yet handle fields wider than BITS_PER_WORD.

        The field is written one piece at a time.  Each piece lies inside a
        single UNIT-bit chunk of OP0 (a word for a REG or SUBREG, otherwise
        the smaller of the MEM's alignment and a word), is taken from the
        corresponding bits of VALUE, and is stored with
        store_fixed_bit_field.  The chunk index is recomputed from BITPOS on
        every iteration, so BITPOS may be larger than UNIT.  */
 961
 962 static void
 963 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 964                        unsigned HOST_WIDE_INT bitpos, rtx value)
 965 {
       /* Size in bits of the chunks OP0 is processed in.  */
 966   unsigned int unit;
       /* Number of field bits already stored.  */
 967   unsigned int bitsdone = 0;
 968
 969   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 970      much at a time.  */
 971   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 972     unit = BITS_PER_WORD;
 973   else
 974     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
 975
 976   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 977      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 978      that VALUE might be a floating-point constant.  */
 979   if (CONSTANT_P (value) && !CONST_INT_P (value))
 980     {
 981       rtx word = gen_lowpart_common (word_mode, value);
 982
           /* Use the direct low part when one was produced and it differs
              from VALUE; otherwise load VALUE into a register first (in
              its own mode, or word_mode if it has none) and take the low
              part of that register.  */
 983       if (word && (value != word))
 984         value = word;
 985       else
 986         value = gen_lowpart_common (word_mode,
 987                                     force_reg (GET_MODE (value) != VOIDmode
 988                                                ? GET_MODE (value)
 989                                                : word_mode, value));
 990     }
 991
 992   while (bitsdone < bitsize)
 993     {
           /* THISSIZE is the width of the piece stored by this iteration,
              PART the rtx holding that piece of VALUE, WORD the object it
              is stored into, THISPOS its bit position inside the current
              chunk and OFFSET the index of that chunk.  */
 994       unsigned HOST_WIDE_INT thissize;
 995       rtx part, word;
 996       unsigned HOST_WIDE_INT thispos;
 997       unsigned HOST_WIDE_INT offset;
 998
 999       offset = (bitpos + bitsdone) / unit;
1000       thispos = (bitpos + bitsdone) % unit;
1001
1002       /* THISSIZE must not overrun a word boundary.  Otherwise,
1003          store_fixed_bit_field will call us again, and we will mutually
1004          recurse forever.  */
1005       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1006       thissize = MIN (thissize, unit - thispos);
1007
           /* NOTE(review): THISSIZE can be as large as BITS_PER_WORD here,
              and the CONST_INT paths below compute
              (HOST_WIDE_INT) 1 << THISSIZE without first checking that
              THISSIZE < HOST_BITS_PER_WIDE_INT -- confirm that a
              full-width piece cannot reach them.  */
1008       if (BYTES_BIG_ENDIAN)
1009         {
1010           int total_bits;
1011
1012           /* We must do an endian conversion exactly the same way as it is
1013              done in extract_bit_field, so that the two calls to
1014              extract_fixed_bit_field will have comparable arguments.  */
1015           if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1016             total_bits = BITS_PER_WORD;
1017           else
1018             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1019
1020           /* Fetch successively less significant portions.  */
1021           if (CONST_INT_P (value))
1022             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1023                              >> (bitsize - bitsdone - thissize))
1024                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1025           else
1026             /* The args are chosen so that the last part includes the
1027                lsb.  Give extract_bit_field the value it needs (with
1028                endianness compensation) to fetch the piece we want.  */
1029             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1030                                             total_bits - bitsize + bitsdone,
1031                                             NULL_RTX, 1, false);
1032         }
1033       else
1034         {
1035           /* Fetch successively more significant portions.  */
1036           if (CONST_INT_P (value))
1037             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1038                              >> bitsdone)
1039                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1040           else
1041             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1042                                             bitsdone, NULL_RTX, 1, false);
1043         }
1044
1045       /* If OP0 is a register, then handle OFFSET here.
1046
1047          When handling multiword bitfields, extract_bit_field may pass
1048          down a word_mode SUBREG of a larger REG for a bitfield that actually
1049          crosses a word boundary.  Thus, for a SUBREG, we must find
1050          the current word starting from the base register.  */
1051       if (GET_CODE (op0) == SUBREG)
1052         {
1053           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1054           enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
               /* When the inner register is narrower than a word, only
                  word 0 is usable; any other word index is marked with
                  const0_rtx so that the store below is skipped.  */
1055           if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1056             word = word_offset ? const0_rtx : op0;
1057           else
1058             word = operand_subword_force (SUBREG_REG (op0), word_offset,
1059                                           GET_MODE (SUBREG_REG (op0)));
               /* The word has been selected, so no offset remains.  */
1060           offset = 0;
1061         }
1062       else if (REG_P (op0))
1063         {
1064           enum machine_mode op0_mode = GET_MODE (op0);
               /* Same treatment as the SUBREG case, applied to OP0
                  itself.  */
1065           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1066             word = offset ? const0_rtx : op0;
1067           else
1068             word = operand_subword_force (op0, offset, GET_MODE (op0));
1069           offset = 0;
1070         }
1071       else
             /* Memory: keep OP0 and let store_fixed_bit_field apply the
                offset.  */
1072         word = op0;
1073
1074       /* OFFSET is in UNITs, and UNIT is in bits.
1075          store_fixed_bit_field wants offset in bytes.  If WORD is const0_rtx,
1076          it is just an out-of-bounds access.  Ignore it.  */
1077       if (word != const0_rtx)
1078         store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1079                                thispos, part);
1080       bitsdone += thissize;
1081     }
1082 }
1083 \f
1084 /* A subroutine of extract_bit_field_1 that converts return value X
1085    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1086    to extract_bit_field.

        X is returned unchanged when it is already in MODE or in TMODE;
        otherwise the returned rtx is in TMODE.  UNSIGNEDP is passed on to
        convert_to_mode.  */
1087
1088 static rtx
1089 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1090                              enum machine_mode tmode, bool unsignedp)
1091 {
       /* Nothing to do if X already has one of the acceptable modes.  */
1092   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1093     return x;
1094
1095   /* If TMODE is not a scalar integer mode, first convert X to the
1096      integer mode with the same bit size as TMODE, force it into a
1097      register, and then access that register in TMODE via gen_lowpart.  */
1098   if (!SCALAR_INT_MODE_P (tmode))
1099     {
1100       enum machine_mode smode;
1101
1102       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1103       x = convert_to_mode (smode, x, unsignedp);
1104       x = force_reg (smode, x);
1105       return gen_lowpart (tmode, x);
1106     }
1107
       /* TMODE is a scalar integer mode: an ordinary mode conversion is
          enough.  */
1108   return convert_to_mode (tmode, x, unsignedp);
1109 }
1110
1111 /* A subroutine of extract_bit_field, with the same arguments.
1112    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1113    if we can find no other means of implementing the operation.
1114    If FALLBACK_P is false, return NULL instead.  */
1115
1116 static rtx
1117 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1118                      unsigned HOST_WIDE_INT bitnum,
1119                      int unsignedp, bool packedp, rtx target,
1120                      enum machine_mode mode, enum machine_mode tmode,
1121                      bool fallback_p)
1122 {
1123   unsigned int unit
1124     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1125   unsigned HOST_WIDE_INT offset, bitpos;
1126   rtx op0 = str_rtx;
1127   enum machine_mode int_mode;
1128   enum machine_mode ext_mode;
1129   enum machine_mode mode1;
1130   enum insn_code icode;
1131   int byte_offset;
1132
1133   if (tmode == VOIDmode)
1134     tmode = mode;
1135
1136   while (GET_CODE (op0) == SUBREG)
1137     {
1138       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1139       op0 = SUBREG_REG (op0);
1140     }
1141
1142   /* If we have an out-of-bounds access to a register, just return an
1143      uninitialized register of the required mode.  This can occur if the
1144      source code contains an out-of-bounds access to a small array.  */
1145   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1146     return gen_reg_rtx (tmode);
1147
1148   if (REG_P (op0)
1149       && mode == GET_MODE (op0)
1150       && bitnum == 0
1151       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1152     {
1153       /* We're trying to extract a full register from itself.  */
1154       return op0;
1155     }
1156
1157   /* See if we can get a better vector mode before extracting.  */
1158   if (VECTOR_MODE_P (GET_MODE (op0))
1159       && !MEM_P (op0)
1160       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1161     {
1162       enum machine_mode new_mode;
1163
1164       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1165         new_mode = MIN_MODE_VECTOR_FLOAT;
1166       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1167         new_mode = MIN_MODE_VECTOR_FRACT;
1168       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1169         new_mode = MIN_MODE_VECTOR_UFRACT;
1170       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1171         new_mode = MIN_MODE_VECTOR_ACCUM;
1172       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1173         new_mode = MIN_MODE_VECTOR_UACCUM;
1174       else
1175         new_mode = MIN_MODE_VECTOR_INT;
1176
1177       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1178         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1179             && targetm.vector_mode_supported_p (new_mode))
1180           break;
1181       if (new_mode != VOIDmode)
1182         op0 = gen_lowpart (new_mode, op0);
1183     }
1184
1185   /* Use vec_extract patterns for extracting parts of vectors whenever
1186      available.  */
1187   if (VECTOR_MODE_P (GET_MODE (op0))
1188       && !MEM_P (op0)
1189       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1190       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1191           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1192     {
1193       struct expand_operand ops[3];
1194       enum machine_mode outermode = GET_MODE (op0);
1195       enum machine_mode innermode = GET_MODE_INNER (outermode);
1196       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1197       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1198
1199       create_output_operand (&ops[0], target, innermode);
1200       create_input_operand (&ops[1], op0, outermode);
1201       create_integer_operand (&ops[2], pos);
1202       if (maybe_expand_insn (icode, 3, ops))
1203         {
1204           target = ops[0].value;
1205           if (GET_MODE (target) != mode)
1206             return gen_lowpart (tmode, target);
1207           return target;
1208         }
1209     }
1210
1211   /* Make sure we are playing with integral modes.  Pun with subregs
1212      if we aren't.  */
1213   {
1214     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1215     if (imode != GET_MODE (op0))
1216       {
1217         if (MEM_P (op0))
1218           op0 = adjust_address (op0, imode, 0);
1219         else if (imode != BLKmode)
1220           {
1221             op0 = gen_lowpart (imode, op0);
1222
1223             /* If we got a SUBREG, force it into a register since we
1224                aren't going to be able to do another SUBREG on it.  */
1225             if (GET_CODE (op0) == SUBREG)
1226               op0 = force_reg (imode, op0);
1227           }
1228         else if (REG_P (op0))
1229           {
1230             rtx reg, subreg;
1231             imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1232                                             MODE_INT);
1233             reg = gen_reg_rtx (imode);
1234             subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1235             emit_move_insn (subreg, op0);
1236             op0 = reg;
1237             bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1238           }
1239         else
1240           {
1241             rtx mem = assign_stack_temp (GET_MODE (op0),
1242                                          GET_MODE_SIZE (GET_MODE (op0)), 0);
1243             emit_move_insn (mem, op0);
1244             op0 = adjust_address (mem, BLKmode, 0);
1245           }
1246       }
1247   }
1248
1249   /* We may be accessing data outside the field, which means
1250      we can alias adjacent data.  */
1251   if (MEM_P (op0))
1252     {
1253       op0 = shallow_copy_rtx (op0);
1254       set_mem_alias_set (op0, 0);
1255       set_mem_expr (op0, 0);
1256     }
1257
1258   /* Extraction of a full-word or multi-word value from a structure
1259      in a register or aligned memory can be done with just a SUBREG.
1260      A subword value in the least significant part of a register
1261      can also be extracted with a SUBREG.  For this, we need the
1262      byte offset of the value in op0.  */
1263
1264   bitpos = bitnum % unit;
1265   offset = bitnum / unit;
1266   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1267
1268   /* If OP0 is a register, BITPOS must count within a word.
1269      But as we have it, it counts within whatever size OP0 now has.
1270      On a bigendian machine, these are not the same, so convert.  */
1271   if (BYTES_BIG_ENDIAN
1272       && !MEM_P (op0)
1273       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1274     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1275
1276   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1277      If that's wrong, the solution is to test for it and set TARGET to 0
1278      if needed.  */
1279
1280   /* Only scalar integer modes can be converted via subregs.  There is an
1281      additional problem for FP modes here in that they can have a precision
1282      which is different from the size.  mode_for_size uses precision, but
1283      we want a mode based on the size, so we must avoid calling it for FP
1284      modes.  */
1285   mode1  = (SCALAR_INT_MODE_P (tmode)
1286             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1287             : mode);
1288
1289   /* If the bitfield is volatile, we need to make sure the access
1290      remains on a type-aligned boundary.  */
1291   if (GET_CODE (op0) == MEM
1292       && MEM_VOLATILE_P (op0)
1293       && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1294       && flag_strict_volatile_bitfields > 0)
1295     goto no_subreg_mode_swap;
1296
1297   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1298         && bitpos % BITS_PER_WORD == 0)
1299        || (mode1 != BLKmode
1300            /* ??? The big endian test here is wrong.  This is correct
1301               if the value is in a register, and if mode_for_size is not
1302               the same mode as op0.  This causes us to get unnecessarily
1303               inefficient code from the Thumb port when -mbig-endian.  */
1304            && (BYTES_BIG_ENDIAN
1305                ? bitpos + bitsize == BITS_PER_WORD
1306                : bitpos == 0)))
1307       && ((!MEM_P (op0)
1308            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode1),
1309                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1310            && GET_MODE_SIZE (mode1) != 0
1311            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1312           || (MEM_P (op0)
1313               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1314                   || (offset * BITS_PER_UNIT % bitsize == 0
1315                       && MEM_ALIGN (op0) % bitsize == 0)))))
1316     {
1317       if (MEM_P (op0))
1318         op0 = adjust_address (op0, mode1, offset);
1319       else if (mode1 != GET_MODE (op0))
1320         {
1321           rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1322                                          byte_offset);
1323           if (sub == NULL)
1324             goto no_subreg_mode_swap;
1325           op0 = sub;
1326         }
1327       if (mode1 != mode)
1328         return convert_to_mode (tmode, op0, unsignedp);
1329       return op0;
1330     }
1331  no_subreg_mode_swap:
1332
1333   /* Handle fields bigger than a word.  */
1334
1335   if (bitsize > BITS_PER_WORD)
1336     {
1337       /* Here we transfer the words of the field
1338          in the order least significant first.
1339          This is because the most significant word is the one which may
1340          be less than full.  */
1341
1342       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1343       unsigned int i;
1344
1345       if (target == 0 || !REG_P (target))
1346         target = gen_reg_rtx (mode);
1347
1348       /* Indicate for flow that the entire target reg is being set.  */
1349       emit_clobber (target);
1350
1351       for (i = 0; i < nwords; i++)
1352         {
1353           /* If I is 0, use the low-order word in both field and target;
1354              if I is 1, use the next to lowest word; and so on.  */
1355           /* Word number in TARGET to use.  */
1356           unsigned int wordnum
1357             = (WORDS_BIG_ENDIAN
1358                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1359                : i);
1360           /* Offset from start of field in OP0.  */
1361           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1362                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1363                                                 * (int) BITS_PER_WORD))
1364                                      : (int) i * BITS_PER_WORD);
1365           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1366           rtx result_part
1367             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1368                                            bitsize - i * BITS_PER_WORD),
1369                                  bitnum + bit_offset, 1, false, target_part, mode,
1370                                  word_mode);
1371
1372           gcc_assert (target_part);
1373
1374           if (result_part != target_part)
1375             emit_move_insn (target_part, result_part);
1376         }
1377
1378       if (unsignedp)
1379         {
1380           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1381              need to be zero'd out.  */
1382           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1383             {
1384               unsigned int i, total_words;
1385
1386               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1387               for (i = nwords; i < total_words; i++)
1388                 emit_move_insn
1389                   (operand_subword (target,
1390                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1391                                     1, VOIDmode),
1392                    const0_rtx);
1393             }
1394           return target;
1395         }
1396
1397       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1398       target = expand_shift (LSHIFT_EXPR, mode, target,
1399                              build_int_cst (NULL_TREE,
1400                                             GET_MODE_BITSIZE (mode) - bitsize),
1401                              NULL_RTX, 0);
1402       return expand_shift (RSHIFT_EXPR, mode, target,
1403                            build_int_cst (NULL_TREE,
1404                                           GET_MODE_BITSIZE (mode) - bitsize),
1405                            NULL_RTX, 0);
1406     }
1407
1408   /* From here on we know the desired field is smaller than a word.  */
1409
1410   /* Check if there is a correspondingly-sized integer field, so we can
1411      safely extract it as one size of integer, if necessary; then
1412      truncate or extend to the size that is wanted; then use SUBREGs or
1413      convert_to_mode to get one of the modes we really wanted.  */
1414
1415   int_mode = int_mode_for_mode (tmode);
1416   if (int_mode == BLKmode)
1417     int_mode = int_mode_for_mode (mode);
1418   /* Should probably push op0 out to memory and then do a load.  */
1419   gcc_assert (int_mode != BLKmode);
1420
1421   /* OFFSET is the number of words or bytes (UNIT says which)
1422      from STR_RTX to the first word or byte containing part of the field.  */
1423   if (!MEM_P (op0))
1424     {
1425       if (offset != 0
1426           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1427         {
1428           if (!REG_P (op0))
1429             op0 = copy_to_reg (op0);
1430           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1431                                 op0, (offset * UNITS_PER_WORD));
1432         }
1433       offset = 0;
1434     }
1435
1436   /* Now OFFSET is nonzero only for memory operands.  */
1437   ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1438   icode = unsignedp ? CODE_FOR_extzv : CODE_FOR_extv;
1439   if (ext_mode != MAX_MACHINE_MODE
1440       && bitsize > 0
1441       && GET_MODE_BITSIZE (ext_mode) >= bitsize
1442       /* If op0 is a register, we need it in EXT_MODE to make it
1443          acceptable to the format of ext(z)v.  */
1444       && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1445       && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1446            && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))))
1447     {
1448       struct expand_operand ops[4];
1449       unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1450       rtx xop0 = op0;
1451       rtx xtarget = target;
1452       rtx xspec_target = target;
1453       rtx xspec_target_subreg = 0;
1454
1455       /* If op0 is a register, we need it in EXT_MODE to make it
1456          acceptable to the format of ext(z)v.  */
1457       if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1458         xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1459       if (MEM_P (xop0))
1460         /* Get ref to first byte containing part of the field.  */
1461         xop0 = adjust_address (xop0, byte_mode, xoffset);
1462
1463       /* On big-endian machines, we count bits from the most significant.
1464          If the bit field insn does not, we must invert.  */
1465       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1466         xbitpos = unit - bitsize - xbitpos;
1467
1468       /* Now convert from counting within UNIT to counting in EXT_MODE.  */
1469       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1470         xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1471
1472       unit = GET_MODE_BITSIZE (ext_mode);
1473
1474       if (xtarget == 0)
1475         xtarget = xspec_target = gen_reg_rtx (tmode);
1476
1477       if (GET_MODE (xtarget) != ext_mode)
1478         {
1479           /* Don't use LHS paradoxical subreg if explicit truncation is needed
1480              between the mode of the extraction (word_mode) and the target
1481              mode.  Instead, create a temporary and use convert_move to set
1482              the target.  */
1483           if (REG_P (xtarget)
1484               && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (GET_MODE (xtarget)),
1485                                         GET_MODE_BITSIZE (ext_mode)))
1486             {
1487               xtarget = gen_lowpart (ext_mode, xtarget);
1488               if (GET_MODE_SIZE (ext_mode)
1489                   > GET_MODE_SIZE (GET_MODE (xspec_target)))
1490                 xspec_target_subreg = xtarget;
1491             }
1492           else
1493             xtarget = gen_reg_rtx (ext_mode);
1494         }
1495
1496       create_output_operand (&ops[0], xtarget, ext_mode);
1497       create_fixed_operand (&ops[1], xop0);
1498       create_integer_operand (&ops[2], bitsize);
1499       create_integer_operand (&ops[3], xbitpos);
1500       if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
1501                              4, ops))
1502         {
1503           xtarget = ops[0].value;
1504           if (xtarget == xspec_target)
1505             return xtarget;
1506           if (xtarget == xspec_target_subreg)
1507             return xspec_target;
1508           return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1509         }
1510     }
1511
1512   /* If OP0 is a memory, try copying it to a register and seeing if a
1513      cheap register alternative is available.  */
1514   if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1515     {
1516       enum machine_mode bestmode;
1517
1518       /* Get the mode to use for inserting into this field.  If
1519          OP0 is BLKmode, get the smallest mode consistent with the
1520          alignment. If OP0 is a non-BLKmode object that is no
1521          wider than EXT_MODE, use its mode. Otherwise, use the
1522          smallest mode containing the field.  */
1523
1524       if (GET_MODE (op0) == BLKmode
1525           || (ext_mode != MAX_MACHINE_MODE
1526               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1527         bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
1528                                   (ext_mode == MAX_MACHINE_MODE
1529                                    ? VOIDmode : ext_mode),
1530                                   MEM_VOLATILE_P (op0));
1531       else
1532         bestmode = GET_MODE (op0);
1533
1534       if (bestmode != VOIDmode
1535           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1536                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1537         {
1538           unsigned HOST_WIDE_INT xoffset, xbitpos;
1539
1540           /* Compute the offset as a multiple of this unit,
1541              counting in bytes.  */
1542           unit = GET_MODE_BITSIZE (bestmode);
1543           xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1544           xbitpos = bitnum % unit;
1545
1546           /* Make sure the register is big enough for the whole field.  */
1547           if (xoffset * BITS_PER_UNIT + unit
1548               >= offset * BITS_PER_UNIT + bitsize)
1549             {
1550               rtx last, result, xop0;
1551
1552               last = get_last_insn ();
1553
1554               /* Fetch it to a register in that size.  */
1555               xop0 = adjust_address (op0, bestmode, xoffset);
1556               xop0 = force_reg (bestmode, xop0);
1557               result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1558                                             unsignedp, packedp, target,
1559                                             mode, tmode, false);
1560               if (result)
1561                 return result;
1562
1563               delete_insns_since (last);
1564             }
1565         }
1566     }
1567
1568   if (!fallback_p)
1569     return NULL;
1570
1571   target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1572                                     bitpos, target, unsignedp, packedp);
1573   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1574 }
1575
1576 /* Generate code to extract a byte-field from STR_RTX
1577    containing BITSIZE bits, starting at BITNUM,
1578    and put it in TARGET if possible (if TARGET is nonzero).
1579    Regardless of TARGET, we return the rtx for where the value is placed.
1580
1581    STR_RTX is the structure containing the byte (a REG or MEM).
1582    UNSIGNEDP is nonzero if this is an unsigned bit field.
1583    PACKEDP is nonzero if the field has the packed attribute.
1584    MODE is the natural mode of the field value once extracted.
1585    TMODE is the mode the caller would like the value to have;
1586    but the value may be returned with type MODE instead.
1587
1588    If a TARGET is specified and we can store in it at no extra cost,
1589    we do so, and return TARGET.
1590    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1591    if they are equally easy.  */
1592
1593 rtx
1594 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1595                    unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1596                    rtx target, enum machine_mode mode, enum machine_mode tmode)
1597 {
1598   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1599                               target, mode, tmode, true);
1600 }
1601 \f
1602 /* Extract a bit field using shifts and boolean operations.
1603    Returns an rtx to represent the value.
1604    OP0 addresses a register (word) or memory (byte).
1605    BITPOS says which bit within the word or byte the bit field starts in.
1606    OFFSET says how many bytes farther the bit field starts;
1607     it is 0 if OP0 is a register.
1608    BITSIZE says how many bits long the bit field is.
1609     (If OP0 is a register, it may be narrower than a full word,
1610      but BITPOS still counts within a full word,
1611      which is significant on bigendian machines.)
1612
1613    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1614    PACKEDP is true if the field has the packed attribute.
        TMODE is the mode the unsigned result is converted to; it is also
         the last-resort access mode for a volatile MEM when
         flag_strict_volatile_bitfields is positive.
1615
1616    If TARGET is nonzero, attempts to store the value there
1617    and return TARGET, but this is not guaranteed.
1618    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.

        The work is handed to extract_split_bit_field when a register
        field does not fit in one word, when no single access mode covers
        a memory field, or when a packed volatile field would need a
        misaligned access on a STRICT_ALIGNMENT target.  */
1619
1620 static rtx
1621 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1622                          unsigned HOST_WIDE_INT offset,
1623                          unsigned HOST_WIDE_INT bitsize,
1624                          unsigned HOST_WIDE_INT bitpos, rtx target,
1625                          int unsignedp, bool packedp)
1626 {
       /* Width in bits of the unit the field is extracted from; stays at
          BITS_PER_WORD for registers and becomes the width of the chosen
          access mode for memory.  */
1627   unsigned int total_bits = BITS_PER_WORD;
1628   enum machine_mode mode;
1629
1630   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1631     {
1632       /* Special treatment for a bit field split across two registers.  */
1633       if (bitsize + bitpos > BITS_PER_WORD)
1634         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1635     }
1636   else
1637     {
1638       /* Get the proper mode to use for this field.  We want a mode that
1639          includes the entire field.  If such a mode would be larger than
1640          a word, we won't be doing the extraction the normal way.  */
1641
1642       if (MEM_VOLATILE_P (op0)
1643           && flag_strict_volatile_bitfields > 0)
1644         {
               /* Strict volatile bit-fields: prefer the mode of the memory
                  reference itself, then the mode of TARGET, then TMODE,
                  taking the first one that has a nonzero bit size.  */
1645           if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1646             mode = GET_MODE (op0);
1647           else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1648             mode = GET_MODE (target);
1649           else
1650             mode = tmode;
1651         }
1652       else
1653         mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1654                               MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1655
1656       if (mode == VOIDmode)
1657         /* The only way this should occur is if the field spans word
1658            boundaries.  */
1659         return extract_split_bit_field (op0, bitsize,
1660                                         bitpos + offset * BITS_PER_UNIT,
1661                                         unsignedp);
1662
1663       total_bits = GET_MODE_BITSIZE (mode);
1664
1665       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1666          be in the range 0 to total_bits-1, and put any excess bytes in
1667          OFFSET.  */
1668       if (bitpos >= total_bits)
1669         {
1670           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1671           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1672                      * BITS_PER_UNIT);
1673         }
1674
1675       /* If we're accessing a volatile MEM, we can't do the next
1676          alignment step if it results in a multi-word access where we
1677          otherwise wouldn't have one.  So, check for that case
1678          here.  The last condition tests whether the field would run
             past the end of the unit once OFFSET is rounded down to a
             multiple of the unit size.  */
1679       if (MEM_P (op0)
1680           && MEM_VOLATILE_P (op0)
1681           && flag_strict_volatile_bitfields > 0
1682           && bitpos + bitsize <= total_bits
1683           && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
1684         {
1685           if (STRICT_ALIGNMENT)
1686             {
                   /* Function-local static so that the explanatory note
                      below is issued at most once.  */
1687               static bool informed_about_misalignment = false;
1688               bool warned;
1689
1690               if (packedp)
1691                 {
                       /* Packed field: warn, then read it in several pieces
                          instead of with one misaligned access.
                          NOTE(review): WARNED is assigned in this branch but
                          not read before the return.  */
1692                   if (bitsize == total_bits)
1693                     warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1694                                          "multiple accesses to volatile structure member"
1695                                          " because of packed attribute");
1696                   else
1697                     warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1698                                          "multiple accesses to volatile structure bitfield"
1699                                          " because of packed attribute");
1700
1701                   return extract_split_bit_field (op0, bitsize,
1702                                                   bitpos + offset * BITS_PER_UNIT,
1703                                                   unsignedp);
1704                 }
1705
                   /* Not packed: keep the single access at the unrounded
                      OFFSET and warn about it.  */
1706               if (bitsize == total_bits)
1707                 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1708                                      "mis-aligned access used for structure member");
1709               else
1710                 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1711                                      "mis-aligned access used for structure bitfield");
1712
1713               if (! informed_about_misalignment && warned)
1714                 {
1715                   informed_about_misalignment = true;
1716                   inform (input_location,
1717                           "when a volatile object spans multiple type-sized locations,"
1718                           " the compiler must choose between using a single mis-aligned access to"
1719                           " preserve the volatility, or using multiple aligned accesses to avoid"
1720                           " runtime faults; this code may fail at runtime if the hardware does"
1721                           " not allow this access");
1722                 }
1723             }
               /* In this branch BITPOS and OFFSET are deliberately left as
                  they are, so the adjust_address call below uses the
                  unrounded OFFSET.  */
1724         }
1725       else
1726         {
1727
1728           /* Get ref to an aligned byte, halfword, or word containing the field.
1729              Adjust BITPOS to be position within a word,
1730              and OFFSET to be the offset of that word.
1731              Then alter OP0 to refer to that word.  */
1732           bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1733           offset -= (offset % (total_bits / BITS_PER_UNIT));
1734         }
1735
1736       op0 = adjust_address (op0, mode, offset);
1737     }
1738
       /* From here on MODE is the mode of OP0 (as adjusted above when OP0
          is memory).  */
1739   mode = GET_MODE (op0);
1740
1741   if (BYTES_BIG_ENDIAN)
1742     /* BITPOS is the distance between our msb and that of OP0.
1743        Convert it to the distance from the lsb.  */
1744     bitpos = total_bits - bitsize - bitpos;
1745
1746   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1747      We have reduced the big-endian case to the little-endian case.  */
1748
1749   if (unsignedp)
1750     {
1751       if (bitpos)
1752         {
1753           /* If the field does not already start at the lsb,
1754              shift it so it does.  */
1755           tree amount = build_int_cst (NULL_TREE, bitpos);
1756           /* Maybe propagate the target for the shift.  */
1757           /* But not if we will return it--could confuse integrate.c.  */
1758           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
               /* TARGET is only usable for the shift when no mode
                  conversion follows.  */
1759           if (tmode != mode) subtarget = 0;
1760           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1761         }
1762       /* Convert the value to the desired mode.  */
1763       if (mode != tmode)
1764         op0 = convert_to_mode (tmode, op0, 1);
1765
1766       /* Unless the msb of the field used to be the msb when we shifted,
1767          mask out the upper bits.  */
1768
1769       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1770         return expand_binop (GET_MODE (op0), and_optab, op0,
1771                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1772                              target, 1, OPTAB_LIB_WIDEN);
1773       return op0;
1774     }
1775
1776   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1777      then arithmetic-shift its lsb to the lsb of the word.  */
1778   op0 = force_reg (mode, op0);
       /* TARGET cannot be used directly if its mode differs from OP0's.  */
1779   if (mode != tmode)
1780     target = 0;
1781
1782   /* Find the narrowest integer mode that contains the field.  */
1783
1784   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1785        mode = GET_MODE_WIDER_MODE (mode))
1786     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1787       {
1788         op0 = convert_to_mode (mode, op0, 0);
1789         break;
1790       }
1791
       /* Left-shift so the field's msb becomes the msb of MODE, unless it
          already is.  */
1792   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1793     {
1794       tree amount
1795         = build_int_cst (NULL_TREE,
1796                          GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1797       /* Maybe propagate the target for the shift.  */
1798       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1799       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1800     }
1801
       /* The arithmetic right shift brings the field down to the lsb and
          sign-extends it.  */
1802   return expand_shift (RSHIFT_EXPR, mode, op0,
1803                        build_int_cst (NULL_TREE,
1804                                       GET_MODE_BITSIZE (mode) - bitsize),
1805                        target, 0);
1806 }
1807 \f
1808 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1809    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1810    complement of that if COMPLEMENT.  The mask is truncated if
1811    necessary to the width of mode MODE.  The mask is zero-extended if
1812    BITSIZE+BITPOS is too small for MODE.  */
1813
1814 static rtx
1815 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1816 {
1817   double_int mask;
1818
1819   mask = double_int_mask (bitsize);
1820   mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1821
1822   if (complement)
1823     mask = double_int_not (mask);
1824
1825   return immed_double_int_const (mask, mode);
1826 }
1827
1828 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1829    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1830
1831 static rtx
1832 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1833 {
1834   double_int val;
1835
1836   val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
1837   val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1838
1839   return immed_double_int_const (val, mode);
1840 }
1841 \f
1842 /* Extract a bit field that is split across two words
1843    and return an RTX for the result.
1844
1845    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1846    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1847    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1848
1849 static rtx
1850 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1851                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1852 {
1853   unsigned int unit;
1854   unsigned int bitsdone = 0;
1855   rtx result = NULL_RTX;
1856   int first = 1;
1857
1858   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1859      much at a time.  */
1860   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1861     unit = BITS_PER_WORD;
1862   else
1863     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1864
1865   while (bitsdone < bitsize)
1866     {
1867       unsigned HOST_WIDE_INT thissize;
1868       rtx part, word;
1869       unsigned HOST_WIDE_INT thispos;
1870       unsigned HOST_WIDE_INT offset;
1871
1872       offset = (bitpos + bitsdone) / unit;
1873       thispos = (bitpos + bitsdone) % unit;
1874
1875       /* THISSIZE must not overrun a word boundary.  Otherwise,
1876          extract_fixed_bit_field will call us again, and we will mutually
1877          recurse forever.  */
1878       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1879       thissize = MIN (thissize, unit - thispos);
1880
1881       /* If OP0 is a register, then handle OFFSET here.
1882
1883          When handling multiword bitfields, extract_bit_field may pass
1884          down a word_mode SUBREG of a larger REG for a bitfield that actually
1885          crosses a word boundary.  Thus, for a SUBREG, we must find
1886          the current word starting from the base register.  */
1887       if (GET_CODE (op0) == SUBREG)
1888         {
1889           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1890           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1891                                         GET_MODE (SUBREG_REG (op0)));
1892           offset = 0;
1893         }
1894       else if (REG_P (op0))
1895         {
1896           word = operand_subword_force (op0, offset, GET_MODE (op0));
1897           offset = 0;
1898         }
1899       else
1900         word = op0;
1901
1902       /* Extract the parts in bit-counting order,
1903          whose meaning is determined by BYTES_PER_UNIT.
1904          OFFSET is in UNITs, and UNIT is in bits.
1905          extract_fixed_bit_field wants offset in bytes.  */
1906       part = extract_fixed_bit_field (word_mode, word,
1907                                       offset * unit / BITS_PER_UNIT,
1908                                       thissize, thispos, 0, 1, false);
1909       bitsdone += thissize;
1910
1911       /* Shift this part into place for the result.  */
1912       if (BYTES_BIG_ENDIAN)
1913         {
1914           if (bitsize != bitsdone)
1915             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1916                                  build_int_cst (NULL_TREE, bitsize - bitsdone),
1917                                  0, 1);
1918         }
1919       else
1920         {
1921           if (bitsdone != thissize)
1922             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1923                                  build_int_cst (NULL_TREE,
1924                                                 bitsdone - thissize), 0, 1);
1925         }
1926
1927       if (first)
1928         result = part;
1929       else
1930         /* Combine the parts with bitwise or.  This works
1931            because we extracted each part as an unsigned bit field.  */
1932         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1933                                OPTAB_LIB_WIDEN);
1934
1935       first = 0;
1936     }
1937
1938   /* Unsigned bit field: we are done.  */
1939   if (unsignedp)
1940     return result;
1941   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1942   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1943                          build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1944                          NULL_RTX, 0);
1945   return expand_shift (RSHIFT_EXPR, word_mode, result,
1946                        build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1947                        NULL_RTX, 0);
1948 }
1949 \f
1950 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1951    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
1952    MODE, fill the upper bits with zeros.  Fail if the layout of either
1953    mode is unknown (as for CC modes) or if the extraction would involve
1954    unprofitable mode punning.  Return the value on success, otherwise
1955    return null.
1956
1957    This is different from gen_lowpart* in these respects:
1958
1959      - the returned value must always be considered an rvalue
1960
1961      - when MODE is wider than SRC_MODE, the extraction involves
1962        a zero extension
1963
1964      - when MODE is smaller than SRC_MODE, the extraction involves
1965        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
1966
1967    In other words, this routine performs a computation, whereas the
1968    gen_lowpart* routines are conceptually lvalue or rvalue subreg
1969    operations.  */
1970
1971 rtx
1972 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
1973 {
1974   enum machine_mode int_mode, src_int_mode;
1975
1976   if (mode == src_mode)
1977     return src;
1978
1979   if (CONSTANT_P (src))
1980     {
1981       /* simplify_gen_subreg can't be used here, as if simplify_subreg
1982          fails, it will happily create (subreg (symbol_ref)) or similar
1983          invalid SUBREGs.  */
1984       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
1985       rtx ret = simplify_subreg (mode, src, src_mode, byte);
1986       if (ret)
1987         return ret;
1988
1989       if (GET_MODE (src) == VOIDmode
1990           || !validate_subreg (mode, src_mode, src, byte))
1991         return NULL_RTX;
1992
1993       src = force_reg (GET_MODE (src), src);
1994       return gen_rtx_SUBREG (mode, src, byte);
1995     }
1996
1997   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
1998     return NULL_RTX;
1999
2000   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2001       && MODES_TIEABLE_P (mode, src_mode))
2002     {
2003       rtx x = gen_lowpart_common (mode, src);
2004       if (x)
2005         return x;
2006     }
2007
2008   src_int_mode = int_mode_for_mode (src_mode);
2009   int_mode = int_mode_for_mode (mode);
2010   if (src_int_mode == BLKmode || int_mode == BLKmode)
2011     return NULL_RTX;
2012
2013   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2014     return NULL_RTX;
2015   if (!MODES_TIEABLE_P (int_mode, mode))
2016     return NULL_RTX;
2017
2018   src = gen_lowpart (src_int_mode, src);
2019   src = convert_modes (int_mode, src_int_mode, src, true);
2020   src = gen_lowpart (mode, src);
2021   return src;
2022 }
2023 \f
2024 /* Add INC into TARGET.  */
2025
2026 void
2027 expand_inc (rtx target, rtx inc)
2028 {
2029   rtx value = expand_binop (GET_MODE (target), add_optab,
2030                             target, inc,
2031                             target, 0, OPTAB_LIB_WIDEN);
2032   if (value != target)
2033     emit_move_insn (target, value);
2034 }
2035
2036 /* Subtract DEC from TARGET.  */
2037
2038 void
2039 expand_dec (rtx target, rtx dec)
2040 {
2041   rtx value = expand_binop (GET_MODE (target), sub_optab,
2042                             target, dec,
2043                             target, 0, OPTAB_LIB_WIDEN);
2044   if (value != target)
2045     emit_move_insn (target, value);
2046 }
2047 \f
2048 /* Output a shift instruction for expression code CODE,
2049    with SHIFTED being the rtx for the value to shift,
2050    and AMOUNT the tree for the amount to shift by.
2051    Store the result in the rtx TARGET, if that is convenient.
2052    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2053    Return the rtx for where the value is.

        CODE is treated as a left operation when it is LSHIFT_EXPR or
        LROTATE_EXPR and as a rotate when it is LROTATE_EXPR or
        RROTATE_EXPR; anything else is a right shift.
        MODE is the mode handed to expand_binop for the operation.
        The function asserts that some rtx was produced, so it does not
        return null.  */
2054
2055 rtx
2056 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2057               tree amount, rtx target, int unsignedp)
2058 {
2059   rtx op1, temp = 0;
2060   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2061   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
       /* Optabs to use; these scalar-count defaults are replaced by the
          vector/vector variants below when both operands are vectors.  */
2062   optab lshift_optab = ashl_optab;
2063   optab rshift_arith_optab = ashr_optab;
2064   optab rshift_uns_optab = lshr_optab;
2065   optab lrotate_optab = rotl_optab;
2066   optab rrotate_optab = rotr_optab;
2067   enum machine_mode op1_mode;
2068   int attempt;
2069   bool speed = optimize_insn_for_speed_p ();
2070
2071   op1 = expand_normal (amount);
2072   op1_mode = GET_MODE (op1);
2073
2074   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2075      shift amount is a vector, use the vector/vector shift patterns.  */
2076   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2077     {
2078       lshift_optab = vashl_optab;
2079       rshift_arith_optab = vashr_optab;
2080       rshift_uns_optab = vlshr_optab;
2081       lrotate_optab = vrotl_optab;
2082       rrotate_optab = vrotr_optab;
2083     }
2084
2085   /* Previously detected shift-counts computed by NEGATE_EXPR
2086      and shifted in the other direction; but that does not work
2087      on all machines.  */
2088
       /* When SHIFT_COUNT_TRUNCATED is set, reduce an out-of-range constant
          count modulo the bit size of MODE, or look through a lowpart SUBREG
          of an integral-mode value and use the inner rtx as the count.  */
2089   if (SHIFT_COUNT_TRUNCATED)
2090     {
2091       if (CONST_INT_P (op1)
2092           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2093               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2094         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2095                        % GET_MODE_BITSIZE (mode));
2096       else if (GET_CODE (op1) == SUBREG
2097                && subreg_lowpart_p (op1)
2098                && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2099         op1 = SUBREG_REG (op1);
2100     }
2101
       /* A count of zero leaves the value unchanged.  */
2102   if (op1 == const0_rtx)
2103     return shifted;
2104
2105   /* Check whether it's cheaper to implement a left shift by a constant
2106      bit count by a sequence of additions.  Each addition of the value
          to itself doubles it, so INTVAL (op1) additions are emitted.  The
          MAX_BITS_PER_WORD test keeps the shift_cost index in range.  */
2107   if (code == LSHIFT_EXPR
2108       && CONST_INT_P (op1)
2109       && INTVAL (op1) > 0
2110       && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2111       && INTVAL (op1) < MAX_BITS_PER_WORD
2112       && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2113       && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2114     {
2115       int i;
2116       for (i = 0; i < INTVAL (op1); i++)
2117         {
2118           temp = force_reg (mode, shifted);
2119           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2120                                   unsignedp, OPTAB_LIB_WIDEN);
2121         }
2122       return shifted;
2123     }
2124
       /* Make up to three attempts, each with a more permissive optab
          method (OPTAB_DIRECT, OPTAB_WIDEN, OPTAB_LIB_WIDEN), stopping as
          soon as one of them yields a result in TEMP.  */
2125   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2126     {
2127       enum optab_methods methods;
2128
2129       if (attempt == 0)
2130         methods = OPTAB_DIRECT;
2131       else if (attempt == 1)
2132         methods = OPTAB_WIDEN;
2133       else
2134         methods = OPTAB_LIB_WIDEN;
2135
2136       if (rotate)
2137         {
2138           /* Widening does not work for rotation.  */
2139           if (methods == OPTAB_WIDEN)
2140             continue;
2141           else if (methods == OPTAB_LIB_WIDEN)
2142             {
2143               /* If we have been unable to open-code this by a rotation,
2144                  do it as the IOR of two shifts.  I.e., to rotate A
2145                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2146                  where C is the bitsize of A.
2147
2148                  It is theoretically possible that the target machine might
2149                  not be able to perform either shift and hence we would
2150                  be making two libcalls rather than just the one for the
2151                  shift (similarly if IOR could not be done).  We will allow
2152                  this extremely unlikely lossage to avoid complicating the
2153                  code below.  */
2154
                   /* TARGET is not offered to the second shift when it is the
                      very rtx being shifted.  */
2155               rtx subtarget = target == shifted ? 0 : target;
2156               tree new_amount, other_amount;
2157               rtx temp1;
2158               tree type = TREE_TYPE (amount);
                   /* Bring OP1 to the mode of AMOUNT's type before turning
                      it back into a tree.  */
2159               if (GET_MODE (op1) != TYPE_MODE (type)
2160                   && GET_MODE (op1) != VOIDmode)
2161                 op1 = convert_to_mode (TYPE_MODE (type), op1, 1);
2162               new_amount = make_tree (type, op1);
                   /* OTHER_AMOUNT is the bit size of MODE minus the count.  */
2163               other_amount
2164                 = fold_build2 (MINUS_EXPR, type,
2165                                build_int_cst (type, GET_MODE_BITSIZE (mode)),
2166                                new_amount);
2167
2168               shifted = force_reg (mode, shifted);
2169
                   /* Both halves are logical shifts (UNSIGNEDP is 1).  */
2170               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2171                                    mode, shifted, new_amount, 0, 1);
2172               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2173                                     mode, shifted, other_amount, subtarget, 1);
2174               return expand_binop (mode, ior_optab, temp, temp1, target,
2175                                    unsignedp, methods);
2176             }
2177
               /* Only reached with OPTAB_DIRECT: try the rotate pattern.  */
2178           temp = expand_binop (mode,
2179                                left ? lrotate_optab : rrotate_optab,
2180                                shifted, op1, target, unsignedp, methods);
2181         }
2182       else if (unsignedp)
             /* Logical shift.  */
2183         temp = expand_binop (mode,
2184                              left ? lshift_optab : rshift_uns_optab,
2185                              shifted, op1, target, unsignedp, methods);
2186
2187       /* Do arithmetic shifts.
2188          Also, if we are going to widen the operand, we can just as well
2189          use an arithmetic right-shift instead of a logical one.  */
2190       if (temp == 0 && ! rotate
2191           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2192         {
2193           enum optab_methods methods1 = methods;
2194
2195           /* If trying to widen a log shift to an arithmetic shift,
2196              don't accept an arithmetic shift of the same size.  */
2197           if (unsignedp)
2198             methods1 = OPTAB_MUST_WIDEN;
2199
2200           /* Arithmetic shift */
2201
2202           temp = expand_binop (mode,
2203                                left ? lshift_optab : rshift_arith_optab,
2204                                shifted, op1, target, unsignedp, methods1);
2205         }
2206
2207       /* We used to try extzv here for logical right shifts, but that was
2208          only useful for one machine, the VAX, and caused poor code
2209          generation there for lshrdi3, so the code was deleted and a
2210          define_expand for lshrsi3 was added to vax.md.  */
2211     }
2212
       /* Every attempt failing is treated as an internal error.  */
2213   gcc_assert (temp);
2214   return temp;
2215 }
2216 \f
2217 /* Indicates the type of fixup needed after a constant multiplication.
2218    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2219    the result should be negated, and ADD_VARIANT means that the
2220    multiplicand should be added to the result.  */
2221 enum mult_variant {basic_variant, negate_variant, add_variant};
2222
     /* Forward declarations of file-local helpers for multiplication and
        related expansion.  synth_mult is defined immediately below; the
        others are presumably defined later in this file.  */
2223 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2224                         const struct mult_cost *, enum machine_mode mode);
2225 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2226                                  struct algorithm *, enum mult_variant *, int);
2227 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2228                               const struct algorithm *, enum mult_variant);
2229 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2230                                                  int, rtx *, int *, int *);
2231 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2232 static rtx extract_high_half (enum machine_mode, rtx);
2233 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2234 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2235                                        int, int);
2236 /* Compute the best algorithm for multiplying by T and store it in ALG_OUT.
2237    The algorithm must cost less than COST_LIMIT.
2238    If the cost stored in ALG_OUT is not below COST_LIMIT, no algorithm
2239    was found and all other fields of ALG_OUT are undefined.
2240    MODE is the machine mode of the multiplication.  */
2241
2242 static void
2243 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2244             const struct mult_cost *cost_limit, enum machine_mode mode)
2245 {
2246   int m;
2247   struct algorithm *alg_in, *best_alg;
2248   struct mult_cost best_cost;
2249   struct mult_cost new_limit;
2250   int op_cost, op_latency;
2251   unsigned HOST_WIDE_INT orig_t = t;
2252   unsigned HOST_WIDE_INT q;
2253   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2254   int hash_index;
2255   bool cache_hit = false;
2256   enum alg_code cache_alg = alg_zero;
2257   bool speed = optimize_insn_for_speed_p ();
2258
2259   /* Indicate that no algorithm is yet found.  If no algorithm
2260      is found, this value will be returned and indicate failure.  */
2261   alg_out->cost.cost = cost_limit->cost + 1;
2262   alg_out->cost.latency = cost_limit->latency + 1;
2263
2264   if (cost_limit->cost < 0
2265       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2266     return;
2267
2268   /* Restrict the bits of "t" to the multiplication's mode.  */
2269   t &= GET_MODE_MASK (mode);
2270
2271   /* t == 1 can be done in zero cost.  */
2272   if (t == 1)
2273     {
2274       alg_out->ops = 1;
2275       alg_out->cost.cost = 0;
2276       alg_out->cost.latency = 0;
2277       alg_out->op[0] = alg_m;
2278       return;
2279     }
2280
2281   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2282      fail now.  */
2283   if (t == 0)
2284     {
2285       if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2286         return;
2287       else
2288         {
2289           alg_out->ops = 1;
2290           alg_out->cost.cost = zero_cost[speed];
2291           alg_out->cost.latency = zero_cost[speed];
2292           alg_out->op[0] = alg_zero;
2293           return;
2294         }
2295     }
2296
2297   /* We'll be needing a couple extra algorithm structures now.  */
2298
2299   alg_in = XALLOCA (struct algorithm);
2300   best_alg = XALLOCA (struct algorithm);
2301   best_cost = *cost_limit;
2302
2303   /* Compute the hash index.  */
2304   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2305
2306   /* See if we already know what to do for T.  */
2307   if (alg_hash[hash_index].t == t
2308       && alg_hash[hash_index].mode == mode
2309       && alg_hash[hash_index].mode == mode
2310       && alg_hash[hash_index].speed == speed
2311       && alg_hash[hash_index].alg != alg_unknown)
2312     {
2313       cache_alg = alg_hash[hash_index].alg;
2314
2315       if (cache_alg == alg_impossible)
2316         {
2317           /* The cache tells us that it's impossible to synthesize
2318              multiplication by T within alg_hash[hash_index].cost.  */
2319           if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2320             /* COST_LIMIT is at least as restrictive as the one
2321                recorded in the hash table, in which case we have no
2322                hope of synthesizing a multiplication.  Just
2323                return.  */
2324             return;
2325
2326           /* If we get here, COST_LIMIT is less restrictive than the
2327              one recorded in the hash table, so we may be able to
2328              synthesize a multiplication.  Proceed as if we didn't
2329              have the cache entry.  */
2330         }
2331       else
2332         {
2333           if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2334             /* The cached algorithm shows that this multiplication
2335                requires more cost than COST_LIMIT.  Just return.  This
2336                way, we don't clobber this cache entry with
2337                alg_impossible but retain useful information.  */
2338             return;
2339
2340           cache_hit = true;
2341
2342           switch (cache_alg)
2343             {
2344             case alg_shift:
2345               goto do_alg_shift;
2346
2347             case alg_add_t_m2:
2348             case alg_sub_t_m2:
2349               goto do_alg_addsub_t_m2;
2350
2351             case alg_add_factor:
2352             case alg_sub_factor:
2353               goto do_alg_addsub_factor;
2354
2355             case alg_add_t2_m:
2356               goto do_alg_add_t2_m;
2357
2358             case alg_sub_t2_m:
2359               goto do_alg_sub_t2_m;
2360
2361             default:
2362               gcc_unreachable ();
2363             }
2364         }
2365     }
2366
2367   /* If we have a group of zero bits at the low-order part of T, try
2368      multiplying by the remaining bits and then doing a shift.  */
2369
2370   if ((t & 1) == 0)
2371     {
2372     do_alg_shift:
2373       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2374       if (m < maxm)
2375         {
2376           q = t >> m;
2377           /* The function expand_shift will choose between a shift and
2378              a sequence of additions, so the observed cost is given as
2379              MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]).  */
2380           op_cost = m * add_cost[speed][mode];
2381           if (shift_cost[speed][mode][m] < op_cost)
2382             op_cost = shift_cost[speed][mode][m];
2383           new_limit.cost = best_cost.cost - op_cost;
2384           new_limit.latency = best_cost.latency - op_cost;
2385           synth_mult (alg_in, q, &new_limit, mode);
2386
2387           alg_in->cost.cost += op_cost;
2388           alg_in->cost.latency += op_cost;
2389           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2390             {
2391               struct algorithm *x;
2392               best_cost = alg_in->cost;
2393               x = alg_in, alg_in = best_alg, best_alg = x;
2394               best_alg->log[best_alg->ops] = m;
2395               best_alg->op[best_alg->ops] = alg_shift;
2396             }
2397
2398           /* See if treating ORIG_T as a signed number yields a better
2399              sequence.  Try this sequence only for a negative ORIG_T
2400              as it would be useless for a non-negative ORIG_T.  */
2401           if ((HOST_WIDE_INT) orig_t < 0)
2402             {
2403               /* Shift ORIG_T as follows because a right shift of a
2404                  negative-valued signed type is implementation
2405                  defined.  */
2406               q = ~(~orig_t >> m);
2407               /* The function expand_shift will choose between a shift
2408                  and a sequence of additions, so the observed cost is
2409                  given as MIN (m * add_cost[speed][mode],
2410                  shift_cost[speed][mode][m]).  */
2411               op_cost = m * add_cost[speed][mode];
2412               if (shift_cost[speed][mode][m] < op_cost)
2413                 op_cost = shift_cost[speed][mode][m];
2414               new_limit.cost = best_cost.cost - op_cost;
2415               new_limit.latency = best_cost.latency - op_cost;
2416               synth_mult (alg_in, q, &new_limit, mode);
2417
2418               alg_in->cost.cost += op_cost;
2419               alg_in->cost.latency += op_cost;
2420               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2421                 {
2422                   struct algorithm *x;
2423                   best_cost = alg_in->cost;
2424                   x = alg_in, alg_in = best_alg, best_alg = x;
2425                   best_alg->log[best_alg->ops] = m;
2426                   best_alg->op[best_alg->ops] = alg_shift;
2427                 }
2428             }
2429         }
2430       if (cache_hit)
2431         goto done;
2432     }
2433
2434   /* If we have an odd number, add or subtract one.  */
2435   if ((t & 1) != 0)
2436     {
2437       unsigned HOST_WIDE_INT w;
2438
2439     do_alg_addsub_t_m2:
2440       for (w = 1; (w & t) != 0; w <<= 1)
2441         ;
2442       /* If T was -1, then W will be zero after the loop.  This is another
2443          case where T ends with ...111.  Handling this with (T + 1) and
2444          subtract 1 produces slightly better code and results in algorithm
2445          selection much faster than treating it like the ...0111 case
2446          below.  */
2447       if (w == 0
2448           || (w > 2
2449               /* Reject the case where t is 3.
2450                  Thus we prefer addition in that case.  */
2451               && t != 3))
2452         {
2453           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2454
2455           op_cost = add_cost[speed][mode];
2456           new_limit.cost = best_cost.cost - op_cost;
2457           new_limit.latency = best_cost.latency - op_cost;
2458           synth_mult (alg_in, t + 1, &new_limit, mode);
2459
2460           alg_in->cost.cost += op_cost;
2461           alg_in->cost.latency += op_cost;
2462           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2463             {
2464               struct algorithm *x;
2465               best_cost = alg_in->cost;
2466               x = alg_in, alg_in = best_alg, best_alg = x;
2467               best_alg->log[best_alg->ops] = 0;
2468               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2469             }
2470         }
2471       else
2472         {
2473           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2474
2475           op_cost = add_cost[speed][mode];
2476           new_limit.cost = best_cost.cost - op_cost;
2477           new_limit.latency = best_cost.latency - op_cost;
2478           synth_mult (alg_in, t - 1, &new_limit, mode);
2479
2480           alg_in->cost.cost += op_cost;
2481           alg_in->cost.latency += op_cost;
2482           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2483             {
2484               struct algorithm *x;
2485               best_cost = alg_in->cost;
2486               x = alg_in, alg_in = best_alg, best_alg = x;
2487               best_alg->log[best_alg->ops] = 0;
2488               best_alg->op[best_alg->ops] = alg_add_t_m2;
2489             }
2490         }
2491
2492       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2493          quickly with a - a * n for some appropriate constant n.  */
2494       m = exact_log2 (-orig_t + 1);
2495       if (m >= 0 && m < maxm)
2496         {
2497           op_cost = shiftsub1_cost[speed][mode][m];
2498           new_limit.cost = best_cost.cost - op_cost;
2499           new_limit.latency = best_cost.latency - op_cost;
2500           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode);
2501
2502           alg_in->cost.cost += op_cost;
2503           alg_in->cost.latency += op_cost;
2504           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2505             {
2506               struct algorithm *x;
2507               best_cost = alg_in->cost;
2508               x = alg_in, alg_in = best_alg, best_alg = x;
2509               best_alg->log[best_alg->ops] = m;
2510               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2511             }
2512         }
2513
2514       if (cache_hit)
2515         goto done;
2516     }
2517
2518   /* Look for factors of t of the form
2519      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2520      If we find such a factor, we can multiply by t using an algorithm that
2521      multiplies by q, shift the result by m and add/subtract it to itself.
2522
2523      We search for large factors first and loop down, even if large factors
2524      are less probable than small; if we find a large factor we will find a
2525      good sequence quickly, and therefore be able to prune (by decreasing
2526      COST_LIMIT) the search.  */
2527
2528  do_alg_addsub_factor:
2529   for (m = floor_log2 (t - 1); m >= 2; m--)
2530     {
2531       unsigned HOST_WIDE_INT d;
2532
2533       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2534       if (t % d == 0 && t > d && m < maxm
2535           && (!cache_hit || cache_alg == alg_add_factor))
2536         {
2537           /* If the target has a cheap shift-and-add instruction use
2538              that in preference to a shift insn followed by an add insn.
2539              Assume that the shift-and-add is "atomic" with a latency
2540              equal to its cost, otherwise assume that on superscalar
2541              hardware the shift may be executed concurrently with the
2542              earlier steps in the algorithm.  */
2543           op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2544           if (shiftadd_cost[speed][mode][m] < op_cost)
2545             {
2546               op_cost = shiftadd_cost[speed][mode][m];
2547               op_latency = op_cost;
2548             }
2549           else
2550             op_latency = add_cost[speed][mode];
2551
2552           new_limit.cost = best_cost.cost - op_cost;
2553           new_limit.latency = best_cost.latency - op_latency;
2554           synth_mult (alg_in, t / d, &new_limit, mode);
2555
2556           alg_in->cost.cost += op_cost;
2557           alg_in->cost.latency += op_latency;
2558           if (alg_in->cost.latency < op_cost)
2559             alg_in->cost.latency = op_cost;
2560           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2561             {
2562               struct algorithm *x;
2563               best_cost = alg_in->cost;
2564               x = alg_in, alg_in = best_alg, best_alg = x;
2565               best_alg->log[best_alg->ops] = m;
2566               best_alg->op[best_alg->ops] = alg_add_factor;
2567             }
2568           /* Other factors will have been taken care of in the recursion.  */
2569           break;
2570         }
2571
2572       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2573       if (t % d == 0 && t > d && m < maxm
2574           && (!cache_hit || cache_alg == alg_sub_factor))
2575         {
2576           /* If the target has a cheap shift-and-subtract insn use
2577              that in preference to a shift insn followed by a sub insn.
2578              Assume that the shift-and-sub is "atomic" with a latency
2579              equal to it's cost, otherwise assume that on superscalar
2580              hardware the shift may be executed concurrently with the
2581              earlier steps in the algorithm.  */
2582           op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2583           if (shiftsub0_cost[speed][mode][m] < op_cost)
2584             {
2585               op_cost = shiftsub0_cost[speed][mode][m];
2586               op_latency = op_cost;
2587             }
2588           else
2589             op_latency = add_cost[speed][mode];
2590
2591           new_limit.cost = best_cost.cost - op_cost;
2592           new_limit.latency = best_cost.latency - op_latency;
2593           synth_mult (alg_in, t / d, &new_limit, mode);
2594
2595           alg_in->cost.cost += op_cost;
2596           alg_in->cost.latency += op_latency;
2597           if (alg_in->cost.latency < op_cost)
2598             alg_in->cost.latency = op_cost;
2599           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2600             {
2601               struct algorithm *x;
2602               best_cost = alg_in->cost;
2603               x = alg_in, alg_in = best_alg, best_alg = x;
2604               best_alg->log[best_alg->ops] = m;
2605               best_alg->op[best_alg->ops] = alg_sub_factor;
2606             }
2607           break;
2608         }
2609     }
2610   if (cache_hit)
2611     goto done;
2612
2613   /* Try shift-and-add (load effective address) instructions,
2614      i.e. do a*3, a*5, a*9.  */
2615   if ((t & 1) != 0)
2616     {
2617     do_alg_add_t2_m:
2618       q = t - 1;
2619       q = q & -q;
2620       m = exact_log2 (q);
2621       if (m >= 0 && m < maxm)
2622         {
2623           op_cost = shiftadd_cost[speed][mode][m];
2624           new_limit.cost = best_cost.cost - op_cost;
2625           new_limit.latency = best_cost.latency - op_cost;
2626           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2627
2628           alg_in->cost.cost += op_cost;
2629           alg_in->cost.latency += op_cost;
2630           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2631             {
2632               struct algorithm *x;
2633               best_cost = alg_in->cost;
2634               x = alg_in, alg_in = best_alg, best_alg = x;
2635               best_alg->log[best_alg->ops] = m;
2636               best_alg->op[best_alg->ops] = alg_add_t2_m;
2637             }
2638         }
2639       if (cache_hit)
2640         goto done;
2641
2642     do_alg_sub_t2_m:
2643       q = t + 1;
2644       q = q & -q;
2645       m = exact_log2 (q);
2646       if (m >= 0 && m < maxm)
2647         {
2648           op_cost = shiftsub0_cost[speed][mode][m];
2649           new_limit.cost = best_cost.cost - op_cost;
2650           new_limit.latency = best_cost.latency - op_cost;
2651           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2652
2653           alg_in->cost.cost += op_cost;
2654           alg_in->cost.latency += op_cost;
2655           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2656             {
2657               struct algorithm *x;
2658               best_cost = alg_in->cost;
2659               x = alg_in, alg_in = best_alg, best_alg = x;
2660               best_alg->log[best_alg->ops] = m;
2661               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2662             }
2663         }
2664       if (cache_hit)
2665         goto done;
2666     }
2667
2668  done:
2669   /* If best_cost has not decreased, we have not found any algorithm.  */
2670   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2671     {
2672       /* We failed to find an algorithm.  Record alg_impossible for
2673          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2674          we are asked to find an algorithm for T within the same or
2675          lower COST_LIMIT, we can immediately return to the
2676          caller.  */
2677       alg_hash[hash_index].t = t;
2678       alg_hash[hash_index].mode = mode;
2679       alg_hash[hash_index].speed = speed;
2680       alg_hash[hash_index].alg = alg_impossible;
2681       alg_hash[hash_index].cost = *cost_limit;
2682       return;
2683     }
2684
2685   /* Cache the result.  */
2686   if (!cache_hit)
2687     {
2688       alg_hash[hash_index].t = t;
2689       alg_hash[hash_index].mode = mode;
2690       alg_hash[hash_index].speed = speed;
2691       alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2692       alg_hash[hash_index].cost.cost = best_cost.cost;
2693       alg_hash[hash_index].cost.latency = best_cost.latency;
2694     }
2695
2696   /* If we are getting a too long sequence for `struct algorithm'
2697      to record, make this search fail.  */
2698   if (best_alg->ops == MAX_BITS_PER_WORD)
2699     return;
2700
2701   /* Copy the algorithm from temporary space to the space at alg_out.
2702      We avoid using structure assignment because the majority of
2703      best_alg is normally undefined, and this is a critical function.  */
2704   alg_out->ops = best_alg->ops + 1;
2705   alg_out->cost = best_cost;
2706   memcpy (alg_out->op, best_alg->op,
2707           alg_out->ops * sizeof *alg_out->op);
2708   memcpy (alg_out->log, best_alg->log,
2709           alg_out->ops * sizeof *alg_out->log);
2710 }
2711 \f
2712 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2713    Try three variations:
2714
2715        - a shift/add sequence based on VAL itself
2716        - a shift/add sequence based on -VAL, followed by a negation
2717        - a shift/add sequence based on VAL - 1, followed by an addition.
2718
2719    Return true if the cheapest of these cost less than MULT_COST,
2720    describing the algorithm in *ALG and final fixup in *VARIANT.
        *VARIANT is set to basic_variant, negate_variant or add_variant
        respectively.  The -VAL variation is only tried when MODE is no
        wider than HOST_BITS_PER_INT.  A negative MULT_COST fails at once;
        otherwise MULT_COST is clamped to the cost of
        2 * GET_MODE_BITSIZE (MODE) additions before searching.
        -VAL and VAL - 1 are computed in unsigned HOST_WIDE_INT (the type
        synth_mult takes) so that the most negative VAL does not cause
        signed overflow.  */
2721
2722 static bool
2723 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2724                      struct algorithm *alg, enum mult_variant *variant,
2725                      int mult_cost)
2726 {
2727   struct algorithm alg2;
2728   struct mult_cost limit;
2729   int op_cost;
2730   bool speed = optimize_insn_for_speed_p ();
2731
2732   /* Fail quickly for impossible bounds.  */
2733   if (mult_cost < 0)
2734     return false;
2735
2736   /* Ensure that mult_cost provides a reasonable upper bound.
2737      Any constant multiplication can be performed with less
2738      than 2 * bits additions.  */
2739   op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2740   if (mult_cost > op_cost)
2741     mult_cost = op_cost;
2742
       /* First variation: VAL itself, with no fixup.  */
2743   *variant = basic_variant;
2744   limit.cost = mult_cost;
2745   limit.latency = mult_cost;
2746   synth_mult (alg, val, &limit, mode);
2747
2748   /* This works only if the inverted value actually fits in an
2749      `unsigned int' */
2750   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2751     {
           /* The budget for -VAL is the best cost so far (or MULT_COST if
              nothing was found yet) minus the cost of the final negation.  */
2752       op_cost = neg_cost[speed][mode];
2753       if (MULT_COST_LESS (&alg->cost, mult_cost))
2754         {
2755           limit.cost = alg->cost.cost - op_cost;
2756           limit.latency = alg->cost.latency - op_cost;
2757         }
2758       else
2759         {
2760           limit.cost = mult_cost - op_cost;
2761           limit.latency = mult_cost - op_cost;
2762         }
2763
           /* Negate in the unsigned type: negating the most negative
              HOST_WIDE_INT in the signed type would overflow.  */
2764       synth_mult (&alg2, -(unsigned HOST_WIDE_INT) val, &limit, mode);
2765       alg2.cost.cost += op_cost;
2766       alg2.cost.latency += op_cost;
2767       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2768         *alg = alg2, *variant = negate_variant;
2769     }
2770
2771   /* This proves very useful for division-by-constant.  */
2772   op_cost = add_cost[speed][mode];
2773   if (MULT_COST_LESS (&alg->cost, mult_cost))
2774     {
2775       limit.cost = alg->cost.cost - op_cost;
2776       limit.latency = alg->cost.latency - op_cost;
2777     }
2778   else
2779     {
2780       limit.cost = mult_cost - op_cost;
2781       limit.latency = mult_cost - op_cost;
2782     }
2783
       /* Likewise subtract in the unsigned type so that VAL - 1 cannot
          overflow.  */
2784   synth_mult (&alg2, (unsigned HOST_WIDE_INT) val - 1, &limit, mode);
2785   alg2.cost.cost += op_cost;
2786   alg2.cost.latency += op_cost;
2787   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2788     *alg = alg2, *variant = add_variant;
2789
2790   return MULT_COST_LESS (&alg->cost, mult_cost);
2791 }
2792
2793 /* A subroutine of expand_mult, used for constant multiplications.
2794    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2795    convenient.  Use the shift/add sequence described by ALG and apply
2796    the final fixup specified by VARIANT.
        ALG->op[0] must be alg_zero or alg_m.  VAL itself is only used for
        the consistency check at the end: the multiplier actually built up
        while following ALG and VARIANT must equal VAL within MODE.
        Returns the rtx holding the product.  */
2797
2798 static rtx
2799 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2800                    rtx target, const struct algorithm *alg,
2801                    enum mult_variant variant)
2802 {
       /* VAL_SO_FAR tracks the constant by which ACCUM equals OP0
          multiplied after each step.
          NOTE(review): it is a signed HOST_WIDE_INT that can go negative
          (e.g. after alg_sub_t_m2) and is then shifted left, which ISO C
          leaves undefined; consider an unsigned type.  */
2803   HOST_WIDE_INT val_so_far;
2804   rtx insn, accum, tem;
2805   int opno;
2806   enum machine_mode nmode;
2807
2808   /* Avoid referencing memory over and over and invalid sharing
2809      on SUBREGs.  */
2810   op0 = force_reg (mode, op0);
2811
2812   /* ACCUM starts out either as OP0 or as a zero, depending on
2813      the first operation.  */
2814
2815   if (alg->op[0] == alg_zero)
2816     {
2817       accum = copy_to_mode_reg (mode, const0_rtx);
2818       val_so_far = 0;
2819     }
2820   else if (alg->op[0] == alg_m)
2821     {
2822       accum = copy_to_mode_reg (mode, op0);
2823       val_so_far = 1;
2824     }
2825   else
2826     gcc_unreachable ();
2827
2828   for (opno = 1; opno < alg->ops; opno++)
2829     {
2830       int log = alg->log[opno];
2831       rtx shift_subtarget = optimize ? 0 : accum;
           /* TARGET is used directly only for the last step, and only when
              not optimizing and no add_variant fixup follows.  */
2832       rtx add_target
2833         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2834            && !optimize)
2835           ? target : 0;
2836       rtx accum_target = optimize ? 0 : accum;
2837
2838       switch (alg->op[opno])
2839         {
2840         case alg_shift:
               /* accum <<= log  */
2841           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2842                               build_int_cst (NULL_TREE, log),
2843                               NULL_RTX, 0);
2844           /* REG_EQUAL note will be attached to the following insn.  */
2845           emit_move_insn (accum, tem);
2846           val_so_far <<= log;
2847           break;
2848
2849         case alg_add_t_m2:
               /* accum += op0 << log  */
2850           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2851                               build_int_cst (NULL_TREE, log),
2852                               NULL_RTX, 0);
2853           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2854                                  add_target ? add_target : accum_target);
2855           val_so_far += (HOST_WIDE_INT) 1 << log;
2856           break;
2857
2858         case alg_sub_t_m2:
               /* accum -= op0 << log  */
2859           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2860                               build_int_cst (NULL_TREE, log),
2861                               NULL_RTX, 0);
2862           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2863                                  add_target ? add_target : accum_target);
2864           val_so_far -= (HOST_WIDE_INT) 1 << log;
2865           break;
2866
2867         case alg_add_t2_m:
               /* accum = (accum << log) + op0  */
2868           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2869                                 build_int_cst (NULL_TREE, log),
2870                                 shift_subtarget,
2871                                 0);
2872           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2873                                  add_target ? add_target : accum_target);
2874           val_so_far = (val_so_far << log) + 1;
2875           break;
2876
2877         case alg_sub_t2_m:
               /* accum = (accum << log) - op0  */
2878           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2879                                 build_int_cst (NULL_TREE, log),
2880                                 shift_subtarget, 0);
2881           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2882                                  add_target ? add_target : accum_target);
2883           val_so_far = (val_so_far << log) - 1;
2884           break;
2885
2886         case alg_add_factor:
               /* accum += accum << log  */
2887           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2888                               build_int_cst (NULL_TREE, log),
2889                               NULL_RTX, 0);
2890           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2891                                  add_target ? add_target : accum_target);
2892           val_so_far += val_so_far << log;
2893           break;
2894
2895         case alg_sub_factor:
               /* accum = (accum << log) - accum  */
2896           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2897                               build_int_cst (NULL_TREE, log),
2898                               NULL_RTX, 0);
2899           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2900                                  (add_target
2901                                   ? add_target : (optimize ? 0 : tem)));
2902           val_so_far = (val_so_far << log) - val_so_far;
2903           break;
2904
2905         default:
2906           gcc_unreachable ();
2907         }
2908
2909       /* Write a REG_EQUAL note on the last insn so that we can cse
2910          multiplication sequences.  Note that if ACCUM is a SUBREG,
2911          we've set the inner register and must properly indicate
2912          that.  */
2913
2914       tem = op0, nmode = mode;
2915       if (GET_CODE (accum) == SUBREG)
2916         {
2917           nmode = GET_MODE (SUBREG_REG (accum));
2918           tem = gen_lowpart (nmode, op0);
2919         }
2920
           /* The note goes on whatever insn was emitted last, so the
              emission order of the steps above matters.  */
2921       insn = get_last_insn ();
2922       set_unique_reg_note (insn, REG_EQUAL,
2923                            gen_rtx_MULT (nmode, tem,
2924                                          GEN_INT (val_so_far)));
2925     }
2926
       /* Apply the final fixup requested by VARIANT; basic_variant needs
          none.  */
2927   if (variant == negate_variant)
2928     {
2929       val_so_far = -val_so_far;
2930       accum = expand_unop (mode, neg_optab, accum, target, 0);
2931     }
2932   else if (variant == add_variant)
2933     {
2934       val_so_far = val_so_far + 1;
2935       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2936     }
2937
2938   /* Compare only the bits of val and val_so_far that are significant
2939      in the result mode, to avoid sign-/zero-extension confusion.  */
2940   val &= GET_MODE_MASK (mode);
2941   val_so_far &= GET_MODE_MASK (mode);
2942   gcc_assert (val == val_so_far);
2943
2944   return accum;
2945 }
2946
2947 /* Perform a multiplication and return an rtx for the result.
2948    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2949    TARGET is a suggestion for where to store the result (an rtx).
        UNSIGNEDP is passed on to the shift and binop expanders.  When it
        is zero and flag_trapv is set, MODE_INT multiplications use the
        trapping optabs (negv_optab, smulv_optab) and scalar integer
        multiplications are not turned into shift/add sequences.
2950
2951    We check specially for a constant integer as OP1.
2952    If you want this check for OP0 as well, then before calling
2953    you should swap the two operands if OP0 would be constant.  */
2954
2955 rtx
2956 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
2957              int unsignedp)
2958 {
2959   enum mult_variant variant;
2960   struct algorithm algorithm;
2961   int max_cost;
2962   bool speed = optimize_insn_for_speed_p ();
2963
2964   /* Handling const0_rtx here allows us to use zero as a rogue value for
2965      coeff below.  */
2966   if (op1 == const0_rtx)
2967     return const0_rtx;
2968   if (op1 == const1_rtx)
2969     return op0;
2970   if (op1 == constm1_rtx)
2971     return expand_unop (mode,
2972                         GET_MODE_CLASS (mode) == MODE_INT
2973                         && !unsignedp && flag_trapv
2974                         ? negv_optab : neg_optab,
2975                         op0, target, 0);
2976
2977   /* These are the operations that are potentially turned into a sequence
2978      of shifts and additions.  */
2979   if (SCALAR_INT_MODE_P (mode)
2980       && (unsignedp || !flag_trapv))
2981     {
           /* COEFF stays zero unless a usable constant multiplier is found
              below.  FAKE_REG stands in for OP0 when asking rtx_cost for
              the price of a real multiply.  */
2982       HOST_WIDE_INT coeff = 0;
2983       rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
2984
2985       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
2986          less than or equal in size to `unsigned int' this doesn't matter.
2987          If the mode is larger than `unsigned int', then synth_mult works
2988          only if the constant value exactly fits in an `unsigned int' without
2989          any truncation.  This means that multiplying by negative values does
2990          not work; results are off by 2^32 on a 32 bit machine.  */
2991
2992       if (CONST_INT_P (op1))
2993         {
2994           /* Attempt to handle multiplication of DImode values by negative
2995              coefficients, by performing the multiplication by a positive
2996              multiplier and then inverting the result.  */
2997           if (INTVAL (op1) < 0
2998               && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
2999             {
3000               /* Negate in unsigned HOST_WIDE_INT: that is well defined even
3001                  for the most negative INTVAL (op1), and the result is
                      interpreted as an unsigned coefficient anyway.
3002                  Exclude cost of op0 from max_cost to match the cost
3003                  calculation of the synth_mult.  */
                   HOST_WIDE_INT neg_coeff
                     = (HOST_WIDE_INT) -(unsigned HOST_WIDE_INT) INTVAL (op1);

3004               max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed)
3005                          - neg_cost[speed][mode];
3006               if (max_cost > 0
3007                   && choose_mult_variant (mode, neg_coeff, &algorithm,
3008                                           &variant, max_cost))
3009                 {
3010                   rtx temp = expand_mult_const (mode, op0, neg_coeff,
3011                                                 NULL_RTX, &algorithm,
3012                                                 variant);
3013                   return expand_unop (mode, neg_optab, temp, target, 0);
3014                 }
3015             }
3016           else coeff = INTVAL (op1);
3017         }
3018       else if (GET_CODE (op1) == CONST_DOUBLE)
3019         {
3020           /* If we are multiplying in DImode, it may still be a win
3021              to try to work with shifts and adds.  */
3022           if (CONST_DOUBLE_HIGH (op1) == 0
3023               && CONST_DOUBLE_LOW (op1) > 0)
3024             coeff = CONST_DOUBLE_LOW (op1);
3025           else if (CONST_DOUBLE_LOW (op1) == 0
3026                    && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3027             {
                   /* The constant is a power of two held in the high word:
                      a single left shift does the job.  */
3028               int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3029                           + HOST_BITS_PER_WIDE_INT;
3030               return expand_shift (LSHIFT_EXPR, mode, op0,
3031                                    build_int_cst (NULL_TREE, shift),
3032                                    target, unsignedp);
3033             }
3034         }
3035
3036       /* We used to test optimize here, on the grounds that it's better to
3037          produce a smaller program when -O is not used.  But this causes
3038          such a terrible slowdown sometimes that it seems better to always
3039          use synth_mult.  */
3040       if (coeff != 0)
3041         {
3042           /* Special case powers of two.  */
3043           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3044             return expand_shift (LSHIFT_EXPR, mode, op0,
3045                                  build_int_cst (NULL_TREE, floor_log2 (coeff)),
3046                                  target, unsignedp);
3047
3048           /* Exclude cost of op0 from max_cost to match the cost
3049              calculation of the synth_mult.  */
3050           max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed);
3051           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3052                                    max_cost))
3053             return expand_mult_const (mode, op0, coeff, target,
3054                                       &algorithm, variant);
3055         }
3056     }
3057
       /* Put a CONST_DOUBLE operand second so the check below sees it.  */
3058   if (GET_CODE (op0) == CONST_DOUBLE)
3059     {
3060       rtx temp = op0;
3061       op0 = op1;
3062       op1 = temp;
3063     }
3064
3065   /* Expand x*2.0 as x+x.  */
3066   if (GET_CODE (op1) == CONST_DOUBLE
3067       && SCALAR_FLOAT_MODE_P (mode))
3068     {
3069       REAL_VALUE_TYPE d;
3070       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3071
3072       if (REAL_VALUES_EQUAL (d, dconst2))
3073         {
3074           op0 = force_reg (GET_MODE (op0), op0);
3075           return expand_binop (mode, add_optab, op0, op0,
3076                                target, unsignedp, OPTAB_LIB_WIDEN);
3077         }
3078     }
3079
3080   /* This used to use umul_optab if unsigned, but for non-widening multiply
3081      there is no difference between signed and unsigned.  */
3082   op0 = expand_binop (mode,
3083                       ! unsignedp
3084                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3085                       ? smulv_optab : smul_optab,
3086                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3087   gcc_assert (op0);
3088   return op0;
3089 }
3090
3091 /* Perform a widening multiplication and return an rtx for the result.
3092    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3093    TARGET is a suggestion for where to store the result (an rtx).
3094    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3095    or smul_widen_optab.
3096
3097    We check specially for a constant integer as OP1, comparing the
3098    cost of a widening multiply against the cost of a sequence of shifts
3099    and adds.  */
3100
3101 rtx
3102 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3103                       int unsignedp, optab this_optab)
3104 {
3105   bool speed = optimize_insn_for_speed_p ();
3106   rtx cop1;
3107
3108   if (CONST_INT_P (op1)
3109       && GET_MODE (op0) != VOIDmode
3110       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3111                                 this_optab == umul_widen_optab))
3112       && CONST_INT_P (cop1)
3113       && (INTVAL (cop1) >= 0
3114           || GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT))
3115     {
3116       HOST_WIDE_INT coeff = INTVAL (cop1);
3117       int max_cost;
3118       enum mult_variant variant;
3119       struct algorithm algorithm;
3120
3121       /* Special case powers of two.  */
3122       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3123         {
3124           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3125           return expand_shift (LSHIFT_EXPR, mode, op0,
3126                                build_int_cst (NULL_TREE, floor_log2 (coeff)),
3127                                target, unsignedp);
3128         }
3129
3130       /* Exclude cost of op0 from max_cost to match the cost
3131          calculation of the synth_mult.  */
3132       max_cost = mul_widen_cost[speed][mode];
3133       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3134                                max_cost))
3135         {
3136           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3137           return expand_mult_const (mode, op0, coeff, target,
3138                                     &algorithm, variant);
3139         }
3140     }
3141   return expand_binop (mode, this_optab, op0, op1, target,
3142                        unsignedp, OPTAB_LIB_WIDEN);
3143 }
3144 \f
3145 /* Return the smallest n such that 2**n >= X.  */
3146
3147 int
3148 ceil_log2 (unsigned HOST_WIDE_INT x)
3149 {
3150   return floor_log2 (x - 1) + 1;
3151 }
3152
3153 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3154    replace division by D, and put the least significant N bits of the result
3155    in *MULTIPLIER_PTR and return the most significant bit.
3156
3157    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3158    needed precision is in PRECISION (should be <= N).
3159
3160    PRECISION should be as small as possible so this function can choose
3161    multiplier more freely.
3162
3163    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3164    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3165
3166    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3167    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3168
3169 static
3170 unsigned HOST_WIDE_INT
3171 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3172                    rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3173 {
3174   HOST_WIDE_INT mhigh_hi, mlow_hi;
3175   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3176   int lgup, post_shift;
3177   int pow, pow2;
3178   unsigned HOST_WIDE_INT nl, dummy1;
3179   HOST_WIDE_INT nh, dummy2;
3180
3181   /* lgup = ceil(log2(divisor)); */
3182   lgup = ceil_log2 (d);
3183
3184   gcc_assert (lgup <= n);
3185
3186   pow = n + lgup;
3187   pow2 = n + lgup - precision;
3188
3189   /* We could handle this with some effort, but this case is much
3190      better handled directly with a scc insn, so rely on caller using
3191      that.  */
3192   gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3193
3194   /* mlow = 2^(N + lgup)/d */
3195  if (pow >= HOST_BITS_PER_WIDE_INT)
3196     {
3197       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3198       nl = 0;
3199     }
3200   else
3201     {
3202       nh = 0;
3203       nl = (unsigned HOST_WIDE_INT) 1 << pow;
3204     }
3205   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3206                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3207
3208   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
3209   if (pow2 >= HOST_BITS_PER_WIDE_INT)
3210     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3211   else
3212     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3213   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3214                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3215
3216   gcc_assert (!mhigh_hi || nh - d < d);
3217   gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3218   /* Assert that mlow < mhigh.  */
3219   gcc_assert (mlow_hi < mhigh_hi
3220               || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3221
3222   /* If precision == N, then mlow, mhigh exceed 2^N
3223      (but they do not exceed 2^(N+1)).  */
3224
3225   /* Reduce to lowest terms.  */
3226   for (post_shift = lgup; post_shift > 0; post_shift--)
3227     {
3228       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3229       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3230       if (ml_lo >= mh_lo)
3231         break;
3232
3233       mlow_hi = 0;
3234       mlow_lo = ml_lo;
3235       mhigh_hi = 0;
3236       mhigh_lo = mh_lo;
3237     }
3238
3239   *post_shift_ptr = post_shift;
3240   *lgup_ptr = lgup;
3241   if (n < HOST_BITS_PER_WIDE_INT)
3242     {
3243       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3244       *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3245       return mhigh_lo >= mask;
3246     }
3247   else
3248     {
3249       *multiplier_ptr = GEN_INT (mhigh_lo);
3250       return mhigh_hi;
3251     }
3252 }
3253
3254 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3255    congruent to 1 (mod 2**N).  */
3256
3257 static unsigned HOST_WIDE_INT
3258 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3259 {
3260   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3261
3262   /* The algorithm notes that the choice y = x satisfies
3263      x*y == 1 mod 2^3, since x is assumed odd.
3264      Each iteration doubles the number of bits of significance in y.  */
3265
3266   unsigned HOST_WIDE_INT mask;
3267   unsigned HOST_WIDE_INT y = x;
3268   int nbit = 3;
3269
3270   mask = (n == HOST_BITS_PER_WIDE_INT
3271           ? ~(unsigned HOST_WIDE_INT) 0
3272           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3273
3274   while (nbit < n)
3275     {
3276       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3277       nbit *= 2;
3278     }
3279   return y;
3280 }
3281
3282 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3283    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3284    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3285    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3286    become signed.
3287
3288    The result is put in TARGET if that is convenient.
3289
3290    MODE is the mode of operation.  */
3291
3292 rtx
3293 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3294                              rtx op1, rtx target, int unsignedp)
3295 {
3296   rtx tem;
3297   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3298
3299   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3300                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3301                       NULL_RTX, 0);
3302   tem = expand_and (mode, tem, op1, NULL_RTX);
3303   adj_operand
3304     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3305                      adj_operand);
3306
3307   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3308                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3309                       NULL_RTX, 0);
3310   tem = expand_and (mode, tem, op0, NULL_RTX);
3311   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3312                           target);
3313
3314   return target;
3315 }
3316
3317 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
3318
3319 static rtx
3320 extract_high_half (enum machine_mode mode, rtx op)
3321 {
3322   enum machine_mode wider_mode;
3323
3324   if (mode == word_mode)
3325     return gen_highpart (mode, op);
3326
3327   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3328
3329   wider_mode = GET_MODE_WIDER_MODE (mode);
3330   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3331                      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3332   return convert_modes (mode, wider_mode, op, 0);
3333 }
3334
3335 /* Like expand_mult_highpart, but only consider using a multiplication
3336    optab.  OP1 is an rtx for the constant operand.  */
3337
3338 static rtx
3339 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3340                             rtx target, int unsignedp, int max_cost)
3341 {
3342   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3343   enum machine_mode wider_mode;
3344   optab moptab;
3345   rtx tem;
3346   int size;
3347   bool speed = optimize_insn_for_speed_p ();
3348
3349   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3350
3351   wider_mode = GET_MODE_WIDER_MODE (mode);
3352   size = GET_MODE_BITSIZE (mode);
3353
3354   /* Firstly, try using a multiplication insn that only generates the needed
3355      high part of the product, and in the sign flavor of unsignedp.  */
3356   if (mul_highpart_cost[speed][mode] < max_cost)
3357     {
3358       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3359       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3360                           unsignedp, OPTAB_DIRECT);
3361       if (tem)
3362         return tem;
3363     }
3364
3365   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3366      Need to adjust the result after the multiplication.  */
3367   if (size - 1 < BITS_PER_WORD
3368       && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3369           + 4 * add_cost[speed][mode] < max_cost))
3370     {
3371       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3372       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3373                           unsignedp, OPTAB_DIRECT);
3374       if (tem)
3375         /* We used the wrong signedness.  Adjust the result.  */
3376         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3377                                             tem, unsignedp);
3378     }
3379
3380   /* Try widening multiplication.  */
3381   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3382   if (optab_handler (moptab, wider_mode) != CODE_FOR_nothing
3383       && mul_widen_cost[speed][wider_mode] < max_cost)
3384     {
3385       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3386                           unsignedp, OPTAB_WIDEN);
3387       if (tem)
3388         return extract_high_half (mode, tem);
3389     }
3390
3391   /* Try widening the mode and perform a non-widening multiplication.  */
3392   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3393       && size - 1 < BITS_PER_WORD
3394       && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3395     {
3396       rtx insns, wop0, wop1;
3397
3398       /* We need to widen the operands, for example to ensure the
3399          constant multiplier is correctly sign or zero extended.
3400          Use a sequence to clean-up any instructions emitted by
3401          the conversions if things don't work out.  */
3402       start_sequence ();
3403       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3404       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3405       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3406                           unsignedp, OPTAB_WIDEN);
3407       insns = get_insns ();
3408       end_sequence ();
3409
3410       if (tem)
3411         {
3412           emit_insn (insns);
3413           return extract_high_half (mode, tem);
3414         }
3415     }
3416
3417   /* Try widening multiplication of opposite signedness, and adjust.  */
3418   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3419   if (optab_handler (moptab, wider_mode) != CODE_FOR_nothing
3420       && size - 1 < BITS_PER_WORD
3421       && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3422           + 4 * add_cost[speed][mode] < max_cost))
3423     {
3424       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3425                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3426       if (tem != 0)
3427         {
3428           tem = extract_high_half (mode, tem);
3429           /* We used the wrong signedness.  Adjust the result.  */
3430           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3431                                               target, unsignedp);
3432         }
3433     }
3434
3435   return 0;
3436 }
3437
3438 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3439    putting the high half of the result in TARGET if that is convenient,
3440    and return where the result is.  If the operation can not be performed,
3441    0 is returned.
3442
3443    MODE is the mode of operation and result.
3444
3445    UNSIGNEDP nonzero means unsigned multiply.
3446
3447    MAX_COST is the total allowed cost for the expanded RTL.  */
3448
3449 static rtx
3450 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3451                       rtx target, int unsignedp, int max_cost)
3452 {
3453   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3454   unsigned HOST_WIDE_INT cnst1;
3455   int extra_cost;
3456   bool sign_adjust = false;
3457   enum mult_variant variant;
3458   struct algorithm alg;
3459   rtx tem;
3460   bool speed = optimize_insn_for_speed_p ();
3461
3462   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3463   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3464   gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3465
3466   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3467
3468   /* We can't optimize modes wider than BITS_PER_WORD.
3469      ??? We might be able to perform double-word arithmetic if
3470      mode == word_mode, however all the cost calculations in
3471      synth_mult etc. assume single-word operations.  */
3472   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3473     return expand_mult_highpart_optab (mode, op0, op1, target,
3474                                        unsignedp, max_cost);
3475
3476   extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3477
3478   /* Check whether we try to multiply by a negative constant.  */
3479   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3480     {
3481       sign_adjust = true;
3482       extra_cost += add_cost[speed][mode];
3483     }
3484
3485   /* See whether shift/add multiplication is cheap enough.  */
3486   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3487                            max_cost - extra_cost))
3488     {
3489       /* See whether the specialized multiplication optabs are
3490          cheaper than the shift/add version.  */
3491       tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3492                                         alg.cost.cost + extra_cost);
3493       if (tem)
3494         return tem;
3495
3496       tem = convert_to_mode (wider_mode, op0, unsignedp);
3497       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3498       tem = extract_high_half (mode, tem);
3499
3500       /* Adjust result for signedness.  */
3501       if (sign_adjust)
3502         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3503
3504       return tem;
3505     }
3506   return expand_mult_highpart_optab (mode, op0, op1, target,
3507                                      unsignedp, max_cost);
3508 }
3509
3510
3511 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3512
3513 static rtx
3514 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3515 {
3516   unsigned HOST_WIDE_INT masklow, maskhigh;
3517   rtx result, temp, shift, label;
3518   int logd;
3519
3520   logd = floor_log2 (d);
3521   result = gen_reg_rtx (mode);
3522
3523   /* Avoid conditional branches when they're expensive.  */
3524   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3525       && optimize_insn_for_speed_p ())
3526     {
3527       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3528                                       mode, 0, -1);
3529       if (signmask)
3530         {
3531           signmask = force_reg (mode, signmask);
3532           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3533           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3534
3535           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3536              which instruction sequence to use.  If logical right shifts
3537              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3538              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3539
3540           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3541           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3542               || rtx_cost (temp, SET, optimize_insn_for_speed_p ()) > COSTS_N_INSNS (2))
3543             {
3544               temp = expand_binop (mode, xor_optab, op0, signmask,
3545                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3546               temp = expand_binop (mode, sub_optab, temp, signmask,
3547                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3548               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3549                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3550               temp = expand_binop (mode, xor_optab, temp, signmask,
3551                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3552               temp = expand_binop (mode, sub_optab, temp, signmask,
3553                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3554             }
3555           else
3556             {
3557               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3558                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3559               signmask = force_reg (mode, signmask);
3560
3561               temp = expand_binop (mode, add_optab, op0, signmask,
3562                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3563               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3564                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3565               temp = expand_binop (mode, sub_optab, temp, signmask,
3566                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3567             }
3568           return temp;
3569         }
3570     }
3571
3572   /* Mask contains the mode's signbit and the significant bits of the
3573      modulus.  By including the signbit in the operation, many targets
3574      can avoid an explicit compare operation in the following comparison
3575      against zero.  */
3576
3577   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3578   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3579     {
3580       masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3581       maskhigh = -1;
3582     }
3583   else
3584     maskhigh = (HOST_WIDE_INT) -1
3585                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3586
3587   temp = expand_binop (mode, and_optab, op0,
3588                        immed_double_const (masklow, maskhigh, mode),
3589                        result, 1, OPTAB_LIB_WIDEN);
3590   if (temp != result)
3591     emit_move_insn (result, temp);
3592
3593   label = gen_label_rtx ();
3594   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3595
3596   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3597                        0, OPTAB_LIB_WIDEN);
3598   masklow = (HOST_WIDE_INT) -1 << logd;
3599   maskhigh = -1;
3600   temp = expand_binop (mode, ior_optab, temp,
3601                        immed_double_const (masklow, maskhigh, mode),
3602                        result, 1, OPTAB_LIB_WIDEN);
3603   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3604                        0, OPTAB_LIB_WIDEN);
3605   if (temp != result)
3606     emit_move_insn (result, temp);
3607   emit_label (label);
3608   return result;
3609 }
3610
3611 /* Expand signed division of OP0 by a power of two D in mode MODE.
3612    This routine is only called for positive values of D.  */
3613
3614 static rtx
3615 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3616 {
3617   rtx temp, label;
3618   tree shift;
3619   int logd;
3620
3621   logd = floor_log2 (d);
3622   shift = build_int_cst (NULL_TREE, logd);
3623
3624   if (d == 2
3625       && BRANCH_COST (optimize_insn_for_speed_p (),
3626                       false) >= 1)
3627     {
3628       temp = gen_reg_rtx (mode);
3629       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3630       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3631                            0, OPTAB_LIB_WIDEN);
3632       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3633     }
3634
3635 #ifdef HAVE_conditional_move
3636   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3637       >= 2)
3638     {
3639       rtx temp2;
3640
3641       /* ??? emit_conditional_move forces a stack adjustment via
3642          compare_from_rtx so, if the sequence is discarded, it will
3643          be lost.  Do it now instead.  */
3644       do_pending_stack_adjust ();
3645
3646       start_sequence ();
3647       temp2 = copy_to_mode_reg (mode, op0);
3648       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3649                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3650       temp = force_reg (mode, temp);
3651
3652       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3653       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3654                                      mode, temp, temp2, mode, 0);
3655       if (temp2)
3656         {
3657           rtx seq = get_insns ();
3658           end_sequence ();
3659           emit_insn (seq);
3660           return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3661         }
3662       end_sequence ();
3663     }
3664 #endif
3665
3666   if (BRANCH_COST (optimize_insn_for_speed_p (),
3667                    false) >= 2)
3668     {
3669       int ushift = GET_MODE_BITSIZE (mode) - logd;
3670
3671       temp = gen_reg_rtx (mode);
3672       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3673       if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3674         temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3675                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3676       else
3677         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3678                              build_int_cst (NULL_TREE, ushift),
3679                              NULL_RTX, 1);
3680       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3681                            0, OPTAB_LIB_WIDEN);
3682       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3683     }
3684
3685   label = gen_label_rtx ();
3686   temp = copy_to_mode_reg (mode, op0);
3687   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3688   expand_inc (temp, GEN_INT (d - 1));
3689   emit_label (label);
3690   return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3691 }
3692 \f
3693 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3694    if that is convenient, and returning where the result is.
3695    You may request either the quotient or the remainder as the result;
3696    specify REM_FLAG nonzero to get the remainder.
3697
3698    CODE is the expression code for which kind of division this is;
3699    it controls how rounding is done.  MODE is the machine mode to use.
3700    UNSIGNEDP nonzero means do unsigned division.  */
3701
3702 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3703    and then correct it by or'ing in missing high bits
3704    if result of ANDI is nonzero.
3705    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3706    This could optimize to a bfexts instruction.
3707    But C doesn't use these operations, so their optimizations are
3708    left for later.  */
3709 /* ??? For modulo, we don't actually need the highpart of the first product,
3710    the low part will do nicely.  And for small divisors, the second multiply
3711    can also be a low-part only multiply or even be completely left out.
3712    E.g. to calculate the remainder of a division by 3 with a 32 bit
3713    multiply, multiply with 0x55555556 and extract the upper two bits;
3714    the result is exact for inputs up to 0x1fffffff.
3715    The input range can be reduced by using cross-sum rules.
3716    For odd divisors >= 3, the following table gives right shift counts
3717    so that if a number is shifted by an integer multiple of the given
3718    amount, the remainder stays the same:
3719    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3720    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3721    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3722    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3723    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3724
3725    Cross-sum rules for even numbers can be derived by leaving as many bits
3726    to the right alone as the divisor has zeros to the right.
3727    E.g. if x is an unsigned 32 bit number:
3728    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3729    */
3730
3731 rtx
3732 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3733                rtx op0, rtx op1, rtx target, int unsignedp)
3734 {
3735   enum machine_mode compute_mode;
3736   rtx tquotient;
3737   rtx quotient = 0, remainder = 0;
3738   rtx last;
3739   int size;
3740   rtx insn, set;
3741   optab optab1, optab2;
3742   int op1_is_constant, op1_is_pow2 = 0;
3743   int max_cost, extra_cost;
3744   static HOST_WIDE_INT last_div_const = 0;
3745   static HOST_WIDE_INT ext_op1;
3746   bool speed = optimize_insn_for_speed_p ();
3747
3748   op1_is_constant = CONST_INT_P (op1);
3749   if (op1_is_constant)
3750     {
3751       ext_op1 = INTVAL (op1);
3752       if (unsignedp)
3753         ext_op1 &= GET_MODE_MASK (mode);
3754       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3755                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3756     }
3757
3758   /*
3759      This is the structure of expand_divmod:
3760
3761      First comes code to fix up the operands so we can perform the operations
3762      correctly and efficiently.
3763
3764      Second comes a switch statement with code specific for each rounding mode.
3765      For some special operands this code emits all RTL for the desired
3766      operation, for other cases, it generates only a quotient and stores it in
3767      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3768      to indicate that it has not done anything.
3769
3770      Last comes code that finishes the operation.  If QUOTIENT is set and
3771      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3772      QUOTIENT is not set, it is computed using trunc rounding.
3773
3774      We try to generate special code for division and remainder when OP1 is a
3775      constant.  If |OP1| = 2**n we can use shifts and some other fast
3776      operations.  For other values of OP1, we compute a carefully selected
3777      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3778      by m.
3779
3780      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3781      half of the product.  Different strategies for generating the product are
3782      implemented in expand_mult_highpart.
3783
3784      If what we actually want is the remainder, we generate that by another
3785      by-constant multiplication and a subtraction.  */
3786
3787   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3788      code below will malfunction if we are, so check here and handle
3789      the special case if so.  */
3790   if (op1 == const1_rtx)
3791     return rem_flag ? const0_rtx : op0;
3792
3793     /* When dividing by -1, we could get an overflow.
3794      negv_optab can handle overflows.  */
3795   if (! unsignedp && op1 == constm1_rtx)
3796     {
3797       if (rem_flag)
3798         return const0_rtx;
3799       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3800                           ? negv_optab : neg_optab, op0, target, 0);
3801     }
3802
3803   if (target
3804       /* Don't use the function value register as a target
3805          since we have to read it as well as write it,
3806          and function-inlining gets confused by this.  */
3807       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3808           /* Don't clobber an operand while doing a multi-step calculation.  */
3809           || ((rem_flag || op1_is_constant)
3810               && (reg_mentioned_p (target, op0)
3811                   || (MEM_P (op0) && MEM_P (target))))
3812           || reg_mentioned_p (target, op1)
3813           || (MEM_P (op1) && MEM_P (target))))
3814     target = 0;
3815
3816   /* Get the mode in which to perform this computation.  Normally it will
3817      be MODE, but sometimes we can't do the desired operation in MODE.
3818      If so, pick a wider mode in which we can do the operation.  Convert
3819      to that mode at the start to avoid repeated conversions.
3820
3821      First see what operations we need.  These depend on the expression
3822      we are evaluating.  (We assume that divxx3 insns exist under the
3823      same conditions as modxx3 insns and that these insns don't normally
3824      fail.  If these assumptions are not correct, we may generate less
3825      efficient code in some cases.)
3826
3827      Then see if we find a mode in which we can open-code that operation
3828      (either a division, modulus, or shift).  Finally, check for the smallest
3829      mode for which we can do the operation with a library call.  */
3830
3831   /* We might want to refine this now that we have division-by-constant
3832      optimization.  Since expand_mult_highpart tries so many variants, it is
3833      not straightforward to generalize this.  Maybe we should make an array
3834      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3835
3836   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3837             ? (unsignedp ? lshr_optab : ashr_optab)
3838             : (unsignedp ? udiv_optab : sdiv_optab));
3839   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3840             ? optab1
3841             : (unsignedp ? udivmod_optab : sdivmod_optab));
3842
3843   for (compute_mode = mode; compute_mode != VOIDmode;
3844        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3845     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3846         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3847       break;
3848
3849   if (compute_mode == VOIDmode)
3850     for (compute_mode = mode; compute_mode != VOIDmode;
3851          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3852       if (optab_libfunc (optab1, compute_mode)
3853           || optab_libfunc (optab2, compute_mode))
3854         break;
3855
3856   /* If we still couldn't find a mode, use MODE, but expand_binop will
3857      probably die.  */
3858   if (compute_mode == VOIDmode)
3859     compute_mode = mode;
3860
3861   if (target && GET_MODE (target) == compute_mode)
3862     tquotient = target;
3863   else
3864     tquotient = gen_reg_rtx (compute_mode);
3865
3866   size = GET_MODE_BITSIZE (compute_mode);
3867 #if 0
3868   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3869      (mode), and thereby get better code when OP1 is a constant.  Do that
3870      later.  It will require going over all usages of SIZE below.  */
3871   size = GET_MODE_BITSIZE (mode);
3872 #endif
3873
3874   /* Only deduct something for a REM if the last divide done was
3875      for a different constant.   Then set the constant of the last
3876      divide.  */
3877   max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
3878   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3879                      && INTVAL (op1) == last_div_const))
3880     max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
3881
3882   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3883
3884   /* Now convert to the best mode to use.  */
3885   if (compute_mode != mode)
3886     {
3887       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3888       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3889
3890       /* convert_modes may have placed op1 into a register, so we
3891          must recompute the following.  */
3892       op1_is_constant = CONST_INT_P (op1);
3893       op1_is_pow2 = (op1_is_constant
3894                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3895                           || (! unsignedp
3896                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3897     }
3898
3899   /* If one of the operands is a volatile MEM, copy it into a register.  */
3900
3901   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3902     op0 = force_reg (compute_mode, op0);
3903   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3904     op1 = force_reg (compute_mode, op1);
3905
3906   /* If we need the remainder or if OP1 is constant, we need to
3907      put OP0 in a register in case it has any queued subexpressions.  */
3908   if (rem_flag || op1_is_constant)
3909     op0 = force_reg (compute_mode, op0);
3910
3911   last = get_last_insn ();
3912
3913   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3914   if (unsignedp)
3915     {
3916       if (code == FLOOR_DIV_EXPR)
3917         code = TRUNC_DIV_EXPR;
3918       if (code == FLOOR_MOD_EXPR)
3919         code = TRUNC_MOD_EXPR;
3920       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3921         code = TRUNC_DIV_EXPR;
3922     }
3923
3924   if (op1 != const0_rtx)
3925     switch (code)
3926       {
3927       case TRUNC_MOD_EXPR:
3928       case TRUNC_DIV_EXPR:
3929         if (op1_is_constant)
3930           {
3931             if (unsignedp)
3932               {
3933                 unsigned HOST_WIDE_INT mh;
3934                 int pre_shift, post_shift;
3935                 int dummy;
3936                 rtx ml;
3937                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3938                                             & GET_MODE_MASK (compute_mode));
3939
3940                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3941                   {
3942                     pre_shift = floor_log2 (d);
3943                     if (rem_flag)
3944                       {
3945                         remainder
3946                           = expand_binop (compute_mode, and_optab, op0,
3947                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3948                                           remainder, 1,
3949                                           OPTAB_LIB_WIDEN);
3950                         if (remainder)
3951                           return gen_lowpart (mode, remainder);
3952                       }
3953                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3954                                              build_int_cst (NULL_TREE,
3955                                                             pre_shift),
3956                                              tquotient, 1);
3957                   }
3958                 else if (size <= HOST_BITS_PER_WIDE_INT)
3959                   {
3960                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3961                       {
3962                         /* Most significant bit of divisor is set; emit an scc
3963                            insn.  */
3964                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
3965                                                           compute_mode, 1, 1);
3966                       }
3967                     else
3968                       {
3969                         /* Find a suitable multiplier and right shift count
3970                            instead of multiplying with D.  */
3971
3972                         mh = choose_multiplier (d, size, size,
3973                                                 &ml, &post_shift, &dummy);
3974
3975                         /* If the suggested multiplier is more than SIZE bits,
3976                            we can do better for even divisors, using an
3977                            initial right shift.  */
3978                         if (mh != 0 && (d & 1) == 0)
3979                           {
3980                             pre_shift = floor_log2 (d & -d);
3981                             mh = choose_multiplier (d >> pre_shift, size,
3982                                                     size - pre_shift,
3983                                                     &ml, &post_shift, &dummy);
3984                             gcc_assert (!mh);
3985                           }
3986                         else
3987                           pre_shift = 0;
3988
3989                         if (mh != 0)
3990                           {
3991                             rtx t1, t2, t3, t4;
3992
3993                             if (post_shift - 1 >= BITS_PER_WORD)
3994                               goto fail1;
3995
3996                             extra_cost
3997                               = (shift_cost[speed][compute_mode][post_shift - 1]
3998                                  + shift_cost[speed][compute_mode][1]
3999                                  + 2 * add_cost[speed][compute_mode]);
4000                             t1 = expand_mult_highpart (compute_mode, op0, ml,
4001                                                        NULL_RTX, 1,
4002                                                        max_cost - extra_cost);
4003                             if (t1 == 0)
4004                               goto fail1;
4005                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4006                                                                op0, t1),
4007                                                 NULL_RTX);
4008                             t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
4009                                                integer_one_node, NULL_RTX, 1);
4010                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4011                                                               t1, t3),
4012                                                 NULL_RTX);
4013                             quotient = expand_shift
4014                               (RSHIFT_EXPR, compute_mode, t4,
4015                                build_int_cst (NULL_TREE, post_shift - 1),
4016                                tquotient, 1);
4017                           }
4018                         else
4019                           {
4020                             rtx t1, t2;
4021
4022                             if (pre_shift >= BITS_PER_WORD
4023                                 || post_shift >= BITS_PER_WORD)
4024                               goto fail1;
4025
4026                             t1 = expand_shift
4027                               (RSHIFT_EXPR, compute_mode, op0,
4028                                build_int_cst (NULL_TREE, pre_shift),
4029                                NULL_RTX, 1);
4030                             extra_cost
4031                               = (shift_cost[speed][compute_mode][pre_shift]
4032                                  + shift_cost[speed][compute_mode][post_shift]);
4033                             t2 = expand_mult_highpart (compute_mode, t1, ml,
4034                                                        NULL_RTX, 1,
4035                                                        max_cost - extra_cost);
4036                             if (t2 == 0)
4037                               goto fail1;
4038                             quotient = expand_shift
4039                               (RSHIFT_EXPR, compute_mode, t2,
4040                                build_int_cst (NULL_TREE, post_shift),
4041                                tquotient, 1);
4042                           }
4043                       }
4044                   }
4045                 else            /* Too wide mode to use tricky code */
4046                   break;
4047
4048                 insn = get_last_insn ();
4049                 if (insn != last
4050                     && (set = single_set (insn)) != 0
4051                     && SET_DEST (set) == quotient)
4052                   set_unique_reg_note (insn,
4053                                        REG_EQUAL,
4054                                        gen_rtx_UDIV (compute_mode, op0, op1));
4055               }
4056             else                /* TRUNC_DIV, signed */
4057               {
4058                 unsigned HOST_WIDE_INT ml;
4059                 int lgup, post_shift;
4060                 rtx mlr;
4061                 HOST_WIDE_INT d = INTVAL (op1);
4062                 unsigned HOST_WIDE_INT abs_d;
4063
4064                 /* Since d might be INT_MIN, we have to cast to
4065                    unsigned HOST_WIDE_INT before negating to avoid
4066                    undefined signed overflow.  */
4067                 abs_d = (d >= 0
4068                          ? (unsigned HOST_WIDE_INT) d
4069                          : - (unsigned HOST_WIDE_INT) d);
4070
4071                 /* n rem d = n rem -d */
4072                 if (rem_flag && d < 0)
4073                   {
4074                     d = abs_d;
4075                     op1 = gen_int_mode (abs_d, compute_mode);
4076                   }
4077
4078                 if (d == 1)
4079                   quotient = op0;
4080                 else if (d == -1)
4081                   quotient = expand_unop (compute_mode, neg_optab, op0,
4082                                           tquotient, 0);
4083                 else if (HOST_BITS_PER_WIDE_INT >= size
4084                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4085                   {
4086                     /* This case is not handled correctly below.  */
4087                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4088                                                 compute_mode, 1, 1);
4089                     if (quotient == 0)
4090                       goto fail1;
4091                   }
4092                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4093                          && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4094                                       : sdiv_pow2_cheap[speed][compute_mode])
4095                          /* We assume that cheap metric is true if the
4096                             optab has an expander for this mode.  */
4097                          && ((optab_handler ((rem_flag ? smod_optab
4098                                               : sdiv_optab),
4099                                              compute_mode)
4100                               != CODE_FOR_nothing)
4101                              || (optab_handler (sdivmod_optab,
4102                                                 compute_mode)
4103                                  != CODE_FOR_nothing)))
4104                   ;
4105                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4106                   {
4107                     if (rem_flag)
4108                       {
4109                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4110                         if (remainder)
4111                           return gen_lowpart (mode, remainder);
4112                       }
4113
4114                     if (sdiv_pow2_cheap[speed][compute_mode]
4115                         && ((optab_handler (sdiv_optab, compute_mode)
4116                              != CODE_FOR_nothing)
4117                             || (optab_handler (sdivmod_optab, compute_mode)
4118                                 != CODE_FOR_nothing)))
4119                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4120                                                 compute_mode, op0,
4121                                                 gen_int_mode (abs_d,
4122                                                               compute_mode),
4123                                                 NULL_RTX, 0);
4124                     else
4125                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4126
4127                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4128                        negate the quotient.  */
4129                     if (d < 0)
4130                       {
4131                         insn = get_last_insn ();
4132                         if (insn != last
4133                             && (set = single_set (insn)) != 0
4134                             && SET_DEST (set) == quotient
4135                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4136                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4137                           set_unique_reg_note (insn,
4138                                                REG_EQUAL,
4139                                                gen_rtx_DIV (compute_mode,
4140                                                             op0,
4141                                                             GEN_INT
4142                                                             (trunc_int_for_mode
4143                                                              (abs_d,
4144                                                               compute_mode))));
4145
4146                         quotient = expand_unop (compute_mode, neg_optab,
4147                                                 quotient, quotient, 0);
4148                       }
4149                   }
4150                 else if (size <= HOST_BITS_PER_WIDE_INT)
4151                   {
4152                     choose_multiplier (abs_d, size, size - 1,
4153                                        &mlr, &post_shift, &lgup);
4154                     ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4155                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4156                       {
4157                         rtx t1, t2, t3;
4158
4159                         if (post_shift >= BITS_PER_WORD
4160                             || size - 1 >= BITS_PER_WORD)
4161                           goto fail1;
4162
4163                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4164                                       + shift_cost[speed][compute_mode][size - 1]
4165                                       + add_cost[speed][compute_mode]);
4166                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4167                                                    NULL_RTX, 0,
4168                                                    max_cost - extra_cost);
4169                         if (t1 == 0)
4170                           goto fail1;
4171                         t2 = expand_shift
4172                           (RSHIFT_EXPR, compute_mode, t1,
4173                            build_int_cst (NULL_TREE, post_shift),
4174                            NULL_RTX, 0);
4175                         t3 = expand_shift
4176                           (RSHIFT_EXPR, compute_mode, op0,
4177                            build_int_cst (NULL_TREE, size - 1),
4178                            NULL_RTX, 0);
4179                         if (d < 0)
4180                           quotient
4181                             = force_operand (gen_rtx_MINUS (compute_mode,
4182                                                             t3, t2),
4183                                              tquotient);
4184                         else
4185                           quotient
4186                             = force_operand (gen_rtx_MINUS (compute_mode,
4187                                                             t2, t3),
4188                                              tquotient);
4189                       }
4190                     else
4191                       {
4192                         rtx t1, t2, t3, t4;
4193
4194                         if (post_shift >= BITS_PER_WORD
4195                             || size - 1 >= BITS_PER_WORD)
4196                           goto fail1;
4197
4198                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4199                         mlr = gen_int_mode (ml, compute_mode);
4200                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4201                                       + shift_cost[speed][compute_mode][size - 1]
4202                                       + 2 * add_cost[speed][compute_mode]);
4203                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4204                                                    NULL_RTX, 0,
4205                                                    max_cost - extra_cost);
4206                         if (t1 == 0)
4207                           goto fail1;
4208                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4209                                                           t1, op0),
4210                                             NULL_RTX);
4211                         t3 = expand_shift
4212                           (RSHIFT_EXPR, compute_mode, t2,
4213                            build_int_cst (NULL_TREE, post_shift),
4214                            NULL_RTX, 0);
4215                         t4 = expand_shift
4216                           (RSHIFT_EXPR, compute_mode, op0,
4217                            build_int_cst (NULL_TREE, size - 1),
4218                            NULL_RTX, 0);
4219                         if (d < 0)
4220                           quotient
4221                             = force_operand (gen_rtx_MINUS (compute_mode,
4222                                                             t4, t3),
4223                                              tquotient);
4224                         else
4225                           quotient
4226                             = force_operand (gen_rtx_MINUS (compute_mode,
4227                                                             t3, t4),
4228                                              tquotient);
4229                       }
4230                   }
4231                 else            /* Too wide mode to use tricky code */
4232                   break;
4233
4234                 insn = get_last_insn ();
4235                 if (insn != last
4236                     && (set = single_set (insn)) != 0
4237                     && SET_DEST (set) == quotient)
4238                   set_unique_reg_note (insn,
4239                                        REG_EQUAL,
4240                                        gen_rtx_DIV (compute_mode, op0, op1));
4241               }
4242             break;
4243           }
4244       fail1:
4245         delete_insns_since (last);
4246         break;
4247
4248       case FLOOR_DIV_EXPR:
4249       case FLOOR_MOD_EXPR:
4250       /* We will come here only for signed operations.  */
4251         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4252           {
4253             unsigned HOST_WIDE_INT mh;
4254             int pre_shift, lgup, post_shift;
4255             HOST_WIDE_INT d = INTVAL (op1);
4256             rtx ml;
4257
4258             if (d > 0)
4259               {
4260                 /* We could just as easily deal with negative constants here,
4261                    but it does not seem worth the trouble for GCC 2.6.  */
4262                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4263                   {
4264                     pre_shift = floor_log2 (d);
4265                     if (rem_flag)
4266                       {
4267                         remainder = expand_binop (compute_mode, and_optab, op0,
4268                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4269                                                   remainder, 0, OPTAB_LIB_WIDEN);
4270                         if (remainder)
4271                           return gen_lowpart (mode, remainder);
4272                       }
4273                     quotient = expand_shift
4274                       (RSHIFT_EXPR, compute_mode, op0,
4275                        build_int_cst (NULL_TREE, pre_shift),
4276                        tquotient, 0);
4277                   }
4278                 else
4279                   {
4280                     rtx t1, t2, t3, t4;
4281
4282                     mh = choose_multiplier (d, size, size - 1,
4283                                             &ml, &post_shift, &lgup);
4284                     gcc_assert (!mh);
4285
4286                     if (post_shift < BITS_PER_WORD
4287                         && size - 1 < BITS_PER_WORD)
4288                       {
4289                         t1 = expand_shift
4290                           (RSHIFT_EXPR, compute_mode, op0,
4291                            build_int_cst (NULL_TREE, size - 1),
4292                            NULL_RTX, 0);
4293                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4294                                            NULL_RTX, 0, OPTAB_WIDEN);
4295                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4296                                       + shift_cost[speed][compute_mode][size - 1]
4297                                       + 2 * add_cost[speed][compute_mode]);
4298                         t3 = expand_mult_highpart (compute_mode, t2, ml,
4299                                                    NULL_RTX, 1,
4300                                                    max_cost - extra_cost);
4301                         if (t3 != 0)
4302                           {
4303                             t4 = expand_shift
4304                               (RSHIFT_EXPR, compute_mode, t3,
4305                                build_int_cst (NULL_TREE, post_shift),
4306                                NULL_RTX, 1);
4307                             quotient = expand_binop (compute_mode, xor_optab,
4308                                                      t4, t1, tquotient, 0,
4309                                                      OPTAB_WIDEN);
4310                           }
4311                       }
4312                   }
4313               }
4314             else
4315               {
4316                 rtx nsign, t1, t2, t3, t4;
4317                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4318                                                   op0, constm1_rtx), NULL_RTX);
4319                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4320                                    0, OPTAB_WIDEN);
4321                 nsign = expand_shift
4322                   (RSHIFT_EXPR, compute_mode, t2,
4323                    build_int_cst (NULL_TREE, size - 1),
4324                    NULL_RTX, 0);
4325                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4326                                     NULL_RTX);
4327                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4328                                     NULL_RTX, 0);
4329                 if (t4)
4330                   {
4331                     rtx t5;
4332                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4333                                       NULL_RTX, 0);
4334                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4335                                                             t4, t5),
4336                                               tquotient);
4337                   }
4338               }
4339           }
4340
4341         if (quotient != 0)
4342           break;
4343         delete_insns_since (last);
4344
4345         /* Try using an instruction that produces both the quotient and
4346            remainder, using truncation.  We can easily compensate the quotient
4347            or remainder to get floor rounding, once we have the remainder.
4348            Notice that we also compute the final remainder value here,
4349            and return the result right away.  */
4350         if (target == 0 || GET_MODE (target) != compute_mode)
4351           target = gen_reg_rtx (compute_mode);
4352
4353         if (rem_flag)
4354           {
4355             remainder
4356               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4357             quotient = gen_reg_rtx (compute_mode);
4358           }
4359         else
4360           {
4361             quotient
4362               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4363             remainder = gen_reg_rtx (compute_mode);
4364           }
4365
4366         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4367                                  quotient, remainder, 0))
4368           {
4369             /* This could be computed with a branch-less sequence.
4370                Save that for later.  */
4371             rtx tem;
4372             rtx label = gen_label_rtx ();
4373             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4374             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4375                                 NULL_RTX, 0, OPTAB_WIDEN);
4376             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4377             expand_dec (quotient, const1_rtx);
4378             expand_inc (remainder, op1);
4379             emit_label (label);
4380             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4381           }
4382
4383         /* No luck with division elimination or divmod.  Have to do it
4384            by conditionally adjusting op0 *and* the result.  */
4385         {
4386           rtx label1, label2, label3, label4, label5;
4387           rtx adjusted_op0;
4388           rtx tem;
4389
4390           quotient = gen_reg_rtx (compute_mode);
4391           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4392           label1 = gen_label_rtx ();
4393           label2 = gen_label_rtx ();
4394           label3 = gen_label_rtx ();
4395           label4 = gen_label_rtx ();
4396           label5 = gen_label_rtx ();
4397           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4398           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4399           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4400                               quotient, 0, OPTAB_LIB_WIDEN);
4401           if (tem != quotient)
4402             emit_move_insn (quotient, tem);
4403           emit_jump_insn (gen_jump (label5));
4404           emit_barrier ();
4405           emit_label (label1);
4406           expand_inc (adjusted_op0, const1_rtx);
4407           emit_jump_insn (gen_jump (label4));
4408           emit_barrier ();
4409           emit_label (label2);
4410           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4411           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4412                               quotient, 0, OPTAB_LIB_WIDEN);
4413           if (tem != quotient)
4414             emit_move_insn (quotient, tem);
4415           emit_jump_insn (gen_jump (label5));
4416           emit_barrier ();
4417           emit_label (label3);
4418           expand_dec (adjusted_op0, const1_rtx);
4419           emit_label (label4);
4420           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4421                               quotient, 0, OPTAB_LIB_WIDEN);
4422           if (tem != quotient)
4423             emit_move_insn (quotient, tem);
4424           expand_dec (quotient, const1_rtx);
4425           emit_label (label5);
4426         }
4427         break;
4428
4429       case CEIL_DIV_EXPR:
4430       case CEIL_MOD_EXPR:
4431         if (unsignedp)
4432           {
4433             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4434               {
4435                 rtx t1, t2, t3;
4436                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4437                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4438                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4439                                    tquotient, 1);
4440                 t2 = expand_binop (compute_mode, and_optab, op0,
4441                                    GEN_INT (d - 1),
4442                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4443                 t3 = gen_reg_rtx (compute_mode);
4444                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4445                                       compute_mode, 1, 1);
4446                 if (t3 == 0)
4447                   {
4448                     rtx lab;
4449                     lab = gen_label_rtx ();
4450                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4451                     expand_inc (t1, const1_rtx);
4452                     emit_label (lab);
4453                     quotient = t1;
4454                   }
4455                 else
4456                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4457                                                           t1, t3),
4458                                             tquotient);
4459                 break;
4460               }
4461
4462             /* Try using an instruction that produces both the quotient and
4463                remainder, using truncation.  We can easily compensate the
4464                quotient or remainder to get ceiling rounding, once we have the
4465                remainder.  Notice that we compute also the final remainder
4466                value here, and return the result right away.  */
4467             if (target == 0 || GET_MODE (target) != compute_mode)
4468               target = gen_reg_rtx (compute_mode);
4469
4470             if (rem_flag)
4471               {
4472                 remainder = (REG_P (target)
4473                              ? target : gen_reg_rtx (compute_mode));
4474                 quotient = gen_reg_rtx (compute_mode);
4475               }
4476             else
4477               {
4478                 quotient = (REG_P (target)
4479                             ? target : gen_reg_rtx (compute_mode));
4480                 remainder = gen_reg_rtx (compute_mode);
4481               }
4482
4483             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4484                                      remainder, 1))
4485               {
4486                 /* This could be computed with a branch-less sequence.
4487                    Save that for later.  */
4488                 rtx label = gen_label_rtx ();
4489                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4490                                  compute_mode, label);
4491                 expand_inc (quotient, const1_rtx);
4492                 expand_dec (remainder, op1);
4493                 emit_label (label);
4494                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4495               }
4496
4497             /* No luck with division elimination or divmod.  Have to do it
4498                by conditionally adjusting op0 *and* the result.  */
4499             {
4500               rtx label1, label2;
4501               rtx adjusted_op0, tem;
4502
4503               quotient = gen_reg_rtx (compute_mode);
4504               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4505               label1 = gen_label_rtx ();
4506               label2 = gen_label_rtx ();
4507               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4508                                compute_mode, label1);
4509               emit_move_insn  (quotient, const0_rtx);
4510               emit_jump_insn (gen_jump (label2));
4511               emit_barrier ();
4512               emit_label (label1);
4513               expand_dec (adjusted_op0, const1_rtx);
4514               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4515                                   quotient, 1, OPTAB_LIB_WIDEN);
4516               if (tem != quotient)
4517                 emit_move_insn (quotient, tem);
4518               expand_inc (quotient, const1_rtx);
4519               emit_label (label2);
4520             }
4521           }
4522         else /* signed */
4523           {
4524             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4525                 && INTVAL (op1) >= 0)
4526               {
4527                 /* This is extremely similar to the code for the unsigned case
4528                    above.  For 2.7 we should merge these variants, but for
4529                    2.6.1 I don't want to touch the code for unsigned since that
4530                    get used in C.  The signed case will only be used by other
4531                    languages (Ada).  */
4532
4533                 rtx t1, t2, t3;
4534                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4535                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4536                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4537                                    tquotient, 0);
4538                 t2 = expand_binop (compute_mode, and_optab, op0,
4539                                    GEN_INT (d - 1),
4540                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4541                 t3 = gen_reg_rtx (compute_mode);
4542                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4543                                       compute_mode, 1, 1);
4544                 if (t3 == 0)
4545                   {
4546                     rtx lab;
4547                     lab = gen_label_rtx ();
4548                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4549                     expand_inc (t1, const1_rtx);
4550                     emit_label (lab);
4551                     quotient = t1;
4552                   }
4553                 else
4554                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4555                                                           t1, t3),
4556                                             tquotient);
4557                 break;
4558               }
4559
4560             /* Try using an instruction that produces both the quotient and
4561                remainder, using truncation.  We can easily compensate the
4562                quotient or remainder to get ceiling rounding, once we have the
4563                remainder.  Notice that we compute also the final remainder
4564                value here, and return the result right away.  */
4565             if (target == 0 || GET_MODE (target) != compute_mode)
4566               target = gen_reg_rtx (compute_mode);
4567             if (rem_flag)
4568               {
4569                 remainder= (REG_P (target)
4570                             ? target : gen_reg_rtx (compute_mode));
4571                 quotient = gen_reg_rtx (compute_mode);
4572               }
4573             else
4574               {
4575                 quotient = (REG_P (target)
4576                             ? target : gen_reg_rtx (compute_mode));
4577                 remainder = gen_reg_rtx (compute_mode);
4578               }
4579
4580             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4581                                      remainder, 0))
4582               {
4583                 /* This could be computed with a branch-less sequence.
4584                    Save that for later.  */
4585                 rtx tem;
4586                 rtx label = gen_label_rtx ();
4587                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4588                                  compute_mode, label);
4589                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4590                                     NULL_RTX, 0, OPTAB_WIDEN);
4591                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4592                 expand_inc (quotient, const1_rtx);
4593                 expand_dec (remainder, op1);
4594                 emit_label (label);
4595                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4596               }
4597
4598             /* No luck with division elimination or divmod.  Have to do it
4599                by conditionally adjusting op0 *and* the result.  */
4600             {
4601               rtx label1, label2, label3, label4, label5;
4602               rtx adjusted_op0;
4603               rtx tem;
4604
4605               quotient = gen_reg_rtx (compute_mode);
4606               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4607               label1 = gen_label_rtx ();
4608               label2 = gen_label_rtx ();
4609               label3 = gen_label_rtx ();
4610               label4 = gen_label_rtx ();
4611               label5 = gen_label_rtx ();
4612               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4613               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4614                                compute_mode, label1);
4615               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4616                                   quotient, 0, OPTAB_LIB_WIDEN);
4617               if (tem != quotient)
4618                 emit_move_insn (quotient, tem);
4619               emit_jump_insn (gen_jump (label5));
4620               emit_barrier ();
4621               emit_label (label1);
4622               expand_dec (adjusted_op0, const1_rtx);
4623               emit_jump_insn (gen_jump (label4));
4624               emit_barrier ();
4625               emit_label (label2);
4626               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4627                                compute_mode, label3);
4628               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4629                                   quotient, 0, OPTAB_LIB_WIDEN);
4630               if (tem != quotient)
4631                 emit_move_insn (quotient, tem);
4632               emit_jump_insn (gen_jump (label5));
4633               emit_barrier ();
4634               emit_label (label3);
4635               expand_inc (adjusted_op0, const1_rtx);
4636               emit_label (label4);
4637               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4638                                   quotient, 0, OPTAB_LIB_WIDEN);
4639               if (tem != quotient)
4640                 emit_move_insn (quotient, tem);
4641               expand_inc (quotient, const1_rtx);
4642               emit_label (label5);
4643             }
4644           }
4645         break;
4646
4647       case EXACT_DIV_EXPR:
4648         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4649           {
4650             HOST_WIDE_INT d = INTVAL (op1);
4651             unsigned HOST_WIDE_INT ml;
4652             int pre_shift;
4653             rtx t1;
4654
4655             pre_shift = floor_log2 (d & -d);
4656             ml = invert_mod2n (d >> pre_shift, size);
4657             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4658                                build_int_cst (NULL_TREE, pre_shift),
4659                                NULL_RTX, unsignedp);
4660             quotient = expand_mult (compute_mode, t1,
4661                                     gen_int_mode (ml, compute_mode),
4662                                     NULL_RTX, 1);
4663
4664             insn = get_last_insn ();
4665             set_unique_reg_note (insn,
4666                                  REG_EQUAL,
4667                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4668                                                  compute_mode,
4669                                                  op0, op1));
4670           }
4671         break;
4672
4673       case ROUND_DIV_EXPR:
4674       case ROUND_MOD_EXPR:
4675         if (unsignedp)
4676           {
4677             rtx tem;
4678             rtx label;
4679             label = gen_label_rtx ();
4680             quotient = gen_reg_rtx (compute_mode);
4681             remainder = gen_reg_rtx (compute_mode);
4682             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4683               {
4684                 rtx tem;
4685                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4686                                          quotient, 1, OPTAB_LIB_WIDEN);
4687                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4688                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4689                                           remainder, 1, OPTAB_LIB_WIDEN);
4690               }
4691             tem = plus_constant (op1, -1);
4692             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4693                                 integer_one_node, NULL_RTX, 1);
4694             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4695             expand_inc (quotient, const1_rtx);
4696             expand_dec (remainder, op1);
4697             emit_label (label);
4698           }
4699         else
4700           {
4701             rtx abs_rem, abs_op1, tem, mask;
4702             rtx label;
4703             label = gen_label_rtx ();
4704             quotient = gen_reg_rtx (compute_mode);
4705             remainder = gen_reg_rtx (compute_mode);
4706             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4707               {
4708                 rtx tem;
4709                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4710                                          quotient, 0, OPTAB_LIB_WIDEN);
4711                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4712                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4713                                           remainder, 0, OPTAB_LIB_WIDEN);
4714               }
4715             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4716             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4717             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4718                                 integer_one_node, NULL_RTX, 1);
4719             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4720             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4721                                 NULL_RTX, 0, OPTAB_WIDEN);
4722             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4723                                  build_int_cst (NULL_TREE, size - 1),
4724                                  NULL_RTX, 0);
4725             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4726                                 NULL_RTX, 0, OPTAB_WIDEN);
4727             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4728                                 NULL_RTX, 0, OPTAB_WIDEN);
4729             expand_inc (quotient, tem);
4730             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4731                                 NULL_RTX, 0, OPTAB_WIDEN);
4732             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4733                                 NULL_RTX, 0, OPTAB_WIDEN);
4734             expand_dec (remainder, tem);
4735             emit_label (label);
4736           }
4737         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4738
4739       default:
4740         gcc_unreachable ();
4741       }
4742
4743   if (quotient == 0)
4744     {
4745       if (target && GET_MODE (target) != compute_mode)
4746         target = 0;
4747
4748       if (rem_flag)
4749         {
4750           /* Try to produce the remainder without producing the quotient.
4751              If we seem to have a divmod pattern that does not require widening,
4752              don't try widening here.  We should really have a WIDEN argument
4753              to expand_twoval_binop, since what we'd really like to do here is
4754              1) try a mod insn in compute_mode
4755              2) try a divmod insn in compute_mode
4756              3) try a div insn in compute_mode and multiply-subtract to get
4757                 remainder
4758              4) try the same things with widening allowed.  */
4759           remainder
4760             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4761                                  op0, op1, target,
4762                                  unsignedp,
4763                                  ((optab_handler (optab2, compute_mode)
4764                                    != CODE_FOR_nothing)
4765                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4766           if (remainder == 0)
4767             {
4768               /* No luck there.  Can we do remainder and divide at once
4769                  without a library call?  */
4770               remainder = gen_reg_rtx (compute_mode);
4771               if (! expand_twoval_binop ((unsignedp
4772                                           ? udivmod_optab
4773                                           : sdivmod_optab),
4774                                          op0, op1,
4775                                          NULL_RTX, remainder, unsignedp))
4776                 remainder = 0;
4777             }
4778
4779           if (remainder)
4780             return gen_lowpart (mode, remainder);
4781         }
4782
4783       /* Produce the quotient.  Try a quotient insn, but not a library call.
4784          If we have a divmod in this mode, use it in preference to widening
4785          the div (for this test we assume it will not fail). Note that optab2
4786          is set to the one of the two optabs that the call below will use.  */
4787       quotient
4788         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4789                              op0, op1, rem_flag ? NULL_RTX : target,
4790                              unsignedp,
4791                              ((optab_handler (optab2, compute_mode)
4792                                != CODE_FOR_nothing)
4793                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4794
4795       if (quotient == 0)
4796         {
4797           /* No luck there.  Try a quotient-and-remainder insn,
4798              keeping the quotient alone.  */
4799           quotient = gen_reg_rtx (compute_mode);
4800           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4801                                      op0, op1,
4802                                      quotient, NULL_RTX, unsignedp))
4803             {
4804               quotient = 0;
4805               if (! rem_flag)
4806                 /* Still no luck.  If we are not computing the remainder,
4807                    use a library call for the quotient.  */
4808                 quotient = sign_expand_binop (compute_mode,
4809                                               udiv_optab, sdiv_optab,
4810                                               op0, op1, target,
4811                                               unsignedp, OPTAB_LIB_WIDEN);
4812             }
4813         }
4814     }
4815
4816   if (rem_flag)
4817     {
4818       if (target && GET_MODE (target) != compute_mode)
4819         target = 0;
4820
4821       if (quotient == 0)
4822         {
4823           /* No divide instruction either.  Use library for remainder.  */
4824           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4825                                          op0, op1, target,
4826                                          unsignedp, OPTAB_LIB_WIDEN);
4827           /* No remainder function.  Try a quotient-and-remainder
4828              function, keeping the remainder.  */
4829           if (!remainder)
4830             {
4831               remainder = gen_reg_rtx (compute_mode);
4832               if (!expand_twoval_binop_libfunc
4833                   (unsignedp ? udivmod_optab : sdivmod_optab,
4834                    op0, op1,
4835                    NULL_RTX, remainder,
4836                    unsignedp ? UMOD : MOD))
4837                 remainder = NULL_RTX;
4838             }
4839         }
4840       else
4841         {
4842           /* We divided.  Now finish doing X - Y * (X / Y).  */
4843           remainder = expand_mult (compute_mode, quotient, op1,
4844                                    NULL_RTX, unsignedp);
4845           remainder = expand_binop (compute_mode, sub_optab, op0,
4846                                     remainder, target, unsignedp,
4847                                     OPTAB_LIB_WIDEN);
4848         }
4849     }
4850
4851   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4852 }
4853 \f
4854 /* Return a tree node with data type TYPE, describing the value of X.
4855    Usually this is a VAR_DECL, if there is no obvious better choice.
4856    X may be an expression, however we only support those expressions
4857    generated by loop.c.

        CONST_INT, CONST_DOUBLE and CONST_VECTOR are converted with
        build_int_cst_wide, build_real or build_vector.  PLUS, MINUS, NEG,
        MULT, the shifts, DIV, UDIV and the extensions become tree
        expressions whose operands are converted by recursive calls.
        CONST is looked through, and a SYMBOL_REF that has a decl yields
        the address of that decl.  Any other rtx is attached to a new
        VAR_DECL of type TYPE.  */
4858
4859 tree
4860 make_tree (tree type, rtx x)
4861 {
4862   tree t;
4863
4864   switch (GET_CODE (x))
4865     {
4866     case CONST_INT:
4867       {
4868         HOST_WIDE_INT hi = 0;
4869
             /* A negative value gets an all-ones high word, except when
                TYPE is unsigned and its mode has fewer bits than a
                HOST_WIDE_INT.  */
4870         if (INTVAL (x) < 0
4871             && !(TYPE_UNSIGNED (type)
4872                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
4873                      < HOST_BITS_PER_WIDE_INT)))
4874           hi = -1;
4875
4876         t = build_int_cst_wide (type, INTVAL (x), hi);
4877
4878         return t;
4879       }
4880
4881     case CONST_DOUBLE:
           /* A CONST_DOUBLE in VOIDmode is built as an integer from its low
              and high words; any other mode is built as a real constant.  */
4882       if (GET_MODE (x) == VOIDmode)
4883         t = build_int_cst_wide (type,
4884                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4885       else
4886         {
4887           REAL_VALUE_TYPE d;
4888
4889           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4890           t = build_real (type, d);
4891         }
4892
4893       return t;
4894
4895     case CONST_VECTOR:
4896       {
4897         int units = CONST_VECTOR_NUNITS (x);
4898         tree itype = TREE_TYPE (type);
4899         tree t = NULL_TREE;  /* Shadows the function-level t.  */
4900         int i;
4901
4902
4903         /* Build a tree with vector elements.  The elements are visited
                from the last one to the first, each converted to ITYPE.  */
4904         for (i = units - 1; i >= 0; --i)
4905           {
4906             rtx elt = CONST_VECTOR_ELT (x, i);
4907             t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
4908           }
4909
4910         return build_vector (type, t);
4911       }
4912
           /* PLUS, MINUS, NEG, MULT and ASHIFT map onto the corresponding
              tree codes, with every operand converted to TYPE.  */
4913     case PLUS:
4914       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4915                           make_tree (type, XEXP (x, 1)));
4916
4917     case MINUS:
4918       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4919                           make_tree (type, XEXP (x, 1)));
4920
4921     case NEG:
4922       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4923
4924     case MULT:
4925       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4926                           make_tree (type, XEXP (x, 1)));
4927
4928     case ASHIFT:
4929       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4930                           make_tree (type, XEXP (x, 1)));
4931
4932     case LSHIFTRT:
           /* Shift in the type given by unsigned_type_for and convert the
              result to TYPE.  The shift count is converted to TYPE itself.  */
4933       t = unsigned_type_for (type);
4934       return fold_convert (type, build2 (RSHIFT_EXPR, t,
4935                                          make_tree (t, XEXP (x, 0)),
4936                                          make_tree (type, XEXP (x, 1))));
4937
4938     case ASHIFTRT:
           /* As for LSHIFTRT, but in the type given by signed_type_for.  */
4939       t = signed_type_for (type);
4940       return fold_convert (type, build2 (RSHIFT_EXPR, t,
4941                                          make_tree (t, XEXP (x, 0)),
4942                                          make_tree (type, XEXP (x, 1))));
4943
4944     case DIV:
           /* Divide in the type given by signed_type_for, except that a
              REAL_TYPE is used unchanged; the result is then converted to
              TYPE.  */
4945       if (TREE_CODE (type) != REAL_TYPE)
4946         t = signed_type_for (type);
4947       else
4948         t = type;
4949
4950       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4951                                          make_tree (t, XEXP (x, 0)),
4952                                          make_tree (t, XEXP (x, 1))));
4953     case UDIV:
           /* Divide in the type given by unsigned_type_for, then convert the
              result to TYPE.  */
4954       t = unsigned_type_for (type);
4955       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4956                                          make_tree (t, XEXP (x, 0)),
4957                                          make_tree (t, XEXP (x, 1))));
4958
4959     case SIGN_EXTEND:
4960     case ZERO_EXTEND:
           /* Convert the operand in a type chosen for the operand's own
              mode (the second argument is nonzero for ZERO_EXTEND), then
              convert that tree to TYPE.  */
4961       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4962                                           GET_CODE (x) == ZERO_EXTEND);
4963       return fold_convert (type, make_tree (t, XEXP (x, 0)));
4964
4965     case CONST:
           /* Look through the CONST wrapper.  */
4966       return make_tree (type, XEXP (x, 0));
4967
4968     case SYMBOL_REF:
           /* A symbol with an associated decl becomes the address of that
              decl, converted to TYPE.  */
4969       t = SYMBOL_REF_DECL (x);
4970       if (t)
4971         return fold_convert (type, build_fold_addr_expr (t));
4972       /* else fall through.  */
4973
4974     default:
           /* No better representation: make an unnamed VAR_DECL of type TYPE
              and record X as its rtl.  */
4975       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
4976
4977       /* If TYPE is a POINTER_TYPE, we might need to convert X from
4978          address mode to pointer mode.  */
4979       if (POINTER_TYPE_P (type))
4980         x = convert_memory_address_addr_space
4981               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
4982
4983       /* Note that we do *not* use SET_DECL_RTL here, because we do not
4984          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
4985       t->decl_with_rtl.rtl = x;
4986
4987       return t;
4988     }
4989 }
4990 \f
4991 /* Emit code for the bitwise AND of OP0 and OP1 in MODE.  A nonzero
4992    TARGET receives the value and is what the function returns.
4993
4994    With TARGET equal to 0, a pseudo-register or constant is returned.  */
4995
4996 rtx
4997 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
4998 {
4999   rtx result = NULL_RTX;
5000
5001   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5002     result = simplify_binary_operation (AND, mode, op0, op1);
5003   if (!result)
5004     result = expand_binop (mode, and_optab, op0, op1, target, 0,
5005                            OPTAB_LIB_WIDEN);
5006   if (!target)
5007     return result;
5008   if (result != target)
5009     emit_move_insn (target, result);
5010   return target;
5011 }
5012
5013 /* Helper function for emit_store_flag.

        Try to emit the insn ICODE so that it stores the outcome of the
        comparison X CODE Y, then normalize that outcome according to
        NORMALIZEP and deliver it in TARGET_MODE.

        TARGET is the preferred destination and may be null, in which case
        a new register of TARGET_MODE is created.  MODE, COMPARE_MODE and
        UNSIGNEDP are handed to prepare_operand together with X and Y.
        A TARGET_MODE of VOIDmode stands for the mode of ICODE's operand 0.
        A NORMALIZEP of 0, or one equal to STORE_FLAG_VALUE, keeps the
        insn's result unchanged; otherwise the result is adjusted by
        negation, by shifting down the sign bit, or by masking the low bit.

        Return the rtx holding the result, or NULL_RTX if an operand could
        not be prepared or the insn could not be expanded; in that case
        every insn emitted since entry has been deleted.  */
5014 static rtx
5015 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5016              enum machine_mode mode, enum machine_mode compare_mode,
5017              int unsignedp, rtx x, rtx y, int normalizep,
5018              enum machine_mode target_mode)
5019 {
5020   struct expand_operand ops[4];
5021   rtx op0, last, comparison, subtarget;
5022   enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5023
       /* Remember the current end of the insn stream so that a failed
          attempt can be undone with delete_insns_since.  */
5024   last = get_last_insn ();
5025   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5026   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5027   if (!x || !y)
5028     {
5029       delete_insns_since (last);
5030       return NULL_RTX;
5031     }
5032
5033   if (target_mode == VOIDmode)
5034     target_mode = result_mode;
5035   if (!target)
5036     target = gen_reg_rtx (target_mode);
5037
5038   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5039
       /* Operand 0 is the result (TARGET is suggested only when not
          optimizing), operand 1 the comparison rtx, and operands 2 and 3
          the values being compared.  */
5040   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5041   create_fixed_operand (&ops[1], comparison);
5042   create_fixed_operand (&ops[2], x);
5043   create_fixed_operand (&ops[3], y);
5044   if (!maybe_expand_insn (icode, 4, ops))
5045     {
5046       delete_insns_since (last);
5047       return NULL_RTX;
5048     }
5049   subtarget = ops[0].value;  /* The rtx recorded for operand 0.  */
5050
5051   /* If we are converting to a wider mode, first convert to
5052      TARGET_MODE, then normalize.  This produces better combining
5053      opportunities on machines that have a SIGN_EXTRACT when we are
5054      testing a single bit.  This mostly benefits the 68k.
5055
5056      If STORE_FLAG_VALUE does not have the sign bit set when
5057      interpreted in RESULT_MODE, we can do this conversion as unsigned,
5058      which is usually more efficient.  */
5059   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5060     {
5061       convert_move (target, subtarget,
5062                     (GET_MODE_BITSIZE (result_mode) <= HOST_BITS_PER_WIDE_INT)
5063                     && 0 == (STORE_FLAG_VALUE
5064                              & ((HOST_WIDE_INT) 1
5065                                 << (GET_MODE_BITSIZE (result_mode) -1))));
5066       op0 = target;
5067       result_mode = target_mode;
5068     }
5069   else
5070     op0 = subtarget;
5071
5072   /* If we want to keep subexpressions around, don't reuse our last
5073      target.  */
5074   if (optimize)
5075     subtarget = 0;
5076
5077   /* Now normalize to the proper value in RESULT_MODE.  Sometimes we
5078      don't have to do anything.  */
5079   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5080     ;
5081   /* STORE_FLAG_VALUE might be the most negative number, so write
5082      the comparison this way to avoid a compiler-time warning.  */
5083   else if (- normalizep == STORE_FLAG_VALUE)
5084     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5085
5086   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5087      it hard to use a value of just the sign bit due to ANSI integer
5088      constant typing rules.  */
5089   else if (GET_MODE_BITSIZE (result_mode) <= HOST_BITS_PER_WIDE_INT
5090            && (STORE_FLAG_VALUE
5091                & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (result_mode) - 1))))
       /* Move the sign bit down to bit 0; the shift is unsigned when
          NORMALIZEP is 1 and signed otherwise.  */
5092     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5093                         size_int (GET_MODE_BITSIZE (result_mode) - 1), subtarget,
5094                         normalizep == 1);
5095   else
5096     {
           /* Only the low bit of STORE_FLAG_VALUE is relied upon here:
              mask it out, and negate when -1 is wanted.  */
5097       gcc_assert (STORE_FLAG_VALUE & 1);
5098
5099       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5100       if (normalizep == -1)
5101         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5102     }
5103
5104   /* If we were converting to a smaller mode, do the conversion now.  */
5105   if (target_mode != result_mode)
5106     {
5107       convert_move (target, op0, 0);
5108       return target;
5109     }
5110   else
5111     return op0;
5112 }
5113
5114
5115 /* A subroutine of emit_store_flag only including "tricks" that do not
5116    need a recursive call.  These are kept separate to avoid infinite
5117    loops.  */
5118
5119 static rtx
5120 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5121                    enum machine_mode mode, int unsignedp, int normalizep,
5122                    enum machine_mode target_mode)
5123 {
5124   rtx subtarget;
5125   enum insn_code icode;
5126   enum machine_mode compare_mode;
5127   enum mode_class mclass;
5128   enum rtx_code scode;
5129   rtx tem;
5130
5131   if (unsignedp)
5132     code = unsigned_condition (code);
5133   scode = swap_condition (code);
5134
5135   /* If one operand is constant, make it the second one.  Only do this
5136      if the other operand is not constant as well.  */
5137
5138   if (swap_commutative_operands_p (op0, op1))
5139     {
5140       tem = op0;
5141       op0 = op1;
5142       op1 = tem;
5143       code = swap_condition (code);
5144     }
5145
5146   if (mode == VOIDmode)
5147     mode = GET_MODE (op0);
5148
5149   /* For some comparisons with 1 and -1, we can convert this to
5150      comparisons with zero.  This will often produce more opportunities for
5151      store-flag insns.  */
5152
5153   switch (code)
5154     {
5155     case LT:
5156       if (op1 == const1_rtx)
5157         op1 = const0_rtx, code = LE;
5158       break;
5159     case LE:
5160       if (op1 == constm1_rtx)
5161         op1 = const0_rtx, code = LT;
5162       break;
5163     case GE:
5164       if (op1 == const1_rtx)
5165         op1 = const0_rtx, code = GT;
5166       break;
5167     case GT:
5168       if (op1 == constm1_rtx)
5169         op1 = const0_rtx, code = GE;
5170       break;
5171     case GEU:
5172       if (op1 == const1_rtx)
5173         op1 = const0_rtx, code = NE;
5174       break;
5175     case LTU:
5176       if (op1 == const1_rtx)
5177         op1 = const0_rtx, code = EQ;
5178       break;
5179     default:
5180       break;
5181     }
5182
5183   /* If we are comparing a double-word integer with zero or -1, we can
5184      convert the comparison into one involving a single word.  */
5185   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5186       && GET_MODE_CLASS (mode) == MODE_INT
5187       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5188     {
5189       if ((code == EQ || code == NE)
5190           && (op1 == const0_rtx || op1 == constm1_rtx))
5191         {
5192           rtx op00, op01;
5193
5194           /* Do a logical OR or AND of the two words and compare the
5195              result.  */
5196           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5197           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5198           tem = expand_binop (word_mode,
5199                               op1 == const0_rtx ? ior_optab : and_optab,
5200                               op00, op01, NULL_RTX, unsignedp,
5201                               OPTAB_DIRECT);
5202
5203           if (tem != 0)
5204             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5205                                    unsignedp, normalizep);
5206         }
5207       else if ((code == LT || code == GE) && op1 == const0_rtx)
5208         {
5209           rtx op0h;
5210
5211           /* If testing the sign bit, can just test on high word.  */
5212           op0h = simplify_gen_subreg (word_mode, op0, mode,
5213                                       subreg_highpart_offset (word_mode,
5214                                                               mode));
5215           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5216                                  unsignedp, normalizep);
5217         }
5218       else
5219         tem = NULL_RTX;
5220
5221       if (tem)
5222         {
5223           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5224             return tem;
5225           if (!target)
5226             target = gen_reg_rtx (target_mode);
5227
5228           convert_move (target, tem,
5229                         0 == ((normalizep ? normalizep : STORE_FLAG_VALUE)
5230                               & ((HOST_WIDE_INT) 1
5231                                  << (GET_MODE_BITSIZE (word_mode) -1))));
5232           return target;
5233         }
5234     }
5235
5236   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5237      complement of A (for GE) and shifting the sign bit to the low bit.  */
5238   if (op1 == const0_rtx && (code == LT || code == GE)
5239       && GET_MODE_CLASS (mode) == MODE_INT
5240       && (normalizep || STORE_FLAG_VALUE == 1
5241           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5242               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5243                   == ((unsigned HOST_WIDE_INT) 1
5244                       << (GET_MODE_BITSIZE (mode) - 1))))))
5245     {
5246       subtarget = target;
5247
5248       if (!target)
5249         target_mode = mode;
5250
5251       /* If the result is to be wider than OP0, it is best to convert it
5252          first.  If it is to be narrower, it is *incorrect* to convert it
5253          first.  */
5254       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5255         {
5256           op0 = convert_modes (target_mode, mode, op0, 0);
5257           mode = target_mode;
5258         }
5259
5260       if (target_mode != mode)
5261         subtarget = 0;
5262
5263       if (code == GE)
5264         op0 = expand_unop (mode, one_cmpl_optab, op0,
5265                            ((STORE_FLAG_VALUE == 1 || normalizep)
5266                             ? 0 : subtarget), 0);
5267
5268       if (STORE_FLAG_VALUE == 1 || normalizep)
5269         /* If we are supposed to produce a 0/1 value, we want to do
5270            a logical shift from the sign bit to the low-order bit; for
5271            a -1/0 value, we do an arithmetic shift.  */
5272         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5273                             size_int (GET_MODE_BITSIZE (mode) - 1),
5274                             subtarget, normalizep != -1);
5275
5276       if (mode != target_mode)
5277         op0 = convert_modes (target_mode, mode, op0, 0);
5278
5279       return op0;
5280     }
5281
5282   mclass = GET_MODE_CLASS (mode);
5283   for (compare_mode = mode; compare_mode != VOIDmode;
5284        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5285     {
5286      enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5287      icode = optab_handler (cstore_optab, optab_mode);
5288      if (icode != CODE_FOR_nothing)
5289         {
5290           do_pending_stack_adjust ();
5291           tem = emit_cstore (target, icode, code, mode, compare_mode,
5292                              unsignedp, op0, op1, normalizep, target_mode);
5293           if (tem)
5294             return tem;
5295
5296           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5297             {
5298               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5299                                  unsignedp, op1, op0, normalizep, target_mode);
5300               if (tem)
5301                 return tem;
5302             }
5303           break;
5304         }
5305     }
5306
5307   return 0;
5308 }
5309
5310 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5311    and storing in TARGET.  Normally return TARGET.
5312    Return 0 if that cannot be done.
5313
5314    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5315    it is VOIDmode, they cannot both be CONST_INT.
5316
5317    UNSIGNEDP is for the case where we have to widen the operands
5318    to perform the operation.  It says to use zero-extension.
5319
5320    NORMALIZEP is 1 if we should convert the result to be either zero
5321    or one.  NORMALIZEP is -1 if we should convert the result to be
5322    either zero or -1.  If NORMALIZEP is zero, the result will be left
5323    "raw" out of the scc insn.

        Strategy: emit_store_flag_1 is tried first.  If it fails and
        BRANCH_COST is nonzero, several branch-free alternatives are
        attempted in order:
          - floating-point modes: the reversed comparison followed by an
            addition or XOR, then splitting the comparison in two;
          - integer EQ/NE against a nonzero operand: XOR (or subtract)
            the operands and recurse on a comparison with zero;
          - integer modes: the reversed comparison followed by an
            addition or XOR;
          - integer EQ, NE, LE and GT against zero: arithmetic that puts
            the answer in the sign bit, followed by a shift if a
            normalized value was requested.
        Insns emitted by an attempt that does not work out are removed
        again with delete_insns_since.  */
5324
5325 rtx
5326 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5327                  enum machine_mode mode, int unsignedp, int normalizep)
5328 {
5329   enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5330   enum rtx_code rcode;
5331   rtx subtarget;
5332   rtx tem, last, trueval;
5333
5334   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5335                            target_mode);
5336   if (tem)
5337     return tem;
5338
5339   /* If we reached here, we can't do this with a scc insn, however there
5340      are some comparisons that can be done in other ways.  Don't do any
5341      of these cases if branches are very cheap.  */
5342   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5343     return 0;
5344
5345   /* See what we need to return.  We can only return a 1, -1, or the
5346      sign bit.  */
5347
5348   if (normalizep == 0)
5349     {
5350       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5351         normalizep = STORE_FLAG_VALUE;
5352
          /* STORE_FLAG_VALUE is exactly the sign bit of MODE: keep
             NORMALIZEP at zero so the sign-bit result computed below is
             returned without a final shift.  */
5353       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5354                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5355                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5356         ;
5357       else
5358         return 0;
5359     }
5360
      /* Remember the current end of the insn chain so that the insns of a
         failed attempt can be discarded with delete_insns_since (last).  */
5361   last = get_last_insn ();
5362
5363   /* If optimizing, use different pseudo registers for each insn, instead
5364      of reusing the same pseudo.  This leads to better CSE, but slows
5365      down the compiler, since there are more pseudos.  */
5366   subtarget = (!optimize
5367                && (target_mode == mode)) ? target : NULL_RTX;
      /* TRUEVAL is the value the result must have when the comparison
         holds: NORMALIZEP if nonzero, otherwise STORE_FLAG_VALUE.  */
5368   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5369
5370   /* For floating-point comparisons, try the reverse comparison or try
5371      changing the "orderedness" of the comparison.  */
5372   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5373     {
5374       enum rtx_code first_code;
5375       bool and_them;
5376
5377       rcode = reverse_condition_maybe_unordered (code);
5378       if (can_compare_p (rcode, mode, ccp_store_flag)
5379           && (code == ORDERED || code == UNORDERED
5380               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5381               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5382         {
              /* WANT_ADD is set when the requested normalization and
                 STORE_FLAG_VALUE are 1 and -1 in either order.  The
                 reversed comparison is then computed with
                 STORE_FLAG_VALUE as its true value and NORMALIZEP is
                 added: the sum is 0 when RCODE holds and NORMALIZEP when
                 it does not.  Otherwise the reversed result is XORed
                 with TRUEVAL, which swaps 0 and TRUEVAL.  Either form is
                 only used when rtx_cost reports the constant operand as
                 free.  */
5383           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5384                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5385
5386           /* For the reverse comparison, use either an addition or a XOR.  */
5387           if (want_add
5388               && rtx_cost (GEN_INT (normalizep), PLUS,
5389                            optimize_insn_for_speed_p ()) == 0)
5390             {
5391               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5392                                        STORE_FLAG_VALUE, target_mode);
5393               if (tem)
5394                 return expand_binop (target_mode, add_optab, tem,
5395                                      GEN_INT (normalizep),
5396                                      target, 0, OPTAB_WIDEN);
5397             }
5398           else if (!want_add
5399                    && rtx_cost (trueval, XOR,
5400                                 optimize_insn_for_speed_p ()) == 0)
5401             {
5402               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5403                                        normalizep, target_mode);
5404               if (tem)
5405                 return expand_binop (target_mode, xor_optab, tem, trueval,
5406                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5407             }
5408         }
5409
5410       delete_insns_since (last);
5411
5412       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5413       if (code == ORDERED || code == UNORDERED)
5414         return 0;
5415
          /* Split CODE into FIRST_CODE and a replacement CODE.  Judging by
             the assertion and the conditional moves below, AND_THEM says
             that the two partial results must be ANDed together;
             otherwise they are ORed (NOTE(review): confirm against
             split_comparison).  */
5416       and_them = split_comparison (code, mode, &first_code, &code);
5417
5418       /* If there are no NaNs, the first comparison should always fall through.
5419          Effectively change the comparison to the other one.  */
5420       if (!HONOR_NANS (mode))
5421         {
5422           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5423           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5424                                     target_mode);
5425         }
5426
5427 #ifdef HAVE_conditional_move
5428       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5429          conditional move.  */
5430       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5431                                normalizep, target_mode);
5432       if (tem == 0)
5433         return 0;
5434
          /* Combine the FIRST_CODE result TEM with the second comparison.
             Presumably the conditional move selects its first value
             operand when CODE holds: TEM or zero for the AND case,
             TRUEVAL or TEM for the OR case.  */
5435       if (and_them)
5436         tem = emit_conditional_move (target, code, op0, op1, mode,
5437                                      tem, const0_rtx, GET_MODE (tem), 0);
5438       else
5439         tem = emit_conditional_move (target, code, op0, op1, mode,
5440                                      trueval, tem, GET_MODE (tem), 0);
5441
5442       if (tem == 0)
5443         delete_insns_since (last);
5444       return tem;
5445 #else
5446       return 0;
5447 #endif
5448     }
5449
5450   /* The remaining tricks only apply to integer comparisons.  */
5451
5452   if (GET_MODE_CLASS (mode) != MODE_INT)
5453     return 0;
5454
5455   /* If this is an equality comparison of integers, we can try to exclusive-or
5456      (or subtract) the two operands and use a recursive call to try the
5457      comparison with zero.  The very-cheap-branch case has already been
5458      rejected above.  */
5459
5460   if ((code == EQ || code == NE) && op1 != const0_rtx)
5461     {
5462       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5463                           OPTAB_WIDEN);
5464
5465       if (tem == 0)
5466         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5467                             OPTAB_WIDEN);
5468       if (tem != 0)
5469         tem = emit_store_flag (target, code, tem, const0_rtx,
5470                                mode, unsignedp, normalizep);
5471       if (tem != 0)
5472         return tem;
5473
5474       delete_insns_since (last);
5475     }
5476
5477   /* For integer comparisons, try the reverse comparison.  However, for
5478      small X and if we'd have anyway to extend, implementing "X != 0"
5479      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5480   rcode = reverse_condition (code);
5481   if (can_compare_p (rcode, mode, ccp_store_flag)
5482       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5483             && code == NE
5484             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5485             && op1 == const0_rtx))
5486     {
5487       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5488                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5489
5490       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5491       if (want_add
5492           && rtx_cost (GEN_INT (normalizep), PLUS,
5493                        optimize_insn_for_speed_p ()) == 0)
5494         {
5495           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5496                                    STORE_FLAG_VALUE, target_mode);
5497           if (tem != 0)
5498             tem = expand_binop (target_mode, add_optab, tem,
5499                                 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5500         }
5501       else if (!want_add
5502                && rtx_cost (trueval, XOR,
5503                             optimize_insn_for_speed_p ()) == 0)
5504         {
5505           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5506                                    normalizep, target_mode);
5507           if (tem != 0)
5508             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5509                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5510         }
5511
5512       if (tem != 0)
5513         return tem;
5514       delete_insns_since (last);
5515     }
5516
5517   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5518      the constant zero.  Reject all other comparisons at this point.  Only
5519      do LE and GT if branches are expensive since they are expensive on
5520      2-operand machines.  */
5521
5522   if (op1 != const0_rtx
5523       || (code != EQ && code != NE
5524           && (BRANCH_COST (optimize_insn_for_speed_p (),
5525                            false) <= 1 || (code != LE && code != GT))))
5526     return 0;
5527
5528   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5529      do the necessary operation below.  */
5530
5531   tem = 0;
5532
5533   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5534      the sign bit set.  */
5535
5536   if (code == LE)
5537     {
5538       /* This is destructive, so SUBTARGET can't be OP0.  */
5539       if (rtx_equal_p (subtarget, op0))
5540         subtarget = 0;
5541
5542       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5543                           OPTAB_WIDEN);
5544       if (tem)
5545         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5546                             OPTAB_WIDEN);
5547     }
5548
5549   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5550      number of bits in the mode of OP0, minus one.  */
5551
5552   if (code == GT)
5553     {
          /* As for LE, OP0 is read again after the first insn, so
             SUBTARGET must not be OP0.  */
5554       if (rtx_equal_p (subtarget, op0))
5555         subtarget = 0;
5556
5557       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5558                           size_int (GET_MODE_BITSIZE (mode) - 1),
5559                           subtarget, 0);
5560       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5561                           OPTAB_WIDEN);
5562     }
5563
5564   if (code == EQ || code == NE)
5565     {
5566       /* For EQ or NE, one way to do the comparison is to apply an operation
5567          that converts the operand into a positive number if it is nonzero
5568          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5569          for NE we negate.  This puts the result in the sign bit.  Then we
5570          normalize with a shift, if needed.
5571
5572          Two operations that can do the above actions are ABS and FFS, so try
5573          them.  If that doesn't work, and MODE is smaller than a full word,
5574          we can use zero-extension to the wider mode (an unsigned conversion)
5575          as the operation.  */
5576
5577       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5578          that is compensated by the subsequent overflow when subtracting
5579          one / negating.  */
5580
5581       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5582         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5583       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5584         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5585       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5586         {
5587           tem = convert_modes (word_mode, mode, op0, 1);
              /* From here on the computation, including the final shift,
                 is carried out in word_mode.  */
5588           mode = word_mode;
5589         }
5590
5591       if (tem != 0)
5592         {
5593           if (code == EQ)
5594             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5595                                 0, OPTAB_WIDEN);
5596           else
5597             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5598         }
5599
5600       /* If we couldn't do it that way, for NE we can "or" the two's complement
5601          of the value with itself.  For EQ, we take the one's complement of
5602          that "or", which is an extra insn, so we only handle EQ if branches
5603          are expensive.  */
5604
5605       if (tem == 0
5606           && (code == NE
5607               || BRANCH_COST (optimize_insn_for_speed_p (),
5608                               false) > 1))
5609         {
5610           if (rtx_equal_p (subtarget, op0))
5611             subtarget = 0;
5612
5613           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5614           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5615                               OPTAB_WIDEN);
5616
5617           if (tem && code == EQ)
5618             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5619         }
5620     }
5621
      /* The answer now sits in the sign bit of TEM.  If a normalized value
         was requested, shift the sign bit down to the low-order bit; the
         last argument selects the shift used for a 0/1 result
         (NORMALIZEP == 1) as opposed to a 0/-1 result.  */
5622   if (tem && normalizep)
5623     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5624                         size_int (GET_MODE_BITSIZE (mode) - 1),
5625                         subtarget, normalizep == 1);
5626
      /* Deliver the result.  Without a TARGET, TEM is returned as is.  If
         TEM's mode differs from TARGET's, it is converted into TARGET.
         If the modes agree but no SUBTARGET was in use, TEM is copied
         into TARGET.  With a SUBTARGET, TEM is returned directly
         (presumably it already is TARGET in that case).  On failure the
         insns emitted since LAST are removed.  */
5627   if (tem)
5628     {
5629       if (!target)
5630         ;
5631       else if (GET_MODE (tem) != target_mode)
5632         {
5633           convert_move (target, tem, 0);
5634           tem = target;
5635         }
5636       else if (!subtarget)
5637         {
5638           emit_move_insn (target, tem);
5639           tem = target;
5640         }
5641     }
5642   else
5643     delete_insns_since (last);
5644
5645   return tem;
5646 }
5647
5648 /* Like emit_store_flag, but always succeeds.

        The arguments are those of emit_store_flag.  If emit_store_flag
        cannot do the job, the flag is computed with an explicit
        set/compare/jump/set sequence.  In that fallback the true value is
        NORMALIZEP if it is nonzero and 1 otherwise, and the false value
        is 0.

        The returned rtx is not necessarily TARGET: a new word_mode pseudo
        is created when TARGET is null, and a new pseudo of TARGET's mode
        is used when TARGET is not a register or is mentioned in OP0 or
        OP1.  */
5649
5650 rtx
5651 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5652                        enum machine_mode mode, int unsignedp, int normalizep)
5653 {
5654   rtx tem, label;
5655   rtx trueval, falseval;
5656
5657   /* First see if emit_store_flag can do the job.  */
5658   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5659   if (tem != 0)
5660     return tem;
5661
5662   if (!target)
5663     target = gen_reg_rtx (word_mode);
5664
5665   /* If this failed, we have to do this with set/compare/jump/set code.
5666      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5667   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5668   if (code == NE
5669       && GET_MODE_CLASS (mode) == MODE_INT
5670       && REG_P (target)
5671       && op0 == target
5672       && op1 == const0_rtx)
5673     {
          /* TARGET already holds the operand: skip the store when it is
             zero, otherwise overwrite it with TRUEVAL.  */
5674       label = gen_label_rtx ();
5675       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5676                                mode, NULL_RTX, NULL_RTX, label, -1);
5677       emit_move_insn (target, trueval);
5678       emit_label (label);
5679       return target;
5680     }
5681
      /* TARGET is written before the comparison is emitted, so it must be
         a register that does not appear in either operand.  */
5682   if (!REG_P (target)
5683       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5684     target = gen_reg_rtx (GET_MODE (target));
5685
5686   /* Jump in the right direction if the target cannot implement CODE
5687      but can jump on its reverse condition.  */
5688   falseval = const0_rtx;
5689   if (! can_compare_p (code, mode, ccp_jump)
5690       && (! FLOAT_MODE_P (mode)
5691           || code == ORDERED || code == UNORDERED
5692           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5693           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5694     {
5695       enum rtx_code rcode;
5696       if (FLOAT_MODE_P (mode))
5697         rcode = reverse_condition_maybe_unordered (code);
5698       else
5699         rcode = reverse_condition (code);
5700
5701       /* Canonicalize to UNORDERED for the libcall.  */
5702       if (can_compare_p (rcode, mode, ccp_jump)
5703           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5704         {
              /* Test the reversed condition instead and exchange the two
                 values to be stored, so the final result is unchanged.  */
5705           falseval = trueval;
5706           trueval = const0_rtx;
5707           code = rcode;
5708         }
5709     }
5710
      /* Store TRUEVAL first, then jump over the FALSEVAL store when the
         comparison holds.  */
5711   emit_move_insn (target, trueval);
5712   label = gen_label_rtx ();
5713   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5714                            NULL_RTX, label, -1);
5715
5716   emit_move_insn (target, falseval);
5717   emit_label (label);
5718
5719   return target;
5720 }
5721 \f
5722 /* Perform possibly multi-word comparison and conditional jump to LABEL
5723    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5724    now a thin wrapper around do_compare_rtx_and_jump.

        The comparison is flagged as unsigned exactly when OP is one of
        LTU, LEU, GTU or GEU.  The remaining arguments of
        do_compare_rtx_and_jump are fixed: NULL_RTX, NULL_RTX, LABEL
        and -1.  */
5725
5726 static void
5727 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5728                  rtx label)
5729 {
      /* Derive signedness from the comparison code itself.  */
5730   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5731   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5732                            NULL_RTX, NULL_RTX, label, -1);
5733 }