[official-gcc.git] / gcc / expmed.c
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 2011
6 Free Software Foundation, Inc.
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "diagnostic-core.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41 #include "expmed.h"
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 rtx);
54 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 rtx);
59 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
60 unsigned HOST_WIDE_INT,
61 unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT, rtx, int, bool);
63 static rtx mask_rtx (enum machine_mode, int, int, int);
64 static rtx lshift_value (enum machine_mode, rtx, int, int);
65 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
66 unsigned HOST_WIDE_INT, int);
67 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
68 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
69 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
 71 /* Test whether a value is zero or a power of two.  */
72 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
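/* Illustrative note: x & (x - 1) clears the lowest set bit of x, so the
   result is zero exactly when x has at most one bit set; e.g. 8 & 7 == 0,
   while 6 & 5 == 4 != 0.  */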
74 #ifndef SLOW_UNALIGNED_ACCESS
75 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
76 #endif
79 /* Reduce conditional compilation elsewhere. */
80 #ifndef HAVE_insv
81 #define HAVE_insv 0
82 #define CODE_FOR_insv CODE_FOR_nothing
83 #define gen_insv(a,b,c,d) NULL_RTX
84 #endif
85 #ifndef HAVE_extv
86 #define HAVE_extv 0
87 #define CODE_FOR_extv CODE_FOR_nothing
88 #define gen_extv(a,b,c,d) NULL_RTX
89 #endif
90 #ifndef HAVE_extzv
91 #define HAVE_extzv 0
92 #define CODE_FOR_extzv CODE_FOR_nothing
93 #define gen_extzv(a,b,c,d) NULL_RTX
94 #endif
96 void
97 init_expmed (void)
99 struct
101 struct rtx_def reg; rtunion reg_fld[2];
102 struct rtx_def plus; rtunion plus_fld1;
103 struct rtx_def neg;
104 struct rtx_def mult; rtunion mult_fld1;
105 struct rtx_def sdiv; rtunion sdiv_fld1;
106 struct rtx_def udiv; rtunion udiv_fld1;
107 struct rtx_def zext;
108 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
109 struct rtx_def smod_32; rtunion smod_32_fld1;
110 struct rtx_def wide_mult; rtunion wide_mult_fld1;
111 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
112 struct rtx_def wide_trunc;
113 struct rtx_def shift; rtunion shift_fld1;
114 struct rtx_def shift_mult; rtunion shift_mult_fld1;
115 struct rtx_def shift_add; rtunion shift_add_fld1;
116 struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
117 struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
118 } all;
120 rtx pow2[MAX_BITS_PER_WORD];
121 rtx cint[MAX_BITS_PER_WORD];
122 int m, n;
123 enum machine_mode mode, wider_mode;
124 int speed;
127 for (m = 1; m < MAX_BITS_PER_WORD; m++)
129 pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
130 cint[m] = GEN_INT (m);
132 memset (&all, 0, sizeof all);
134 PUT_CODE (&all.reg, REG);
135 /* Avoid using hard regs in ways which may be unsupported. */
136 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
138 PUT_CODE (&all.plus, PLUS);
139 XEXP (&all.plus, 0) = &all.reg;
140 XEXP (&all.plus, 1) = &all.reg;
142 PUT_CODE (&all.neg, NEG);
143 XEXP (&all.neg, 0) = &all.reg;
145 PUT_CODE (&all.mult, MULT);
146 XEXP (&all.mult, 0) = &all.reg;
147 XEXP (&all.mult, 1) = &all.reg;
149 PUT_CODE (&all.sdiv, DIV);
150 XEXP (&all.sdiv, 0) = &all.reg;
151 XEXP (&all.sdiv, 1) = &all.reg;
153 PUT_CODE (&all.udiv, UDIV);
154 XEXP (&all.udiv, 0) = &all.reg;
155 XEXP (&all.udiv, 1) = &all.reg;
157 PUT_CODE (&all.sdiv_32, DIV);
158 XEXP (&all.sdiv_32, 0) = &all.reg;
159 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
161 PUT_CODE (&all.smod_32, MOD);
162 XEXP (&all.smod_32, 0) = &all.reg;
163 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
165 PUT_CODE (&all.zext, ZERO_EXTEND);
166 XEXP (&all.zext, 0) = &all.reg;
168 PUT_CODE (&all.wide_mult, MULT);
169 XEXP (&all.wide_mult, 0) = &all.zext;
170 XEXP (&all.wide_mult, 1) = &all.zext;
172 PUT_CODE (&all.wide_lshr, LSHIFTRT);
173 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
175 PUT_CODE (&all.wide_trunc, TRUNCATE);
176 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
178 PUT_CODE (&all.shift, ASHIFT);
179 XEXP (&all.shift, 0) = &all.reg;
181 PUT_CODE (&all.shift_mult, MULT);
182 XEXP (&all.shift_mult, 0) = &all.reg;
184 PUT_CODE (&all.shift_add, PLUS);
185 XEXP (&all.shift_add, 0) = &all.shift_mult;
186 XEXP (&all.shift_add, 1) = &all.reg;
188 PUT_CODE (&all.shift_sub0, MINUS);
189 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
190 XEXP (&all.shift_sub0, 1) = &all.reg;
192 PUT_CODE (&all.shift_sub1, MINUS);
193 XEXP (&all.shift_sub1, 0) = &all.reg;
194 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
196 for (speed = 0; speed < 2; speed++)
198 crtl->maybe_hot_insn_p = speed;
199 zero_cost[speed] = set_src_cost (const0_rtx, speed);
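/* Illustrative note: the two passes fill the cost tables once for size
   optimization (speed == 0) and once for speed optimization (speed == 1);
   crtl->maybe_hot_insn_p is set to match so that cost hooks which consult
   the current RTL profile agree with the SPEED argument given to
   set_src_cost, and default_rtl_profile () at the end restores the normal
   state.  */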
201 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
202 mode != VOIDmode;
203 mode = GET_MODE_WIDER_MODE (mode))
205 PUT_MODE (&all.reg, mode);
206 PUT_MODE (&all.plus, mode);
207 PUT_MODE (&all.neg, mode);
208 PUT_MODE (&all.mult, mode);
209 PUT_MODE (&all.sdiv, mode);
210 PUT_MODE (&all.udiv, mode);
211 PUT_MODE (&all.sdiv_32, mode);
212 PUT_MODE (&all.smod_32, mode);
213 PUT_MODE (&all.wide_trunc, mode);
214 PUT_MODE (&all.shift, mode);
215 PUT_MODE (&all.shift_mult, mode);
216 PUT_MODE (&all.shift_add, mode);
217 PUT_MODE (&all.shift_sub0, mode);
218 PUT_MODE (&all.shift_sub1, mode);
220 add_cost[speed][mode] = set_src_cost (&all.plus, speed);
221 neg_cost[speed][mode] = set_src_cost (&all.neg, speed);
222 mul_cost[speed][mode] = set_src_cost (&all.mult, speed);
223 sdiv_cost[speed][mode] = set_src_cost (&all.sdiv, speed);
224 udiv_cost[speed][mode] = set_src_cost (&all.udiv, speed);
226 sdiv_pow2_cheap[speed][mode] = (set_src_cost (&all.sdiv_32, speed)
227 <= 2 * add_cost[speed][mode]);
228 smod_pow2_cheap[speed][mode] = (set_src_cost (&all.smod_32, speed)
229 <= 4 * add_cost[speed][mode]);
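/* Illustrative note: a signed divide by a power of two is recorded as cheap
   for this mode when a generic 32-bit DIV costs no more than two additions,
   and likewise a MOD when it costs no more than four; the division expanders
   later in this file consult these flags when choosing between a real divide
   and an open-coded shift-and-mask sequence.  */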
231 wider_mode = GET_MODE_WIDER_MODE (mode);
232 if (wider_mode != VOIDmode)
234 PUT_MODE (&all.zext, wider_mode);
235 PUT_MODE (&all.wide_mult, wider_mode);
236 PUT_MODE (&all.wide_lshr, wider_mode);
237 XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
239 mul_widen_cost[speed][wider_mode]
240 = set_src_cost (&all.wide_mult, speed);
241 mul_highpart_cost[speed][mode]
242 = set_src_cost (&all.wide_trunc, speed);
245 shift_cost[speed][mode][0] = 0;
246 shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
247 = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];
249 n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
250 for (m = 1; m < n; m++)
252 XEXP (&all.shift, 1) = cint[m];
253 XEXP (&all.shift_mult, 1) = pow2[m];
255 shift_cost[speed][mode][m] = set_src_cost (&all.shift, speed);
256 shiftadd_cost[speed][mode][m] = set_src_cost (&all.shift_add,
257 speed);
258 shiftsub0_cost[speed][mode][m] = set_src_cost (&all.shift_sub0,
259 speed);
260 shiftsub1_cost[speed][mode][m] = set_src_cost (&all.shift_sub1,
261 speed);
265 if (alg_hash_used_p)
266 memset (alg_hash, 0, sizeof (alg_hash));
267 else
268 alg_hash_used_p = true;
269 default_rtl_profile ();
272 /* Return an rtx representing minus the value of X.
273 MODE is the intended mode of the result,
274 useful if X is a CONST_INT. */
 276 rtx
 277 negate_rtx (enum machine_mode mode, rtx x)
279 rtx result = simplify_unary_operation (NEG, mode, x, mode);
281 if (result == 0)
282 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
284 return result;
287 /* Report on the availability of insv/extv/extzv and the desired mode
288 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
289 is false; else the mode of the specified operand. If OPNO is -1,
290 all the caller cares about is whether the insn is available. */
291 enum machine_mode
292 mode_for_extraction (enum extraction_pattern pattern, int opno)
294 const struct insn_data_d *data;
296 switch (pattern)
298 case EP_insv:
299 if (HAVE_insv)
301 data = &insn_data[CODE_FOR_insv];
302 break;
304 return MAX_MACHINE_MODE;
306 case EP_extv:
307 if (HAVE_extv)
309 data = &insn_data[CODE_FOR_extv];
310 break;
312 return MAX_MACHINE_MODE;
314 case EP_extzv:
315 if (HAVE_extzv)
317 data = &insn_data[CODE_FOR_extzv];
318 break;
320 return MAX_MACHINE_MODE;
322 default:
323 gcc_unreachable ();
326 if (opno == -1)
327 return VOIDmode;
329 /* Everyone who uses this function used to follow it with
330 if (result == VOIDmode) result = word_mode; */
331 if (data->operand[opno].mode == VOIDmode)
332 return word_mode;
333 return data->operand[opno].mode;
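/* For example, store_bit_field_1 below calls mode_for_extraction (EP_insv, 3)
   to find the mode the insv pattern wants for its source-value operand.  */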
336 /* A subroutine of store_bit_field, with the same arguments. Return true
337 if the operation could be implemented.
339 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
340 no other way of implementing the operation. If FALLBACK_P is false,
341 return false instead. */
343 static bool
344 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
345 unsigned HOST_WIDE_INT bitnum,
346 unsigned HOST_WIDE_INT bitregion_start,
347 unsigned HOST_WIDE_INT bitregion_end,
348 enum machine_mode fieldmode,
349 rtx value, bool fallback_p)
351 unsigned int unit
352 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
353 unsigned HOST_WIDE_INT offset, bitpos;
354 rtx op0 = str_rtx;
355 int byte_offset;
356 rtx orig_value;
358 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
360 while (GET_CODE (op0) == SUBREG)
362 /* The following line once was done only if WORDS_BIG_ENDIAN,
363 but I think that is a mistake. WORDS_BIG_ENDIAN is
364 meaningful at a much higher level; when structures are copied
365 between memory and regs, the higher-numbered regs
366 always get higher addresses. */
367 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
368 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
370 byte_offset = 0;
372 /* Paradoxical subregs need special handling on big endian machines. */
373 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
375 int difference = inner_mode_size - outer_mode_size;
377 if (WORDS_BIG_ENDIAN)
378 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
379 if (BYTES_BIG_ENDIAN)
380 byte_offset += difference % UNITS_PER_WORD;
382 else
383 byte_offset = SUBREG_BYTE (op0);
385 bitnum += byte_offset * BITS_PER_UNIT;
386 op0 = SUBREG_REG (op0);
389 /* No action is needed if the target is a register and if the field
390 lies completely outside that register. This can occur if the source
391 code contains an out-of-bounds access to a small array. */
392 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
393 return true;
395 /* Use vec_set patterns for inserting parts of vectors whenever
396 available. */
397 if (VECTOR_MODE_P (GET_MODE (op0))
398 && !MEM_P (op0)
399 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
400 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
401 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
402 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
404 struct expand_operand ops[3];
405 enum machine_mode outermode = GET_MODE (op0);
406 enum machine_mode innermode = GET_MODE_INNER (outermode);
407 enum insn_code icode = optab_handler (vec_set_optab, outermode);
408 int pos = bitnum / GET_MODE_BITSIZE (innermode);
410 create_fixed_operand (&ops[0], op0);
411 create_input_operand (&ops[1], value, innermode);
412 create_integer_operand (&ops[2], pos);
413 if (maybe_expand_insn (icode, 3, ops))
414 return true;
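/* For illustration: if OP0 is a V2DImode register and the target provides a
   vec_set pattern for that mode, storing a DImode value at BITNUM 64 becomes
   a vector-element insert with POS = 64 / 64 = 1, i.e. the second lane.  */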
417 /* If the target is a register, overwriting the entire object, or storing
418 a full-word or multi-word field can be done with just a SUBREG.
420 If the target is memory, storing any naturally aligned field can be
421 done with a simple store. For targets that support fast unaligned
422 memory, any naturally sized, unit aligned field can be done directly. */
424 offset = bitnum / unit;
425 bitpos = bitnum % unit;
426 byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
427 + (offset * UNITS_PER_WORD);
429 if (bitpos == 0
430 && bitsize == GET_MODE_BITSIZE (fieldmode)
431 && (!MEM_P (op0)
432 ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
433 || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
434 && ((GET_MODE (op0) == fieldmode && byte_offset == 0)
435 || validate_subreg (fieldmode, GET_MODE (op0), op0,
436 byte_offset)))
437 : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
438 || (offset * BITS_PER_UNIT % bitsize == 0
439 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
441 if (MEM_P (op0))
442 op0 = adjust_address (op0, fieldmode, offset);
443 else if (GET_MODE (op0) != fieldmode)
444 op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
445 byte_offset);
446 emit_move_insn (op0, value);
447 return true;
450 /* Make sure we are playing with integral modes. Pun with subregs
451 if we aren't. This must come after the entire register case above,
452 since that case is valid for any mode. The following cases are only
453 valid for integral modes. */
455 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
456 if (imode != GET_MODE (op0))
458 if (MEM_P (op0))
459 op0 = adjust_address (op0, imode, 0);
460 else
462 gcc_assert (imode != BLKmode);
463 op0 = gen_lowpart (imode, op0);
468 /* We may be accessing data outside the field, which means
469 we can alias adjacent data. */
470 /* ?? not always for C++0x memory model ?? */
471 if (MEM_P (op0))
473 op0 = shallow_copy_rtx (op0);
474 set_mem_alias_set (op0, 0);
475 set_mem_expr (op0, 0);
478 /* If OP0 is a register, BITPOS must count within a word.
479 But as we have it, it counts within whatever size OP0 now has.
480 On a bigendian machine, these are not the same, so convert. */
481 if (BYTES_BIG_ENDIAN
482 && !MEM_P (op0)
483 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
484 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
486 /* Storing an lsb-aligned field in a register
487 can be done with a movestrict instruction. */
489 if (!MEM_P (op0)
490 && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
491 && bitsize == GET_MODE_BITSIZE (fieldmode)
492 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
494 struct expand_operand ops[2];
495 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
496 rtx arg0 = op0;
497 unsigned HOST_WIDE_INT subreg_off;
499 if (GET_CODE (arg0) == SUBREG)
501 /* Else we've got some float mode source being extracted into
502 a different float mode destination -- this combination of
503 subregs results in Severe Tire Damage. */
504 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
505 || GET_MODE_CLASS (fieldmode) == MODE_INT
506 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
507 arg0 = SUBREG_REG (arg0);
510 subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
511 + (offset * UNITS_PER_WORD);
512 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
514 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
516 create_fixed_operand (&ops[0], arg0);
517 /* Shrink the source operand to FIELDMODE. */
518 create_convert_operand_to (&ops[1], value, fieldmode, false);
519 if (maybe_expand_insn (icode, 2, ops))
520 return true;
524 /* Handle fields bigger than a word. */
526 if (bitsize > BITS_PER_WORD)
528 /* Here we transfer the words of the field
529 in the order least significant first.
530 This is because the most significant word is the one which may
531 be less than full.
532 However, only do that if the value is not BLKmode. */
534 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
535 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
536 unsigned int i;
537 rtx last;
539 /* This is the mode we must force value to, so that there will be enough
540 subwords to extract. Note that fieldmode will often (always?) be
541 VOIDmode, because that is what store_field uses to indicate that this
542 is a bit field, but passing VOIDmode to operand_subword_force
543 is not allowed. */
544 fieldmode = GET_MODE (value);
545 if (fieldmode == VOIDmode)
546 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
548 last = get_last_insn ();
549 for (i = 0; i < nwords; i++)
551 /* If I is 0, use the low-order word in both field and target;
552 if I is 1, use the next to lowest word; and so on. */
553 unsigned int wordnum = (backwards ? nwords - i - 1 : i);
554 unsigned int bit_offset = (backwards
555 ? MAX ((int) bitsize - ((int) i + 1)
556 * BITS_PER_WORD,
 557 0)
 558 : (int) i * BITS_PER_WORD);
559 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
561 if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD,
562 bitsize - i * BITS_PER_WORD),
563 bitnum + bit_offset,
564 bitregion_start, bitregion_end,
565 word_mode,
566 value_word, fallback_p))
568 delete_insns_since (last);
569 return false;
572 return true;
 575 /* From here on we can assume that the field to be stored in fits within
 576 a single word, since fields wider than a word were handled above.  */
578 /* OFFSET is the number of words or bytes (UNIT says which)
579 from STR_RTX to the first word or byte containing part of the field. */
581 if (!MEM_P (op0))
583 if (offset != 0
584 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
586 if (!REG_P (op0))
588 /* Since this is a destination (lvalue), we can't copy
589 it to a pseudo. We can remove a SUBREG that does not
590 change the size of the operand. Such a SUBREG may
591 have been added above. */
592 gcc_assert (GET_CODE (op0) == SUBREG
593 && (GET_MODE_SIZE (GET_MODE (op0))
594 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
595 op0 = SUBREG_REG (op0);
597 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
598 op0, (offset * UNITS_PER_WORD));
600 offset = 0;
603 /* If VALUE has a floating-point or complex mode, access it as an
604 integer of the corresponding size. This can occur on a machine
605 with 64 bit registers that uses SFmode for float. It can also
606 occur for unaligned float or complex fields. */
607 orig_value = value;
608 if (GET_MODE (value) != VOIDmode
609 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
610 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
612 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
613 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
616 /* Now OFFSET is nonzero only if OP0 is memory
617 and is therefore always measured in bytes. */
619 if (HAVE_insv
620 && GET_MODE (value) != BLKmode
621 && bitsize > 0
622 && GET_MODE_BITSIZE (op_mode) >= bitsize
623 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
624 && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
626 struct expand_operand ops[4];
627 int xbitpos = bitpos;
628 rtx value1;
629 rtx xop0 = op0;
630 rtx last = get_last_insn ();
631 bool copy_back = false;
633 /* Add OFFSET into OP0's address. */
634 if (MEM_P (xop0))
635 xop0 = adjust_address (xop0, byte_mode, offset);
637 /* If xop0 is a register, we need it in OP_MODE
638 to make it acceptable to the format of insv. */
639 if (GET_CODE (xop0) == SUBREG)
640 /* We can't just change the mode, because this might clobber op0,
641 and we will need the original value of op0 if insv fails. */
642 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
643 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
644 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
646 /* If the destination is a paradoxical subreg such that we need a
647 truncate to the inner mode, perform the insertion on a temporary and
648 truncate the result to the original destination. Note that we can't
649 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
650 X) 0)) is (reg:N X). */
651 if (GET_CODE (xop0) == SUBREG
652 && REG_P (SUBREG_REG (xop0))
653 && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
654 op_mode)))
656 rtx tem = gen_reg_rtx (op_mode);
657 emit_move_insn (tem, xop0);
658 xop0 = tem;
659 copy_back = true;
662 /* On big-endian machines, we count bits from the most significant.
663 If the bit field insn does not, we must invert. */
665 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
666 xbitpos = unit - bitsize - xbitpos;
668 /* We have been counting XBITPOS within UNIT.
669 Count instead within the size of the register. */
670 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
671 xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
673 unit = GET_MODE_BITSIZE (op_mode);
675 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
676 value1 = value;
677 if (GET_MODE (value) != op_mode)
679 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
681 /* Optimization: Don't bother really extending VALUE
682 if it has all the bits we will actually use. However,
683 if we must narrow it, be sure we do it correctly. */
685 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
687 rtx tmp;
689 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
690 if (! tmp)
691 tmp = simplify_gen_subreg (op_mode,
692 force_reg (GET_MODE (value),
693 value1),
694 GET_MODE (value), 0);
695 value1 = tmp;
697 else
698 value1 = gen_lowpart (op_mode, value1);
700 else if (CONST_INT_P (value))
701 value1 = gen_int_mode (INTVAL (value), op_mode);
702 else
703 /* Parse phase is supposed to make VALUE's data type
704 match that of the component reference, which is a type
705 at least as wide as the field; so VALUE should have
706 a mode that corresponds to that type. */
707 gcc_assert (CONSTANT_P (value));
710 create_fixed_operand (&ops[0], xop0);
711 create_integer_operand (&ops[1], bitsize);
712 create_integer_operand (&ops[2], xbitpos);
713 create_input_operand (&ops[3], value1, op_mode);
714 if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
716 if (copy_back)
717 convert_move (op0, xop0, true);
718 return true;
720 delete_insns_since (last);
723 /* If OP0 is a memory, try copying it to a register and seeing if a
724 cheap register alternative is available. */
725 if (HAVE_insv && MEM_P (op0))
727 enum machine_mode bestmode;
728 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
730 if (bitregion_end)
731 maxbits = bitregion_end - bitregion_start + 1;
733 /* Get the mode to use for inserting into this field. If OP0 is
734 BLKmode, get the smallest mode consistent with the alignment. If
735 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
736 mode. Otherwise, use the smallest mode containing the field. */
738 if (GET_MODE (op0) == BLKmode
739 || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
740 || (op_mode != MAX_MACHINE_MODE
741 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
742 bestmode = get_best_mode (bitsize, bitnum,
743 bitregion_start, bitregion_end,
744 MEM_ALIGN (op0),
745 (op_mode == MAX_MACHINE_MODE
746 ? VOIDmode : op_mode),
747 MEM_VOLATILE_P (op0));
748 else
749 bestmode = GET_MODE (op0);
751 if (bestmode != VOIDmode
752 && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
753 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
754 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
756 rtx last, tempreg, xop0;
757 unsigned HOST_WIDE_INT xoffset, xbitpos;
759 last = get_last_insn ();
761 /* Adjust address to point to the containing unit of
762 that mode. Compute the offset as a multiple of this unit,
763 counting in bytes. */
764 unit = GET_MODE_BITSIZE (bestmode);
765 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
766 xbitpos = bitnum % unit;
767 xop0 = adjust_address (op0, bestmode, xoffset);
769 /* Fetch that unit, store the bitfield in it, then store
770 the unit. */
771 tempreg = copy_to_reg (xop0);
772 if (store_bit_field_1 (tempreg, bitsize, xbitpos,
773 bitregion_start, bitregion_end,
774 fieldmode, orig_value, false))
776 emit_move_insn (xop0, tempreg);
777 return true;
779 delete_insns_since (last);
783 if (!fallback_p)
784 return false;
786 store_fixed_bit_field (op0, offset, bitsize, bitpos,
787 bitregion_start, bitregion_end, value);
788 return true;
791 /* Generate code to store value from rtx VALUE
792 into a bit-field within structure STR_RTX
793 containing BITSIZE bits starting at bit BITNUM.
795 BITREGION_START is bitpos of the first bitfield in this region.
796 BITREGION_END is the bitpos of the ending bitfield in this region.
 797 These two fields are 0 if the C++ memory model does not apply,
798 or we are not interested in keeping track of bitfield regions.
800 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
802 void
803 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
804 unsigned HOST_WIDE_INT bitnum,
805 unsigned HOST_WIDE_INT bitregion_start,
806 unsigned HOST_WIDE_INT bitregion_end,
807 enum machine_mode fieldmode,
808 rtx value)
810 /* Under the C++0x memory model, we must not touch bits outside the
811 bit region. Adjust the address to start at the beginning of the
812 bit region. */
813 if (MEM_P (str_rtx)
814 && bitregion_start > 0)
816 enum machine_mode bestmode;
817 enum machine_mode op_mode;
818 unsigned HOST_WIDE_INT offset;
820 op_mode = mode_for_extraction (EP_insv, 3);
821 if (op_mode == MAX_MACHINE_MODE)
822 op_mode = VOIDmode;
824 offset = bitregion_start / BITS_PER_UNIT;
825 bitnum -= bitregion_start;
826 bitregion_end -= bitregion_start;
827 bitregion_start = 0;
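/* For example, with BITREGION_START == 32 and BITNUM == 37, OFFSET becomes 4
   bytes and BITNUM becomes 5, so the access is re-based at the start of the
   bit region; get_best_mode below then picks a mode that stays inside the
   re-based region.  */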
828 bestmode = get_best_mode (bitsize, bitnum,
829 bitregion_start, bitregion_end,
830 MEM_ALIGN (str_rtx),
831 op_mode,
832 MEM_VOLATILE_P (str_rtx));
833 str_rtx = adjust_address (str_rtx, bestmode, offset);
836 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
837 bitregion_start, bitregion_end,
838 fieldmode, value, true))
839 gcc_unreachable ();
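/* A hypothetical example call (not from this file): storing the constant 6
   into a 3-bit field that starts at bit 5 of a byte-aligned MEM would be
   store_bit_field (mem, 3, 5, 0, 0, QImode, GEN_INT (6)), where the zero
   region bounds mean no C++ memory-model bit region is being tracked.  */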
842 /* Use shifts and boolean operations to store VALUE
843 into a bit field of width BITSIZE
844 in a memory location specified by OP0 except offset by OFFSET bytes.
845 (OFFSET must be 0 if OP0 is a register.)
846 The field starts at position BITPOS within the byte.
847 (If OP0 is a register, it may be a full word or a narrower mode,
848 but BITPOS still counts within a full word,
849 which is significant on bigendian machines.) */
851 static void
852 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
853 unsigned HOST_WIDE_INT bitsize,
854 unsigned HOST_WIDE_INT bitpos,
855 unsigned HOST_WIDE_INT bitregion_start,
856 unsigned HOST_WIDE_INT bitregion_end,
857 rtx value)
859 enum machine_mode mode;
860 unsigned int total_bits = BITS_PER_WORD;
861 rtx temp;
862 int all_zero = 0;
863 int all_one = 0;
865 /* There is a case not handled here:
866 a structure with a known alignment of just a halfword
867 and a field split across two aligned halfwords within the structure.
868 Or likewise a structure with a known alignment of just a byte
869 and a field split across two bytes.
870 Such cases are not supposed to be able to occur. */
872 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
874 gcc_assert (!offset);
875 /* Special treatment for a bit field split across two registers. */
876 if (bitsize + bitpos > BITS_PER_WORD)
878 store_split_bit_field (op0, bitsize, bitpos,
879 bitregion_start, bitregion_end,
880 value);
881 return;
884 else
886 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
888 if (bitregion_end)
889 maxbits = bitregion_end - bitregion_start + 1;
891 /* Get the proper mode to use for this field. We want a mode that
892 includes the entire field. If such a mode would be larger than
893 a word, we won't be doing the extraction the normal way.
894 We don't want a mode bigger than the destination. */
896 mode = GET_MODE (op0);
897 if (GET_MODE_BITSIZE (mode) == 0
898 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
899 mode = word_mode;
901 if (MEM_VOLATILE_P (op0)
902 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
903 && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
904 && flag_strict_volatile_bitfields > 0)
905 mode = GET_MODE (op0);
906 else
907 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
908 bitregion_start, bitregion_end,
909 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
911 if (mode == VOIDmode)
913 /* The only way this should occur is if the field spans word
914 boundaries. */
915 store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
916 bitregion_start, bitregion_end, value);
917 return;
920 total_bits = GET_MODE_BITSIZE (mode);
922 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
923 be in the range 0 to total_bits-1, and put any excess bytes in
924 OFFSET. */
925 if (bitpos >= total_bits)
927 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
928 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
929 * BITS_PER_UNIT);
932 /* Get ref to an aligned byte, halfword, or word containing the field.
933 Adjust BITPOS to be position within a word,
934 and OFFSET to be the offset of that word.
935 Then alter OP0 to refer to that word. */
936 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
937 offset -= (offset % (total_bits / BITS_PER_UNIT));
938 op0 = adjust_address (op0, mode, offset);
941 mode = GET_MODE (op0);
943 /* Now MODE is either some integral mode for a MEM as OP0,
944 or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
945 The bit field is contained entirely within OP0.
946 BITPOS is the starting bit number within OP0.
947 (OP0's mode may actually be narrower than MODE.) */
949 if (BYTES_BIG_ENDIAN)
950 /* BITPOS is the distance between our msb
951 and that of the containing datum.
952 Convert it to the distance from the lsb. */
953 bitpos = total_bits - bitsize - bitpos;
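/* E.g. with total_bits == 32, bitsize == 8 and a big-endian bit position of
   4, the lsb-relative position becomes 32 - 8 - 4 = 20.  */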
955 /* Now BITPOS is always the distance between our lsb
956 and that of OP0. */
958 /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
959 we must first convert its mode to MODE. */
961 if (CONST_INT_P (value))
963 HOST_WIDE_INT v = INTVAL (value);
965 if (bitsize < HOST_BITS_PER_WIDE_INT)
966 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
968 if (v == 0)
969 all_zero = 1;
970 else if ((bitsize < HOST_BITS_PER_WIDE_INT
971 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
972 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
973 all_one = 1;
975 value = lshift_value (mode, value, bitpos, bitsize);
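/* So, for instance, storing the constant 0 sets ALL_ZERO and the IOR below
   is skipped, while storing 7 into a 3-bit field sets ALL_ONE and the masking
   AND that clears the field is skipped.  */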
977 else
979 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
980 && bitpos + bitsize != GET_MODE_BITSIZE (mode));
982 if (GET_MODE (value) != mode)
983 value = convert_to_mode (mode, value, 1);
985 if (must_and)
986 value = expand_binop (mode, and_optab, value,
987 mask_rtx (mode, 0, bitsize, 0),
988 NULL_RTX, 1, OPTAB_LIB_WIDEN);
989 if (bitpos > 0)
990 value = expand_shift (LSHIFT_EXPR, mode, value,
991 bitpos, NULL_RTX, 1);
994 /* Now clear the chosen bits in OP0,
995 except that if VALUE is -1 we need not bother. */
996 /* We keep the intermediates in registers to allow CSE to combine
997 consecutive bitfield assignments. */
999 temp = force_reg (mode, op0);
1001 if (! all_one)
1003 temp = expand_binop (mode, and_optab, temp,
1004 mask_rtx (mode, bitpos, bitsize, 1),
1005 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1006 temp = force_reg (mode, temp);
1009 /* Now logical-or VALUE into OP0, unless it is zero. */
1011 if (! all_zero)
1013 temp = expand_binop (mode, ior_optab, temp, value,
1014 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1015 temp = force_reg (mode, temp);
1018 if (op0 != temp)
1020 op0 = copy_rtx (op0);
1021 emit_move_insn (op0, temp);
1025 /* Store a bit field that is split across multiple accessible memory objects.
1027 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1028 BITSIZE is the field width; BITPOS the position of its first bit
1029 (within the word).
1030 VALUE is the value to store.
1032 This does not yet handle fields wider than BITS_PER_WORD. */
1034 static void
1035 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1036 unsigned HOST_WIDE_INT bitpos,
1037 unsigned HOST_WIDE_INT bitregion_start,
1038 unsigned HOST_WIDE_INT bitregion_end,
1039 rtx value)
1041 unsigned int unit;
1042 unsigned int bitsdone = 0;
 1044 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1045 much at a time. */
1046 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1047 unit = BITS_PER_WORD;
1048 else
1049 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
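/* E.g. for a MEM known to be only byte aligned, UNIT is 8 and the loop below
   stores at most one byte of the field per iteration; for a REG or SUBREG it
   is always a full word.  */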
1051 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1052 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1053 that VALUE might be a floating-point constant. */
1054 if (CONSTANT_P (value) && !CONST_INT_P (value))
1056 rtx word = gen_lowpart_common (word_mode, value);
1058 if (word && (value != word))
1059 value = word;
1060 else
1061 value = gen_lowpart_common (word_mode,
1062 force_reg (GET_MODE (value) != VOIDmode
1063 ? GET_MODE (value)
1064 : word_mode, value));
1067 while (bitsdone < bitsize)
1069 unsigned HOST_WIDE_INT thissize;
1070 rtx part, word;
1071 unsigned HOST_WIDE_INT thispos;
1072 unsigned HOST_WIDE_INT offset;
1074 offset = (bitpos + bitsdone) / unit;
1075 thispos = (bitpos + bitsdone) % unit;
1077 /* THISSIZE must not overrun a word boundary. Otherwise,
1078 store_fixed_bit_field will call us again, and we will mutually
1079 recurse forever. */
1080 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1081 thissize = MIN (thissize, unit - thispos);
1083 if (BYTES_BIG_ENDIAN)
1085 int total_bits;
1087 /* We must do an endian conversion exactly the same way as it is
1088 done in extract_bit_field, so that the two calls to
1089 extract_fixed_bit_field will have comparable arguments. */
1090 if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1091 total_bits = BITS_PER_WORD;
1092 else
1093 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1095 /* Fetch successively less significant portions. */
1096 if (CONST_INT_P (value))
1097 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1098 >> (bitsize - bitsdone - thissize))
1099 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1100 else
1101 /* The args are chosen so that the last part includes the
1102 lsb. Give extract_bit_field the value it needs (with
1103 endianness compensation) to fetch the piece we want. */
1104 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1105 total_bits - bitsize + bitsdone,
1106 NULL_RTX, 1, false);
1108 else
1110 /* Fetch successively more significant portions. */
1111 if (CONST_INT_P (value))
1112 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1113 >> bitsdone)
1114 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1115 else
1116 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1117 bitsdone, NULL_RTX, 1, false);
1120 /* If OP0 is a register, then handle OFFSET here.
1122 When handling multiword bitfields, extract_bit_field may pass
1123 down a word_mode SUBREG of a larger REG for a bitfield that actually
1124 crosses a word boundary. Thus, for a SUBREG, we must find
1125 the current word starting from the base register. */
1126 if (GET_CODE (op0) == SUBREG)
1128 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1129 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1130 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1131 word = word_offset ? const0_rtx : op0;
1132 else
1133 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1134 GET_MODE (SUBREG_REG (op0)));
1135 offset = 0;
1137 else if (REG_P (op0))
1139 enum machine_mode op0_mode = GET_MODE (op0);
1140 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1141 word = offset ? const0_rtx : op0;
1142 else
1143 word = operand_subword_force (op0, offset, GET_MODE (op0));
1144 offset = 0;
1146 else
1147 word = op0;
1149 /* OFFSET is in UNITs, and UNIT is in bits.
1150 store_fixed_bit_field wants offset in bytes. If WORD is const0_rtx,
1151 it is just an out-of-bounds access. Ignore it. */
1152 if (word != const0_rtx)
1153 store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1154 thispos, bitregion_start, bitregion_end, part);
1155 bitsdone += thissize;
1159 /* A subroutine of extract_bit_field_1 that converts return value X
1160 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1161 to extract_bit_field. */
1163 static rtx
1164 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1165 enum machine_mode tmode, bool unsignedp)
1167 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1168 return x;
1170 /* If the x mode is not a scalar integral, first convert to the
1171 integer mode of that size and then access it as a floating-point
1172 value via a SUBREG. */
1173 if (!SCALAR_INT_MODE_P (tmode))
1175 enum machine_mode smode;
1177 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1178 x = convert_to_mode (smode, x, unsignedp);
1179 x = force_reg (smode, x);
1180 return gen_lowpart (tmode, x);
1183 return convert_to_mode (tmode, x, unsignedp);
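/* For example, when TMODE is SFmode the extracted bits are first converted to
   the integer mode of the same size (SImode on a target with 32-bit floats)
   and then re-interpreted as SFmode via gen_lowpart.  */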
1186 /* A subroutine of extract_bit_field, with the same arguments.
1187 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1188 if we can find no other means of implementing the operation.
 1189 If FALLBACK_P is false, return NULL instead.  */
1191 static rtx
1192 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1193 unsigned HOST_WIDE_INT bitnum,
1194 int unsignedp, bool packedp, rtx target,
1195 enum machine_mode mode, enum machine_mode tmode,
1196 bool fallback_p)
1198 unsigned int unit
1199 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1200 unsigned HOST_WIDE_INT offset, bitpos;
1201 rtx op0 = str_rtx;
1202 enum machine_mode int_mode;
1203 enum machine_mode ext_mode;
1204 enum machine_mode mode1;
1205 int byte_offset;
1207 if (tmode == VOIDmode)
1208 tmode = mode;
1210 while (GET_CODE (op0) == SUBREG)
1212 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1213 op0 = SUBREG_REG (op0);
1216 /* If we have an out-of-bounds access to a register, just return an
1217 uninitialized register of the required mode. This can occur if the
1218 source code contains an out-of-bounds access to a small array. */
1219 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1220 return gen_reg_rtx (tmode);
1222 if (REG_P (op0)
1223 && mode == GET_MODE (op0)
1224 && bitnum == 0
1225 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1227 /* We're trying to extract a full register from itself. */
1228 return op0;
1231 /* See if we can get a better vector mode before extracting. */
1232 if (VECTOR_MODE_P (GET_MODE (op0))
1233 && !MEM_P (op0)
1234 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1236 enum machine_mode new_mode;
1238 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1239 new_mode = MIN_MODE_VECTOR_FLOAT;
1240 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1241 new_mode = MIN_MODE_VECTOR_FRACT;
1242 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1243 new_mode = MIN_MODE_VECTOR_UFRACT;
1244 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1245 new_mode = MIN_MODE_VECTOR_ACCUM;
1246 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1247 new_mode = MIN_MODE_VECTOR_UACCUM;
1248 else
1249 new_mode = MIN_MODE_VECTOR_INT;
1251 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1252 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1253 && targetm.vector_mode_supported_p (new_mode))
1254 break;
1255 if (new_mode != VOIDmode)
1256 op0 = gen_lowpart (new_mode, op0);
1259 /* Use vec_extract patterns for extracting parts of vectors whenever
1260 available. */
1261 if (VECTOR_MODE_P (GET_MODE (op0))
1262 && !MEM_P (op0)
1263 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1264 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1265 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1267 struct expand_operand ops[3];
1268 enum machine_mode outermode = GET_MODE (op0);
1269 enum machine_mode innermode = GET_MODE_INNER (outermode);
1270 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1271 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1273 create_output_operand (&ops[0], target, innermode);
1274 create_input_operand (&ops[1], op0, outermode);
1275 create_integer_operand (&ops[2], pos);
1276 if (maybe_expand_insn (icode, 3, ops))
1278 target = ops[0].value;
1279 if (GET_MODE (target) != mode)
1280 return gen_lowpart (tmode, target);
1281 return target;
1285 /* Make sure we are playing with integral modes. Pun with subregs
1286 if we aren't. */
1288 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1289 if (imode != GET_MODE (op0))
1291 if (MEM_P (op0))
1292 op0 = adjust_address (op0, imode, 0);
1293 else if (imode != BLKmode)
1295 op0 = gen_lowpart (imode, op0);
1297 /* If we got a SUBREG, force it into a register since we
1298 aren't going to be able to do another SUBREG on it. */
1299 if (GET_CODE (op0) == SUBREG)
1300 op0 = force_reg (imode, op0);
1302 else if (REG_P (op0))
1304 rtx reg, subreg;
1305 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1306 MODE_INT);
1307 reg = gen_reg_rtx (imode);
1308 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1309 emit_move_insn (subreg, op0);
1310 op0 = reg;
1311 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1313 else
1315 rtx mem = assign_stack_temp (GET_MODE (op0),
1316 GET_MODE_SIZE (GET_MODE (op0)), 0);
1317 emit_move_insn (mem, op0);
1318 op0 = adjust_address (mem, BLKmode, 0);
1323 /* We may be accessing data outside the field, which means
1324 we can alias adjacent data. */
1325 if (MEM_P (op0))
1327 op0 = shallow_copy_rtx (op0);
1328 set_mem_alias_set (op0, 0);
1329 set_mem_expr (op0, 0);
1332 /* Extraction of a full-word or multi-word value from a structure
1333 in a register or aligned memory can be done with just a SUBREG.
1334 A subword value in the least significant part of a register
1335 can also be extracted with a SUBREG. For this, we need the
1336 byte offset of the value in op0. */
1338 bitpos = bitnum % unit;
1339 offset = bitnum / unit;
1340 byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1342 /* If OP0 is a register, BITPOS must count within a word.
1343 But as we have it, it counts within whatever size OP0 now has.
1344 On a bigendian machine, these are not the same, so convert. */
1345 if (BYTES_BIG_ENDIAN
1346 && !MEM_P (op0)
1347 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1348 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1350 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1351 If that's wrong, the solution is to test for it and set TARGET to 0
1352 if needed. */
1354 /* Only scalar integer modes can be converted via subregs. There is an
1355 additional problem for FP modes here in that they can have a precision
1356 which is different from the size. mode_for_size uses precision, but
1357 we want a mode based on the size, so we must avoid calling it for FP
1358 modes. */
1359 mode1 = (SCALAR_INT_MODE_P (tmode)
1360 ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1361 : mode);
1363 /* If the bitfield is volatile, we need to make sure the access
1364 remains on a type-aligned boundary. */
1365 if (GET_CODE (op0) == MEM
1366 && MEM_VOLATILE_P (op0)
1367 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1368 && flag_strict_volatile_bitfields > 0)
1369 goto no_subreg_mode_swap;
1371 if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1372 && bitpos % BITS_PER_WORD == 0)
1373 || (mode1 != BLKmode
1374 /* ??? The big endian test here is wrong. This is correct
1375 if the value is in a register, and if mode_for_size is not
1376 the same mode as op0. This causes us to get unnecessarily
1377 inefficient code from the Thumb port when -mbig-endian. */
1378 && (BYTES_BIG_ENDIAN
1379 ? bitpos + bitsize == BITS_PER_WORD
1380 : bitpos == 0)))
1381 && ((!MEM_P (op0)
1382 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))
1383 && GET_MODE_SIZE (mode1) != 0
1384 && byte_offset % GET_MODE_SIZE (mode1) == 0)
1385 || (MEM_P (op0)
1386 && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1387 || (offset * BITS_PER_UNIT % bitsize == 0
1388 && MEM_ALIGN (op0) % bitsize == 0)))))
1390 if (MEM_P (op0))
1391 op0 = adjust_address (op0, mode1, offset);
1392 else if (mode1 != GET_MODE (op0))
1394 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1395 byte_offset);
1396 if (sub == NULL)
1397 goto no_subreg_mode_swap;
1398 op0 = sub;
1400 if (mode1 != mode)
1401 return convert_to_mode (tmode, op0, unsignedp);
1402 return op0;
1404 no_subreg_mode_swap:
1406 /* Handle fields bigger than a word. */
1408 if (bitsize > BITS_PER_WORD)
1410 /* Here we transfer the words of the field
1411 in the order least significant first.
1412 This is because the most significant word is the one which may
1413 be less than full. */
1415 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1416 unsigned int i;
1418 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1419 target = gen_reg_rtx (mode);
1421 /* Indicate for flow that the entire target reg is being set. */
1422 emit_clobber (target);
1424 for (i = 0; i < nwords; i++)
1426 /* If I is 0, use the low-order word in both field and target;
1427 if I is 1, use the next to lowest word; and so on. */
1428 /* Word number in TARGET to use. */
1429 unsigned int wordnum
1430 = (WORDS_BIG_ENDIAN
1431 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1432 : i);
1433 /* Offset from start of field in OP0. */
1434 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1435 ? MAX (0, ((int) bitsize - ((int) i + 1)
1436 * (int) BITS_PER_WORD))
1437 : (int) i * BITS_PER_WORD);
1438 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1439 rtx result_part
1440 = extract_bit_field (op0, MIN (BITS_PER_WORD,
1441 bitsize - i * BITS_PER_WORD),
1442 bitnum + bit_offset, 1, false, target_part, mode,
1443 word_mode);
1445 gcc_assert (target_part);
1447 if (result_part != target_part)
1448 emit_move_insn (target_part, result_part);
1451 if (unsignedp)
1453 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1454 need to be zero'd out. */
1455 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1457 unsigned int i, total_words;
1459 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1460 for (i = nwords; i < total_words; i++)
1461 emit_move_insn
1462 (operand_subword (target,
1463 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1464 1, VOIDmode),
1465 const0_rtx);
1467 return target;
1470 /* Signed bit field: sign-extend with two arithmetic shifts. */
1471 target = expand_shift (LSHIFT_EXPR, mode, target,
1472 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1473 return expand_shift (RSHIFT_EXPR, mode, target,
1474 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1477 /* From here on we know the desired field is smaller than a word. */
1479 /* Check if there is a correspondingly-sized integer field, so we can
1480 safely extract it as one size of integer, if necessary; then
1481 truncate or extend to the size that is wanted; then use SUBREGs or
1482 convert_to_mode to get one of the modes we really wanted. */
1484 int_mode = int_mode_for_mode (tmode);
1485 if (int_mode == BLKmode)
1486 int_mode = int_mode_for_mode (mode);
1487 /* Should probably push op0 out to memory and then do a load. */
1488 gcc_assert (int_mode != BLKmode);
1490 /* OFFSET is the number of words or bytes (UNIT says which)
1491 from STR_RTX to the first word or byte containing part of the field. */
1492 if (!MEM_P (op0))
1494 if (offset != 0
1495 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1497 if (!REG_P (op0))
1498 op0 = copy_to_reg (op0);
1499 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1500 op0, (offset * UNITS_PER_WORD));
1502 offset = 0;
1505 /* Now OFFSET is nonzero only for memory operands. */
1506 ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1507 if (ext_mode != MAX_MACHINE_MODE
1508 && bitsize > 0
1509 && GET_MODE_BITSIZE (ext_mode) >= bitsize
1510 /* If op0 is a register, we need it in EXT_MODE to make it
1511 acceptable to the format of ext(z)v. */
1512 && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1513 && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1514 && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))))
1516 struct expand_operand ops[4];
1517 unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1518 rtx xop0 = op0;
1519 rtx xtarget = target;
1520 rtx xspec_target = target;
1521 rtx xspec_target_subreg = 0;
1523 /* If op0 is a register, we need it in EXT_MODE to make it
1524 acceptable to the format of ext(z)v. */
1525 if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1526 xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1527 if (MEM_P (xop0))
1528 /* Get ref to first byte containing part of the field. */
1529 xop0 = adjust_address (xop0, byte_mode, xoffset);
1531 /* On big-endian machines, we count bits from the most significant.
1532 If the bit field insn does not, we must invert. */
1533 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1534 xbitpos = unit - bitsize - xbitpos;
1536 /* Now convert from counting within UNIT to counting in EXT_MODE. */
1537 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1538 xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1540 unit = GET_MODE_BITSIZE (ext_mode);
1542 if (xtarget == 0)
1543 xtarget = xspec_target = gen_reg_rtx (tmode);
1545 if (GET_MODE (xtarget) != ext_mode)
1547 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1548 between the mode of the extraction (word_mode) and the target
1549 mode. Instead, create a temporary and use convert_move to set
1550 the target. */
1551 if (REG_P (xtarget)
1552 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget), ext_mode))
1554 xtarget = gen_lowpart (ext_mode, xtarget);
1555 if (GET_MODE_PRECISION (ext_mode)
1556 > GET_MODE_PRECISION (GET_MODE (xspec_target)))
1557 xspec_target_subreg = xtarget;
1559 else
1560 xtarget = gen_reg_rtx (ext_mode);
1563 create_output_operand (&ops[0], xtarget, ext_mode);
1564 create_fixed_operand (&ops[1], xop0);
1565 create_integer_operand (&ops[2], bitsize);
1566 create_integer_operand (&ops[3], xbitpos);
1567 if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
1568 4, ops))
1570 xtarget = ops[0].value;
1571 if (xtarget == xspec_target)
1572 return xtarget;
1573 if (xtarget == xspec_target_subreg)
1574 return xspec_target;
1575 return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1579 /* If OP0 is a memory, try copying it to a register and seeing if a
1580 cheap register alternative is available. */
1581 if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1583 enum machine_mode bestmode;
1585 /* Get the mode to use for inserting into this field. If
1586 OP0 is BLKmode, get the smallest mode consistent with the
1587 alignment. If OP0 is a non-BLKmode object that is no
1588 wider than EXT_MODE, use its mode. Otherwise, use the
1589 smallest mode containing the field. */
1591 if (GET_MODE (op0) == BLKmode
1592 || (ext_mode != MAX_MACHINE_MODE
1593 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1594 bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
1595 (ext_mode == MAX_MACHINE_MODE
1596 ? VOIDmode : ext_mode),
1597 MEM_VOLATILE_P (op0));
1598 else
1599 bestmode = GET_MODE (op0);
1601 if (bestmode != VOIDmode
1602 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1603 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1605 unsigned HOST_WIDE_INT xoffset, xbitpos;
1607 /* Compute the offset as a multiple of this unit,
1608 counting in bytes. */
1609 unit = GET_MODE_BITSIZE (bestmode);
1610 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1611 xbitpos = bitnum % unit;
1613 /* Make sure the register is big enough for the whole field. */
1614 if (xoffset * BITS_PER_UNIT + unit
1615 >= offset * BITS_PER_UNIT + bitsize)
1617 rtx last, result, xop0;
1619 last = get_last_insn ();
1621 /* Fetch it to a register in that size. */
1622 xop0 = adjust_address (op0, bestmode, xoffset);
1623 xop0 = force_reg (bestmode, xop0);
1624 result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1625 unsignedp, packedp, target,
1626 mode, tmode, false);
1627 if (result)
1628 return result;
1630 delete_insns_since (last);
1635 if (!fallback_p)
1636 return NULL;
1638 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1639 bitpos, target, unsignedp, packedp);
1640 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1643 /* Generate code to extract a byte-field from STR_RTX
1644 containing BITSIZE bits, starting at BITNUM,
1645 and put it in TARGET if possible (if TARGET is nonzero).
1646 Regardless of TARGET, we return the rtx for where the value is placed.
1648 STR_RTX is the structure containing the byte (a REG or MEM).
1649 UNSIGNEDP is nonzero if this is an unsigned bit field.
1650 PACKEDP is nonzero if the field has the packed attribute.
1651 MODE is the natural mode of the field value once extracted.
1652 TMODE is the mode the caller would like the value to have;
1653 but the value may be returned with type MODE instead.
1655 If a TARGET is specified and we can store in it at no extra cost,
1656 we do so, and return TARGET.
1657 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1658 if they are equally easy. */
 1660 rtx
 1661 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1662 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1663 rtx target, enum machine_mode mode, enum machine_mode tmode)
1665 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1666 target, mode, tmode, true);
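/* A hypothetical example call: extract_bit_field (mem, 8, 3, 1, false,
   NULL_RTX, SImode, SImode) would return an SImode rtx holding the
   zero-extended 8-bit field that starts at bit 3 of MEM.  */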
 1669 /* Extract a bit field using shifts and boolean operations.
1670 Returns an rtx to represent the value.
1671 OP0 addresses a register (word) or memory (byte).
1672 BITPOS says which bit within the word or byte the bit field starts in.
1673 OFFSET says how many bytes farther the bit field starts;
1674 it is 0 if OP0 is a register.
1675 BITSIZE says how many bits long the bit field is.
1676 (If OP0 is a register, it may be narrower than a full word,
1677 but BITPOS still counts within a full word,
1678 which is significant on bigendian machines.)
1680 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1681 PACKEDP is true if the field has the packed attribute.
1683 If TARGET is nonzero, attempts to store the value there
1684 and return TARGET, but this is not guaranteed.
1685 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1687 static rtx
1688 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1689 unsigned HOST_WIDE_INT offset,
1690 unsigned HOST_WIDE_INT bitsize,
1691 unsigned HOST_WIDE_INT bitpos, rtx target,
1692 int unsignedp, bool packedp)
1694 unsigned int total_bits = BITS_PER_WORD;
1695 enum machine_mode mode;
1697 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1699 /* Special treatment for a bit field split across two registers. */
1700 if (bitsize + bitpos > BITS_PER_WORD)
1701 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1703 else
1705 /* Get the proper mode to use for this field. We want a mode that
1706 includes the entire field. If such a mode would be larger than
1707 a word, we won't be doing the extraction the normal way. */
1709 if (MEM_VOLATILE_P (op0)
1710 && flag_strict_volatile_bitfields > 0)
1712 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1713 mode = GET_MODE (op0);
1714 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1715 mode = GET_MODE (target);
1716 else
1717 mode = tmode;
1719 else
1720 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, 0, 0,
1721 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1723 if (mode == VOIDmode)
1724 /* The only way this should occur is if the field spans word
1725 boundaries. */
1726 return extract_split_bit_field (op0, bitsize,
1727 bitpos + offset * BITS_PER_UNIT,
1728 unsignedp);
1730 total_bits = GET_MODE_BITSIZE (mode);
1732 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1733 be in the range 0 to total_bits-1, and put any excess bytes in
1734 OFFSET. */
1735 if (bitpos >= total_bits)
1737 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1738 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1739 * BITS_PER_UNIT);
1742 /* If we're accessing a volatile MEM, we can't do the next
1743 alignment step if it results in a multi-word access where we
1744 otherwise wouldn't have one. So, check for that case
1745 here. */
1746 if (MEM_P (op0)
1747 && MEM_VOLATILE_P (op0)
1748 && flag_strict_volatile_bitfields > 0
1749 && bitpos + bitsize <= total_bits
1750 && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
1752 if (STRICT_ALIGNMENT)
1754 static bool informed_about_misalignment = false;
1755 bool warned;
1757 if (packedp)
1759 if (bitsize == total_bits)
1760 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1761 "multiple accesses to volatile structure member"
1762 " because of packed attribute");
1763 else
1764 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1765 "multiple accesses to volatile structure bitfield"
1766 " because of packed attribute");
1768 return extract_split_bit_field (op0, bitsize,
1769 bitpos + offset * BITS_PER_UNIT,
1770 unsignedp);
1773 if (bitsize == total_bits)
1774 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1775 "mis-aligned access used for structure member");
1776 else
1777 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1778 "mis-aligned access used for structure bitfield");
1780 if (! informed_about_misalignment && warned)
1782 informed_about_misalignment = true;
1783 inform (input_location,
1784 "when a volatile object spans multiple type-sized locations,"
1785 " the compiler must choose between using a single mis-aligned access to"
1786 " preserve the volatility, or using multiple aligned accesses to avoid"
1787 " runtime faults; this code may fail at runtime if the hardware does"
1788 " not allow this access");
1792 else
1795 /* Get ref to an aligned byte, halfword, or word containing the field.
1796 Adjust BITPOS to be position within a word,
1797 and OFFSET to be the offset of that word.
1798 Then alter OP0 to refer to that word. */
1799 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1800 offset -= (offset % (total_bits / BITS_PER_UNIT));
1803 op0 = adjust_address (op0, mode, offset);
1806 mode = GET_MODE (op0);
1808 if (BYTES_BIG_ENDIAN)
1809 /* BITPOS is the distance between our msb and that of OP0.
1810 Convert it to the distance from the lsb. */
1811 bitpos = total_bits - bitsize - bitpos;
1813 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1814 We have reduced the big-endian case to the little-endian case. */
1816 if (unsignedp)
1818 if (bitpos)
1820 /* If the field does not already start at the lsb,
1821 shift it so it does. */
1822 /* Maybe propagate the target for the shift. */
1823 /* But not if we will return it--could confuse integrate.c. */
1824 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1825 if (tmode != mode) subtarget = 0;
1826 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitpos, subtarget, 1);
1828 /* Convert the value to the desired mode. */
1829 if (mode != tmode)
1830 op0 = convert_to_mode (tmode, op0, 1);
1832 /* Unless the msb of the field used to be the msb when we shifted,
1833 mask out the upper bits. */
1835 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1836 return expand_binop (GET_MODE (op0), and_optab, op0,
1837 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1838 target, 1, OPTAB_LIB_WIDEN);
1839 return op0;
1842 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1843 then arithmetic-shift its lsb to the lsb of the word. */
1844 op0 = force_reg (mode, op0);
1846 /* Find the narrowest integer mode that contains the field. */
1848 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1849 mode = GET_MODE_WIDER_MODE (mode))
1850 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1852 op0 = convert_to_mode (mode, op0, 0);
1853 break;
1856 if (mode != tmode)
1857 target = 0;
1859 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1861 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitpos);
1862 /* Maybe propagate the target for the shift. */
1863 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1864 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1867 return expand_shift (RSHIFT_EXPR, mode, op0,
1868 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1871 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1872 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1873 complement of that if COMPLEMENT. The mask is truncated if
1874 necessary to the width of mode MODE. The mask is zero-extended if
1875 BITSIZE+BITPOS is too small for MODE. */
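/* For instance, assuming a 32-bit SImode, mask_rtx (SImode, 4, 8, 0)
   yields the constant 0xff0 (eight ones shifted left by four), and
   mask_rtx (SImode, 4, 8, 1) yields its SImode complement
   0xfffff00f.  */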
1877 static rtx
1878 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1880 double_int mask;
1882 mask = double_int_mask (bitsize);
1883 mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1885 if (complement)
1886 mask = double_int_not (mask);
1888 return immed_double_int_const (mask, mode);
1891 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1892 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1894 static rtx
1895 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1897 double_int val;
1899 val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
1900 val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1902 return immed_double_int_const (val, mode);
1905 /* Extract a bit field that is split across two words
1906 and return an RTX for the result.
1908 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1909 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1910 UNSIGNEDP is 1 if we should zero-extend the contents; else sign-extend. */
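/* As an illustration (little-endian, 32-bit words): a 10-bit field at
   BITPOS 28 is assembled from 4 bits taken from the top of the first
   word and 6 bits taken from the bottom of the second, roughly as

     result = part0 | (part1 << 4);

   the exact shift counts come from the loop below.  */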
1912 static rtx
1913 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1914 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1916 unsigned int unit;
1917 unsigned int bitsdone = 0;
1918 rtx result = NULL_RTX;
1919 int first = 1;
1921 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1922 much at a time. */
1923 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1924 unit = BITS_PER_WORD;
1925 else
1926 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1928 while (bitsdone < bitsize)
1930 unsigned HOST_WIDE_INT thissize;
1931 rtx part, word;
1932 unsigned HOST_WIDE_INT thispos;
1933 unsigned HOST_WIDE_INT offset;
1935 offset = (bitpos + bitsdone) / unit;
1936 thispos = (bitpos + bitsdone) % unit;
1938 /* THISSIZE must not overrun a word boundary. Otherwise,
1939 extract_fixed_bit_field will call us again, and we will mutually
1940 recurse forever. */
1941 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1942 thissize = MIN (thissize, unit - thispos);
1944 /* If OP0 is a register, then handle OFFSET here.
1946 When handling multiword bitfields, extract_bit_field may pass
1947 down a word_mode SUBREG of a larger REG for a bitfield that actually
1948 crosses a word boundary. Thus, for a SUBREG, we must find
1949 the current word starting from the base register. */
1950 if (GET_CODE (op0) == SUBREG)
1952 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1953 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1954 GET_MODE (SUBREG_REG (op0)));
1955 offset = 0;
1957 else if (REG_P (op0))
1959 word = operand_subword_force (op0, offset, GET_MODE (op0));
1960 offset = 0;
1962 else
1963 word = op0;
1965 /* Extract the parts in bit-counting order,
1966 whose meaning is determined by BYTES_PER_UNIT.
1967 OFFSET is in UNITs, and UNIT is in bits.
1968 extract_fixed_bit_field wants offset in bytes. */
1969 part = extract_fixed_bit_field (word_mode, word,
1970 offset * unit / BITS_PER_UNIT,
1971 thissize, thispos, 0, 1, false);
1972 bitsdone += thissize;
1974 /* Shift this part into place for the result. */
1975 if (BYTES_BIG_ENDIAN)
1977 if (bitsize != bitsdone)
1978 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1979 bitsize - bitsdone, 0, 1);
1981 else
1983 if (bitsdone != thissize)
1984 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1985 bitsdone - thissize, 0, 1);
1988 if (first)
1989 result = part;
1990 else
1991 /* Combine the parts with bitwise or. This works
1992 because we extracted each part as an unsigned bit field. */
1993 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1994 OPTAB_LIB_WIDEN);
1996 first = 0;
1999 /* Unsigned bit field: we are done. */
2000 if (unsignedp)
2001 return result;
2002 /* Signed bit field: sign-extend with two arithmetic shifts. */
2003 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2004 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2005 return expand_shift (RSHIFT_EXPR, word_mode, result,
2006 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2009 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2010 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2011 MODE, fill the upper bits with zeros. Fail if the layout of either
2012 mode is unknown (as for CC modes) or if the extraction would involve
2013 unprofitable mode punning. Return the value on success, otherwise
2014 return null.
2016 This is different from gen_lowpart* in these respects:
2018 - the returned value must always be considered an rvalue
2020 - when MODE is wider than SRC_MODE, the extraction involves
2021 a zero extension
2023 - when MODE is smaller than SRC_MODE, the extraction involves
2024 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2026 In other words, this routine performs a computation, whereas the
2027 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2028 operations. */
2031 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2033 enum machine_mode int_mode, src_int_mode;
2035 if (mode == src_mode)
2036 return src;
2038 if (CONSTANT_P (src))
2040 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2041 fails, it will happily create (subreg (symbol_ref)) or similar
2042 invalid SUBREGs. */
2043 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2044 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2045 if (ret)
2046 return ret;
2048 if (GET_MODE (src) == VOIDmode
2049 || !validate_subreg (mode, src_mode, src, byte))
2050 return NULL_RTX;
2052 src = force_reg (GET_MODE (src), src);
2053 return gen_rtx_SUBREG (mode, src, byte);
2056 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2057 return NULL_RTX;
2059 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2060 && MODES_TIEABLE_P (mode, src_mode))
2062 rtx x = gen_lowpart_common (mode, src);
2063 if (x)
2064 return x;
2067 src_int_mode = int_mode_for_mode (src_mode);
2068 int_mode = int_mode_for_mode (mode);
2069 if (src_int_mode == BLKmode || int_mode == BLKmode)
2070 return NULL_RTX;
2072 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2073 return NULL_RTX;
2074 if (!MODES_TIEABLE_P (int_mode, mode))
2075 return NULL_RTX;
2077 src = gen_lowpart (src_int_mode, src);
2078 src = convert_modes (int_mode, src_int_mode, src, true);
2079 src = gen_lowpart (mode, src);
2080 return src;
2083 /* Add INC into TARGET. */
2085 void
2086 expand_inc (rtx target, rtx inc)
2088 rtx value = expand_binop (GET_MODE (target), add_optab,
2089 target, inc,
2090 target, 0, OPTAB_LIB_WIDEN);
2091 if (value != target)
2092 emit_move_insn (target, value);
2095 /* Subtract DEC from TARGET. */
2097 void
2098 expand_dec (rtx target, rtx dec)
2100 rtx value = expand_binop (GET_MODE (target), sub_optab,
2101 target, dec,
2102 target, 0, OPTAB_LIB_WIDEN);
2103 if (value != target)
2104 emit_move_insn (target, value);
2107 /* Output a shift instruction for expression code CODE,
2108 with SHIFTED being the rtx for the value to shift,
2109 and AMOUNT the rtx for the amount to shift by.
2110 Store the result in the rtx TARGET, if that is convenient.
2111 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2112 Return the rtx for where the value is. */
2114 static rtx
2115 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2116 rtx amount, rtx target, int unsignedp)
2118 rtx op1, temp = 0;
2119 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2120 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2121 optab lshift_optab = ashl_optab;
2122 optab rshift_arith_optab = ashr_optab;
2123 optab rshift_uns_optab = lshr_optab;
2124 optab lrotate_optab = rotl_optab;
2125 optab rrotate_optab = rotr_optab;
2126 enum machine_mode op1_mode;
2127 int attempt;
2128 bool speed = optimize_insn_for_speed_p ();
2130 op1 = amount;
2131 op1_mode = GET_MODE (op1);
2133 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2134 shift amount is a vector, use the vector/vector shift patterns. */
2135 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2137 lshift_optab = vashl_optab;
2138 rshift_arith_optab = vashr_optab;
2139 rshift_uns_optab = vlshr_optab;
2140 lrotate_optab = vrotl_optab;
2141 rrotate_optab = vrotr_optab;
2144 /* Previously detected shift-counts computed by NEGATE_EXPR
2145 and shifted in the other direction; but that does not work
2146 on all machines. */
2148 if (SHIFT_COUNT_TRUNCATED)
2150 if (CONST_INT_P (op1)
2151 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2152 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2153 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2154 % GET_MODE_BITSIZE (mode));
2155 else if (GET_CODE (op1) == SUBREG
2156 && subreg_lowpart_p (op1)
2157 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2158 op1 = SUBREG_REG (op1);
2161 if (op1 == const0_rtx)
2162 return shifted;
2164 /* Check whether it's cheaper to implement a left shift by a constant
2165 bit count by a sequence of additions. */
2166 if (code == LSHIFT_EXPR
2167 && CONST_INT_P (op1)
2168 && INTVAL (op1) > 0
2169 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2170 && INTVAL (op1) < MAX_BITS_PER_WORD
2171 && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2172 && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2174 int i;
2175 for (i = 0; i < INTVAL (op1); i++)
2177 temp = force_reg (mode, shifted);
2178 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2179 unsignedp, OPTAB_LIB_WIDEN);
2181 return shifted;
2184 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2186 enum optab_methods methods;
2188 if (attempt == 0)
2189 methods = OPTAB_DIRECT;
2190 else if (attempt == 1)
2191 methods = OPTAB_WIDEN;
2192 else
2193 methods = OPTAB_LIB_WIDEN;
2195 if (rotate)
2197 /* Widening does not work for rotation. */
2198 if (methods == OPTAB_WIDEN)
2199 continue;
2200 else if (methods == OPTAB_LIB_WIDEN)
2202 /* If we have been unable to open-code this by a rotation,
2203 do it as the IOR of two shifts. I.e., to rotate A
2204 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2205 where C is the bitsize of A.
2207 It is theoretically possible that the target machine might
2208 not be able to perform either shift and hence we would
2209 be making two libcalls rather than just the one for the
2210 shift (similarly if IOR could not be done). We will allow
2211 this extremely unlikely lossage to avoid complicating the
2212 code below. */
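/* For example, rotating a 32-bit value A left by 3 becomes
   (A << 3) | ((unsigned) A >> 29).  */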
2214 rtx subtarget = target == shifted ? 0 : target;
2215 rtx new_amount, other_amount;
2216 rtx temp1;
2218 new_amount = op1;
2219 if (CONST_INT_P (op1))
2220 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2221 - INTVAL (op1));
2222 else
2223 other_amount
2224 = simplify_gen_binary (MINUS, GET_MODE (op1),
2225 GEN_INT (GET_MODE_PRECISION (mode)),
2226 op1);
2228 shifted = force_reg (mode, shifted);
2230 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2231 mode, shifted, new_amount, 0, 1);
2232 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2233 mode, shifted, other_amount,
2234 subtarget, 1);
2235 return expand_binop (mode, ior_optab, temp, temp1, target,
2236 unsignedp, methods);
2239 temp = expand_binop (mode,
2240 left ? lrotate_optab : rrotate_optab,
2241 shifted, op1, target, unsignedp, methods);
2243 else if (unsignedp)
2244 temp = expand_binop (mode,
2245 left ? lshift_optab : rshift_uns_optab,
2246 shifted, op1, target, unsignedp, methods);
2248 /* Do arithmetic shifts.
2249 Also, if we are going to widen the operand, we can just as well
2250 use an arithmetic right-shift instead of a logical one. */
2251 if (temp == 0 && ! rotate
2252 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2254 enum optab_methods methods1 = methods;
2256 /* If trying to widen a log shift to an arithmetic shift,
2257 don't accept an arithmetic shift of the same size. */
2258 if (unsignedp)
2259 methods1 = OPTAB_MUST_WIDEN;
2261 /* Arithmetic shift */
2263 temp = expand_binop (mode,
2264 left ? lshift_optab : rshift_arith_optab,
2265 shifted, op1, target, unsignedp, methods1);
2268 /* We used to try extzv here for logical right shifts, but that was
2269 only useful for one machine, the VAX, and caused poor code
2270 generation there for lshrdi3, so the code was deleted and a
2271 define_expand for lshrsi3 was added to vax.md. */
2274 gcc_assert (temp);
2275 return temp;
2278 /* Output a shift instruction for expression code CODE,
2279 with SHIFTED being the rtx for the value to shift,
2280 and AMOUNT the amount to shift by.
2281 Store the result in the rtx TARGET, if that is convenient.
2282 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2283 Return the rtx for where the value is. */
2286 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2287 int amount, rtx target, int unsignedp)
2289 return expand_shift_1 (code, mode,
2290 shifted, GEN_INT (amount), target, unsignedp);
2293 /* Output a shift instruction for expression code CODE,
2294 with SHIFTED being the rtx for the value to shift,
2295 and AMOUNT the tree for the amount to shift by.
2296 Store the result in the rtx TARGET, if that is convenient.
2297 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2298 Return the rtx for where the value is. */
2301 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2302 tree amount, rtx target, int unsignedp)
2304 return expand_shift_1 (code, mode,
2305 shifted, expand_normal (amount), target, unsignedp);
2309 /* Indicates the type of fixup needed after a constant multiplication.
2310 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2311 the result should be negated, and ADD_VARIANT means that the
2312 multiplicand should be added to the result. */
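/* For example, multiplying by 9 may use add_variant: synthesize the
   multiplication by 8 (a single shift) and then add the multiplicand
   back in.  Multiplying by -3 may use negate_variant: synthesize the
   multiplication by 3 and negate the result.  These are illustrative
   decompositions only; the actual choice is made by
   choose_mult_variant below.  */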
2313 enum mult_variant {basic_variant, negate_variant, add_variant};
2315 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2316 const struct mult_cost *, enum machine_mode mode);
2317 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2318 struct algorithm *, enum mult_variant *, int);
2319 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2320 const struct algorithm *, enum mult_variant);
2321 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2322 int, rtx *, int *, int *);
2323 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2324 static rtx extract_high_half (enum machine_mode, rtx);
2325 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2326 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2327 int, int);
2328 /* Compute and return the best algorithm for multiplying by T.
2329 The algorithm must cost less than cost_limit.
2330 If retval.cost >= COST_LIMIT, no algorithm was found and all
2331 other fields of the returned struct are undefined.
2332 MODE is the machine mode of the multiplication. */
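/* As a rough illustration, T == 45 can be synthesized without a multiply
   insn by two shift-and-add steps, e.g.

     x5  = (x << 2) + x;           45 == 9 * 5
     x45 = (x5 << 3) + x5;

   synth_mult searches for the cheapest such decomposition that fits
   within COST_LIMIT.  */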
2334 static void
2335 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2336 const struct mult_cost *cost_limit, enum machine_mode mode)
2338 int m;
2339 struct algorithm *alg_in, *best_alg;
2340 struct mult_cost best_cost;
2341 struct mult_cost new_limit;
2342 int op_cost, op_latency;
2343 unsigned HOST_WIDE_INT orig_t = t;
2344 unsigned HOST_WIDE_INT q;
2345 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2346 int hash_index;
2347 bool cache_hit = false;
2348 enum alg_code cache_alg = alg_zero;
2349 bool speed = optimize_insn_for_speed_p ();
2351 /* Indicate that no algorithm is yet found. If no algorithm
2352 is found, this value will be returned and indicate failure. */
2353 alg_out->cost.cost = cost_limit->cost + 1;
2354 alg_out->cost.latency = cost_limit->latency + 1;
2356 if (cost_limit->cost < 0
2357 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2358 return;
2360 /* Restrict the bits of "t" to the multiplication's mode. */
2361 t &= GET_MODE_MASK (mode);
2363 /* t == 1 can be done in zero cost. */
2364 if (t == 1)
2366 alg_out->ops = 1;
2367 alg_out->cost.cost = 0;
2368 alg_out->cost.latency = 0;
2369 alg_out->op[0] = alg_m;
2370 return;
2373 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2374 fail now. */
2375 if (t == 0)
2377 if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2378 return;
2379 else
2381 alg_out->ops = 1;
2382 alg_out->cost.cost = zero_cost[speed];
2383 alg_out->cost.latency = zero_cost[speed];
2384 alg_out->op[0] = alg_zero;
2385 return;
2389 /* We'll be needing a couple extra algorithm structures now. */
2391 alg_in = XALLOCA (struct algorithm);
2392 best_alg = XALLOCA (struct algorithm);
2393 best_cost = *cost_limit;
2395 /* Compute the hash index. */
2396 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2398 /* See if we already know what to do for T. */
2399 if (alg_hash[hash_index].t == t
2400 && alg_hash[hash_index].mode == mode
2402 && alg_hash[hash_index].speed == speed
2403 && alg_hash[hash_index].alg != alg_unknown)
2405 cache_alg = alg_hash[hash_index].alg;
2407 if (cache_alg == alg_impossible)
2409 /* The cache tells us that it's impossible to synthesize
2410 multiplication by T within alg_hash[hash_index].cost. */
2411 if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2412 /* COST_LIMIT is at least as restrictive as the one
2413 recorded in the hash table, in which case we have no
2414 hope of synthesizing a multiplication. Just
2415 return. */
2416 return;
2418 /* If we get here, COST_LIMIT is less restrictive than the
2419 one recorded in the hash table, so we may be able to
2420 synthesize a multiplication. Proceed as if we didn't
2421 have the cache entry. */
2423 else
2425 if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2426 /* The cached algorithm shows that this multiplication
2427 requires more cost than COST_LIMIT. Just return. This
2428 way, we don't clobber this cache entry with
2429 alg_impossible but retain useful information. */
2430 return;
2432 cache_hit = true;
2434 switch (cache_alg)
2436 case alg_shift:
2437 goto do_alg_shift;
2439 case alg_add_t_m2:
2440 case alg_sub_t_m2:
2441 goto do_alg_addsub_t_m2;
2443 case alg_add_factor:
2444 case alg_sub_factor:
2445 goto do_alg_addsub_factor;
2447 case alg_add_t2_m:
2448 goto do_alg_add_t2_m;
2450 case alg_sub_t2_m:
2451 goto do_alg_sub_t2_m;
2453 default:
2454 gcc_unreachable ();
2459 /* If we have a group of zero bits at the low-order part of T, try
2460 multiplying by the remaining bits and then doing a shift. */
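/* For example, T == 20 (binary 10100) has two low zero bits, so it can
   be handled as a multiplication by 5 followed by a left shift of 2.  */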
2462 if ((t & 1) == 0)
2464 do_alg_shift:
2465 m = floor_log2 (t & -t); /* m = number of low zero bits */
2466 if (m < maxm)
2468 q = t >> m;
2469 /* The function expand_shift will choose between a shift and
2470 a sequence of additions, so the observed cost is given as
2471 MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]). */
2472 op_cost = m * add_cost[speed][mode];
2473 if (shift_cost[speed][mode][m] < op_cost)
2474 op_cost = shift_cost[speed][mode][m];
2475 new_limit.cost = best_cost.cost - op_cost;
2476 new_limit.latency = best_cost.latency - op_cost;
2477 synth_mult (alg_in, q, &new_limit, mode);
2479 alg_in->cost.cost += op_cost;
2480 alg_in->cost.latency += op_cost;
2481 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2483 struct algorithm *x;
2484 best_cost = alg_in->cost;
2485 x = alg_in, alg_in = best_alg, best_alg = x;
2486 best_alg->log[best_alg->ops] = m;
2487 best_alg->op[best_alg->ops] = alg_shift;
2490 /* See if treating ORIG_T as a signed number yields a better
2491 sequence. Try this sequence only for a negative ORIG_T
2492 as it would be useless for a non-negative ORIG_T. */
2493 if ((HOST_WIDE_INT) orig_t < 0)
2495 /* Shift ORIG_T as follows because a right shift of a
2496 negative-valued signed type is implementation
2497 defined. */
2498 q = ~(~orig_t >> m);
2499 /* The function expand_shift will choose between a shift
2500 and a sequence of additions, so the observed cost is
2501 given as MIN (m * add_cost[speed][mode],
2502 shift_cost[speed][mode][m]). */
2503 op_cost = m * add_cost[speed][mode];
2504 if (shift_cost[speed][mode][m] < op_cost)
2505 op_cost = shift_cost[speed][mode][m];
2506 new_limit.cost = best_cost.cost - op_cost;
2507 new_limit.latency = best_cost.latency - op_cost;
2508 synth_mult (alg_in, q, &new_limit, mode);
2510 alg_in->cost.cost += op_cost;
2511 alg_in->cost.latency += op_cost;
2512 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2514 struct algorithm *x;
2515 best_cost = alg_in->cost;
2516 x = alg_in, alg_in = best_alg, best_alg = x;
2517 best_alg->log[best_alg->ops] = m;
2518 best_alg->op[best_alg->ops] = alg_shift;
2522 if (cache_hit)
2523 goto done;
2526 /* If we have an odd number, add or subtract one. */
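/* For example, T == 7 (binary 111) can be handled as (T + 1) == 8, i.e.
   a shift by 3 followed by subtracting the multiplicand once, while
   T == 5 (binary 101) can be handled as (T - 1) == 4 followed by one
   addition.  */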
2527 if ((t & 1) != 0)
2529 unsigned HOST_WIDE_INT w;
2531 do_alg_addsub_t_m2:
2532 for (w = 1; (w & t) != 0; w <<= 1)
2534 /* If T was -1, then W will be zero after the loop. This is another
2535 case where T ends with ...111. Handling this with (T + 1) and
2536 subtracting 1 produces slightly better code and makes algorithm
2537 selection much faster than treating it like the ...0111 case
2538 below. */
2539 if (w == 0
2540 || (w > 2
2541 /* Reject the case where t is 3.
2542 Thus we prefer addition in that case. */
2543 && t != 3))
2545 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2547 op_cost = add_cost[speed][mode];
2548 new_limit.cost = best_cost.cost - op_cost;
2549 new_limit.latency = best_cost.latency - op_cost;
2550 synth_mult (alg_in, t + 1, &new_limit, mode);
2552 alg_in->cost.cost += op_cost;
2553 alg_in->cost.latency += op_cost;
2554 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2556 struct algorithm *x;
2557 best_cost = alg_in->cost;
2558 x = alg_in, alg_in = best_alg, best_alg = x;
2559 best_alg->log[best_alg->ops] = 0;
2560 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2563 else
2565 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2567 op_cost = add_cost[speed][mode];
2568 new_limit.cost = best_cost.cost - op_cost;
2569 new_limit.latency = best_cost.latency - op_cost;
2570 synth_mult (alg_in, t - 1, &new_limit, mode);
2572 alg_in->cost.cost += op_cost;
2573 alg_in->cost.latency += op_cost;
2574 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2576 struct algorithm *x;
2577 best_cost = alg_in->cost;
2578 x = alg_in, alg_in = best_alg, best_alg = x;
2579 best_alg->log[best_alg->ops] = 0;
2580 best_alg->op[best_alg->ops] = alg_add_t_m2;
2584 /* We may be able to calculate a * -7, a * -15, a * -31, etc.
2585 quickly with a - a * n for some appropriate constant n. */
2586 m = exact_log2 (-orig_t + 1);
2587 if (m >= 0 && m < maxm)
2589 op_cost = shiftsub1_cost[speed][mode][m];
2590 new_limit.cost = best_cost.cost - op_cost;
2591 new_limit.latency = best_cost.latency - op_cost;
2592 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode);
2594 alg_in->cost.cost += op_cost;
2595 alg_in->cost.latency += op_cost;
2596 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2598 struct algorithm *x;
2599 best_cost = alg_in->cost;
2600 x = alg_in, alg_in = best_alg, best_alg = x;
2601 best_alg->log[best_alg->ops] = m;
2602 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2606 if (cache_hit)
2607 goto done;
2610 /* Look for factors of t of the form
2611 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2612 If we find such a factor, we can multiply by t using an algorithm that
2613 multiplies by q, shift the result by m and add/subtract it to itself.
2615 We search for large factors first and loop down, even if large factors
2616 are less probable than small ones; if we find a large factor we will find a
2617 good sequence quickly, and therefore be able to prune (by decreasing
2618 COST_LIMIT) the search. */
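/* For example, T == 30 has the factor 15 == 2**4 - 1: first synthesize
   q = x * 2, then compute (q << 4) - q, i.e. one alg_sub_factor step on
   top of the recursive result.  */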
2620 do_alg_addsub_factor:
2621 for (m = floor_log2 (t - 1); m >= 2; m--)
2623 unsigned HOST_WIDE_INT d;
2625 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2626 if (t % d == 0 && t > d && m < maxm
2627 && (!cache_hit || cache_alg == alg_add_factor))
2629 /* If the target has a cheap shift-and-add instruction use
2630 that in preference to a shift insn followed by an add insn.
2631 Assume that the shift-and-add is "atomic" with a latency
2632 equal to its cost, otherwise assume that on superscalar
2633 hardware the shift may be executed concurrently with the
2634 earlier steps in the algorithm. */
2635 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2636 if (shiftadd_cost[speed][mode][m] < op_cost)
2638 op_cost = shiftadd_cost[speed][mode][m];
2639 op_latency = op_cost;
2641 else
2642 op_latency = add_cost[speed][mode];
2644 new_limit.cost = best_cost.cost - op_cost;
2645 new_limit.latency = best_cost.latency - op_latency;
2646 synth_mult (alg_in, t / d, &new_limit, mode);
2648 alg_in->cost.cost += op_cost;
2649 alg_in->cost.latency += op_latency;
2650 if (alg_in->cost.latency < op_cost)
2651 alg_in->cost.latency = op_cost;
2652 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2654 struct algorithm *x;
2655 best_cost = alg_in->cost;
2656 x = alg_in, alg_in = best_alg, best_alg = x;
2657 best_alg->log[best_alg->ops] = m;
2658 best_alg->op[best_alg->ops] = alg_add_factor;
2660 /* Other factors will have been taken care of in the recursion. */
2661 break;
2664 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2665 if (t % d == 0 && t > d && m < maxm
2666 && (!cache_hit || cache_alg == alg_sub_factor))
2668 /* If the target has a cheap shift-and-subtract insn use
2669 that in preference to a shift insn followed by a sub insn.
2670 Assume that the shift-and-sub is "atomic" with a latency
2671 equal to its cost, otherwise assume that on superscalar
2672 hardware the shift may be executed concurrently with the
2673 earlier steps in the algorithm. */
2674 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2675 if (shiftsub0_cost[speed][mode][m] < op_cost)
2677 op_cost = shiftsub0_cost[speed][mode][m];
2678 op_latency = op_cost;
2680 else
2681 op_latency = add_cost[speed][mode];
2683 new_limit.cost = best_cost.cost - op_cost;
2684 new_limit.latency = best_cost.latency - op_latency;
2685 synth_mult (alg_in, t / d, &new_limit, mode);
2687 alg_in->cost.cost += op_cost;
2688 alg_in->cost.latency += op_latency;
2689 if (alg_in->cost.latency < op_cost)
2690 alg_in->cost.latency = op_cost;
2691 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2693 struct algorithm *x;
2694 best_cost = alg_in->cost;
2695 x = alg_in, alg_in = best_alg, best_alg = x;
2696 best_alg->log[best_alg->ops] = m;
2697 best_alg->op[best_alg->ops] = alg_sub_factor;
2699 break;
2702 if (cache_hit)
2703 goto done;
2705 /* Try shift-and-add (load effective address) instructions,
2706 i.e. do a*3, a*5, a*9. */
2707 if ((t & 1) != 0)
2709 do_alg_add_t2_m:
2710 q = t - 1;
2711 q = q & -q;
2712 m = exact_log2 (q);
2713 if (m >= 0 && m < maxm)
2715 op_cost = shiftadd_cost[speed][mode][m];
2716 new_limit.cost = best_cost.cost - op_cost;
2717 new_limit.latency = best_cost.latency - op_cost;
2718 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2720 alg_in->cost.cost += op_cost;
2721 alg_in->cost.latency += op_cost;
2722 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2724 struct algorithm *x;
2725 best_cost = alg_in->cost;
2726 x = alg_in, alg_in = best_alg, best_alg = x;
2727 best_alg->log[best_alg->ops] = m;
2728 best_alg->op[best_alg->ops] = alg_add_t2_m;
2731 if (cache_hit)
2732 goto done;
2734 do_alg_sub_t2_m:
2735 q = t + 1;
2736 q = q & -q;
2737 m = exact_log2 (q);
2738 if (m >= 0 && m < maxm)
2740 op_cost = shiftsub0_cost[speed][mode][m];
2741 new_limit.cost = best_cost.cost - op_cost;
2742 new_limit.latency = best_cost.latency - op_cost;
2743 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2745 alg_in->cost.cost += op_cost;
2746 alg_in->cost.latency += op_cost;
2747 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2749 struct algorithm *x;
2750 best_cost = alg_in->cost;
2751 x = alg_in, alg_in = best_alg, best_alg = x;
2752 best_alg->log[best_alg->ops] = m;
2753 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2756 if (cache_hit)
2757 goto done;
2760 done:
2761 /* If best_cost has not decreased, we have not found any algorithm. */
2762 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2764 /* We failed to find an algorithm. Record alg_impossible for
2765 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2766 we are asked to find an algorithm for T within the same or
2767 lower COST_LIMIT, we can immediately return to the
2768 caller. */
2769 alg_hash[hash_index].t = t;
2770 alg_hash[hash_index].mode = mode;
2771 alg_hash[hash_index].speed = speed;
2772 alg_hash[hash_index].alg = alg_impossible;
2773 alg_hash[hash_index].cost = *cost_limit;
2774 return;
2777 /* Cache the result. */
2778 if (!cache_hit)
2780 alg_hash[hash_index].t = t;
2781 alg_hash[hash_index].mode = mode;
2782 alg_hash[hash_index].speed = speed;
2783 alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2784 alg_hash[hash_index].cost.cost = best_cost.cost;
2785 alg_hash[hash_index].cost.latency = best_cost.latency;
2788 /* If we are getting too long a sequence for `struct algorithm'
2789 to record, make this search fail. */
2790 if (best_alg->ops == MAX_BITS_PER_WORD)
2791 return;
2793 /* Copy the algorithm from temporary space to the space at alg_out.
2794 We avoid using structure assignment because the majority of
2795 best_alg is normally undefined, and this is a critical function. */
2796 alg_out->ops = best_alg->ops + 1;
2797 alg_out->cost = best_cost;
2798 memcpy (alg_out->op, best_alg->op,
2799 alg_out->ops * sizeof *alg_out->op);
2800 memcpy (alg_out->log, best_alg->log,
2801 alg_out->ops * sizeof *alg_out->log);
2804 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2805 Try three variations:
2807 - a shift/add sequence based on VAL itself
2808 - a shift/add sequence based on -VAL, followed by a negation
2809 - a shift/add sequence based on VAL - 1, followed by an addition.
2811 Return true if the cheapest of these costs less than MULT_COST,
2812 describing the algorithm in *ALG and final fixup in *VARIANT. */
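/* For example, for VAL == 7 the VAL - 1 variant synthesizes x * 6 as
   ((x << 1) + x) << 1 and then adds x once more, while for a negative
   VAL such as -5 it may be cheaper to synthesize x * 5 and negate the
   result.  These are only illustrative decompositions; the winner
   depends on the cost tables.  */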
2814 static bool
2815 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2816 struct algorithm *alg, enum mult_variant *variant,
2817 int mult_cost)
2819 struct algorithm alg2;
2820 struct mult_cost limit;
2821 int op_cost;
2822 bool speed = optimize_insn_for_speed_p ();
2824 /* Fail quickly for impossible bounds. */
2825 if (mult_cost < 0)
2826 return false;
2828 /* Ensure that mult_cost provides a reasonable upper bound.
2829 Any constant multiplication can be performed with less
2830 than 2 * bits additions. */
2831 op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2832 if (mult_cost > op_cost)
2833 mult_cost = op_cost;
2835 *variant = basic_variant;
2836 limit.cost = mult_cost;
2837 limit.latency = mult_cost;
2838 synth_mult (alg, val, &limit, mode);
2840 /* This works only if the inverted value actually fits in an
2841 `unsigned int' */
2842 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2844 op_cost = neg_cost[speed][mode];
2845 if (MULT_COST_LESS (&alg->cost, mult_cost))
2847 limit.cost = alg->cost.cost - op_cost;
2848 limit.latency = alg->cost.latency - op_cost;
2850 else
2852 limit.cost = mult_cost - op_cost;
2853 limit.latency = mult_cost - op_cost;
2856 synth_mult (&alg2, -val, &limit, mode);
2857 alg2.cost.cost += op_cost;
2858 alg2.cost.latency += op_cost;
2859 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2860 *alg = alg2, *variant = negate_variant;
2863 /* This proves very useful for division-by-constant. */
2864 op_cost = add_cost[speed][mode];
2865 if (MULT_COST_LESS (&alg->cost, mult_cost))
2867 limit.cost = alg->cost.cost - op_cost;
2868 limit.latency = alg->cost.latency - op_cost;
2870 else
2872 limit.cost = mult_cost - op_cost;
2873 limit.latency = mult_cost - op_cost;
2876 synth_mult (&alg2, val - 1, &limit, mode);
2877 alg2.cost.cost += op_cost;
2878 alg2.cost.latency += op_cost;
2879 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2880 *alg = alg2, *variant = add_variant;
2882 return MULT_COST_LESS (&alg->cost, mult_cost);
2885 /* A subroutine of expand_mult, used for constant multiplications.
2886 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2887 convenient. Use the shift/add sequence described by ALG and apply
2888 the final fixup specified by VARIANT. */
2890 static rtx
2891 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2892 rtx target, const struct algorithm *alg,
2893 enum mult_variant variant)
2895 HOST_WIDE_INT val_so_far;
2896 rtx insn, accum, tem;
2897 int opno;
2898 enum machine_mode nmode;
2900 /* Avoid referencing memory over and over and invalid sharing
2901 on SUBREGs. */
2902 op0 = force_reg (mode, op0);
2904 /* ACCUM starts out either as OP0 or as a zero, depending on
2905 the first operation. */
2907 if (alg->op[0] == alg_zero)
2909 accum = copy_to_mode_reg (mode, const0_rtx);
2910 val_so_far = 0;
2912 else if (alg->op[0] == alg_m)
2914 accum = copy_to_mode_reg (mode, op0);
2915 val_so_far = 1;
2917 else
2918 gcc_unreachable ();
2920 for (opno = 1; opno < alg->ops; opno++)
2922 int log = alg->log[opno];
2923 rtx shift_subtarget = optimize ? 0 : accum;
2924 rtx add_target
2925 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2926 && !optimize)
2927 ? target : 0;
2928 rtx accum_target = optimize ? 0 : accum;
2930 switch (alg->op[opno])
2932 case alg_shift:
2933 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2934 /* REG_EQUAL note will be attached to the following insn. */
2935 emit_move_insn (accum, tem);
2936 val_so_far <<= log;
2937 break;
2939 case alg_add_t_m2:
2940 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2941 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2942 add_target ? add_target : accum_target);
2943 val_so_far += (HOST_WIDE_INT) 1 << log;
2944 break;
2946 case alg_sub_t_m2:
2947 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2948 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2949 add_target ? add_target : accum_target);
2950 val_so_far -= (HOST_WIDE_INT) 1 << log;
2951 break;
2953 case alg_add_t2_m:
2954 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2955 log, shift_subtarget, 0);
2956 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2957 add_target ? add_target : accum_target);
2958 val_so_far = (val_so_far << log) + 1;
2959 break;
2961 case alg_sub_t2_m:
2962 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2963 log, shift_subtarget, 0);
2964 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2965 add_target ? add_target : accum_target);
2966 val_so_far = (val_so_far << log) - 1;
2967 break;
2969 case alg_add_factor:
2970 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2971 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2972 add_target ? add_target : accum_target);
2973 val_so_far += val_so_far << log;
2974 break;
2976 case alg_sub_factor:
2977 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2978 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2979 (add_target
2980 ? add_target : (optimize ? 0 : tem)));
2981 val_so_far = (val_so_far << log) - val_so_far;
2982 break;
2984 default:
2985 gcc_unreachable ();
2988 /* Write a REG_EQUAL note on the last insn so that we can cse
2989 multiplication sequences. Note that if ACCUM is a SUBREG,
2990 we've set the inner register and must properly indicate
2991 that. */
2993 tem = op0, nmode = mode;
2994 if (GET_CODE (accum) == SUBREG)
2996 nmode = GET_MODE (SUBREG_REG (accum));
2997 tem = gen_lowpart (nmode, op0);
3000 insn = get_last_insn ();
3001 set_unique_reg_note (insn, REG_EQUAL,
3002 gen_rtx_MULT (nmode, tem,
3003 GEN_INT (val_so_far)));
3006 if (variant == negate_variant)
3008 val_so_far = -val_so_far;
3009 accum = expand_unop (mode, neg_optab, accum, target, 0);
3011 else if (variant == add_variant)
3013 val_so_far = val_so_far + 1;
3014 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3017 /* Compare only the bits of val and val_so_far that are significant
3018 in the result mode, to avoid sign-/zero-extension confusion. */
3019 val &= GET_MODE_MASK (mode);
3020 val_so_far &= GET_MODE_MASK (mode);
3021 gcc_assert (val == val_so_far);
3023 return accum;
3026 /* Perform a multiplication and return an rtx for the result.
3027 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3028 TARGET is a suggestion for where to store the result (an rtx).
3030 We check specially for a constant integer as OP1.
3031 If you want this check for OP0 as well, then before calling
3032 you should swap the two operands if OP0 would be constant. */
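/* A minimal usage sketch with hypothetical operands: multiplying OP0 by
   the constant 10 in SImode,

     rtx r = expand_mult (SImode, op0, GEN_INT (10), NULL_RTX, 0);

   will normally be emitted as a shift/add sequence such as
   ((op0 << 2) + op0) << 1 rather than a mult insn, whenever that is
   cheaper.  */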
3035 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3036 int unsignedp)
3038 enum mult_variant variant;
3039 struct algorithm algorithm;
3040 int max_cost;
3041 bool speed = optimize_insn_for_speed_p ();
3043 /* Handling const0_rtx here allows us to use zero as a rogue value for
3044 coeff below. */
3045 if (op1 == const0_rtx)
3046 return const0_rtx;
3047 if (op1 == const1_rtx)
3048 return op0;
3049 if (op1 == constm1_rtx)
3050 return expand_unop (mode,
3051 GET_MODE_CLASS (mode) == MODE_INT
3052 && !unsignedp && flag_trapv
3053 ? negv_optab : neg_optab,
3054 op0, target, 0);
3056 /* These are the operations that are potentially turned into a sequence
3057 of shifts and additions. */
3058 if (SCALAR_INT_MODE_P (mode)
3059 && (unsignedp || !flag_trapv))
3061 HOST_WIDE_INT coeff = 0;
3062 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3064 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3065 less than or equal in size to `unsigned int' this doesn't matter.
3066 If the mode is larger than `unsigned int', then synth_mult works
3067 only if the constant value exactly fits in an `unsigned int' without
3068 any truncation. This means that multiplying by negative values does
3069 not work; results are off by 2^32 on a 32 bit machine. */
3071 if (CONST_INT_P (op1))
3073 /* Attempt to handle multiplication of DImode values by negative
3074 coefficients, by performing the multiplication by a positive
3075 multiplier and then inverting the result. */
3076 if (INTVAL (op1) < 0
3077 && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3079 /* It's safe to use -INTVAL (op1) even for INT_MIN, as the
3080 result is interpreted as an unsigned coefficient.
3081 Exclude cost of op0 from max_cost to match the cost
3082 calculation of the synth_mult. */
3083 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3084 speed)
3085 - neg_cost[speed][mode]);
3086 if (max_cost > 0
3087 && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3088 &variant, max_cost))
3090 rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3091 NULL_RTX, &algorithm,
3092 variant);
3093 return expand_unop (mode, neg_optab, temp, target, 0);
3096 else coeff = INTVAL (op1);
3098 else if (GET_CODE (op1) == CONST_DOUBLE)
3100 /* If we are multiplying in DImode, it may still be a win
3101 to try to work with shifts and adds. */
3102 if (CONST_DOUBLE_HIGH (op1) == 0
3103 && CONST_DOUBLE_LOW (op1) > 0)
3104 coeff = CONST_DOUBLE_LOW (op1);
3105 else if (CONST_DOUBLE_LOW (op1) == 0
3106 && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3108 int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3109 + HOST_BITS_PER_WIDE_INT;
3110 return expand_shift (LSHIFT_EXPR, mode, op0,
3111 shift, target, unsignedp);
3115 /* We used to test optimize here, on the grounds that it's better to
3116 produce a smaller program when -O is not used. But this causes
3117 such a terrible slowdown sometimes that it seems better to always
3118 use synth_mult. */
3119 if (coeff != 0)
3121 /* Special case powers of two. */
3122 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3123 return expand_shift (LSHIFT_EXPR, mode, op0,
3124 floor_log2 (coeff), target, unsignedp);
3126 /* Exclude cost of op0 from max_cost to match the cost
3127 calculation of the synth_mult. */
3128 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3129 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3130 max_cost))
3131 return expand_mult_const (mode, op0, coeff, target,
3132 &algorithm, variant);
3136 if (GET_CODE (op0) == CONST_DOUBLE)
3138 rtx temp = op0;
3139 op0 = op1;
3140 op1 = temp;
3143 /* Expand x*2.0 as x+x. */
3144 if (GET_CODE (op1) == CONST_DOUBLE
3145 && SCALAR_FLOAT_MODE_P (mode))
3147 REAL_VALUE_TYPE d;
3148 REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3150 if (REAL_VALUES_EQUAL (d, dconst2))
3152 op0 = force_reg (GET_MODE (op0), op0);
3153 return expand_binop (mode, add_optab, op0, op0,
3154 target, unsignedp, OPTAB_LIB_WIDEN);
3158 /* This used to use umul_optab if unsigned, but for non-widening multiply
3159 there is no difference between signed and unsigned. */
3160 op0 = expand_binop (mode,
3161 ! unsignedp
3162 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3163 ? smulv_optab : smul_optab,
3164 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3165 gcc_assert (op0);
3166 return op0;
3169 /* Perform a widening multiplication and return an rtx for the result.
3170 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3171 TARGET is a suggestion for where to store the result (an rtx).
3172 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3173 or smul_widen_optab.
3175 We check specially for a constant integer as OP1, comparing the
3176 cost of a widening multiply against the cost of a sequence of shifts
3177 and adds. */
3180 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3181 int unsignedp, optab this_optab)
3183 bool speed = optimize_insn_for_speed_p ();
3184 rtx cop1;
3186 if (CONST_INT_P (op1)
3187 && GET_MODE (op0) != VOIDmode
3188 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3189 this_optab == umul_widen_optab))
3190 && CONST_INT_P (cop1)
3191 && (INTVAL (cop1) >= 0
3192 || HWI_COMPUTABLE_MODE_P (mode)))
3194 HOST_WIDE_INT coeff = INTVAL (cop1);
3195 int max_cost;
3196 enum mult_variant variant;
3197 struct algorithm algorithm;
3199 /* Special case powers of two. */
3200 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3202 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3203 return expand_shift (LSHIFT_EXPR, mode, op0,
3204 floor_log2 (coeff), target, unsignedp);
3207 /* Exclude cost of op0 from max_cost to match the cost
3208 calculation of the synth_mult. */
3209 max_cost = mul_widen_cost[speed][mode];
3210 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3211 max_cost))
3213 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3214 return expand_mult_const (mode, op0, coeff, target,
3215 &algorithm, variant);
3218 return expand_binop (mode, this_optab, op0, op1, target,
3219 unsignedp, OPTAB_LIB_WIDEN);
3222 /* Return the smallest n such that 2**n >= X. */
3225 ceil_log2 (unsigned HOST_WIDE_INT x)
3227 return floor_log2 (x - 1) + 1;
3230 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3231 replace division by D, and put the least significant N bits of the result
3232 in *MULTIPLIER_PTR and return the most significant bit.
3234 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3235 needed precision is in PRECISION (should be <= N).
3237 PRECISION should be as small as possible so this function can choose
3238 the multiplier more freely.
3240 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3241 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3243 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3244 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
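/* A worked example, assuming N == 32: for D == 5 this chooses the
   multiplier 0xcccccccd with a post shift of 2, so that for every
   32-bit unsigned X

     X / 5 == (X * 0xcccccccd) >> 34     (computed in wider arithmetic)

   i.e. take the high half of the widening multiply and then shift it
   right by *POST_SHIFT_PTR.  */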
3246 static
3247 unsigned HOST_WIDE_INT
3248 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3249 rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3251 HOST_WIDE_INT mhigh_hi, mlow_hi;
3252 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3253 int lgup, post_shift;
3254 int pow, pow2;
3255 unsigned HOST_WIDE_INT nl, dummy1;
3256 HOST_WIDE_INT nh, dummy2;
3258 /* lgup = ceil(log2(divisor)); */
3259 lgup = ceil_log2 (d);
3261 gcc_assert (lgup <= n);
3263 pow = n + lgup;
3264 pow2 = n + lgup - precision;
3266 /* We could handle this with some effort, but this case is much
3267 better handled directly with a scc insn, so rely on caller using
3268 that. */
3269 gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3271 /* mlow = 2^(N + lgup)/d */
3272 if (pow >= HOST_BITS_PER_WIDE_INT)
3274 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3275 nl = 0;
3277 else
3279 nh = 0;
3280 nl = (unsigned HOST_WIDE_INT) 1 << pow;
3282 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3283 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3285 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3286 if (pow2 >= HOST_BITS_PER_WIDE_INT)
3287 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3288 else
3289 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3290 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3291 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3293 gcc_assert (!mhigh_hi || nh - d < d);
3294 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3295 /* Assert that mlow < mhigh. */
3296 gcc_assert (mlow_hi < mhigh_hi
3297 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3299 /* If precision == N, then mlow, mhigh exceed 2^N
3300 (but they do not exceed 2^(N+1)). */
3302 /* Reduce to lowest terms. */
3303 for (post_shift = lgup; post_shift > 0; post_shift--)
3305 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3306 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3307 if (ml_lo >= mh_lo)
3308 break;
3310 mlow_hi = 0;
3311 mlow_lo = ml_lo;
3312 mhigh_hi = 0;
3313 mhigh_lo = mh_lo;
3316 *post_shift_ptr = post_shift;
3317 *lgup_ptr = lgup;
3318 if (n < HOST_BITS_PER_WIDE_INT)
3320 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3321 *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3322 return mhigh_lo >= mask;
3324 else
3326 *multiplier_ptr = GEN_INT (mhigh_lo);
3327 return mhigh_hi;
3331 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3332 congruent to 1 (mod 2**N). */
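/* For example, invert_mod2n (3, 8) returns 171, since 3 * 171 == 513 ==
   2 * 256 + 1, i.e. 1 (mod 2**8).  X must be odd for the inverse to
   exist.  */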
3334 static unsigned HOST_WIDE_INT
3335 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3337 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3339 /* The algorithm notes that the choice y = x satisfies
3340 x*y == 1 mod 2^3, since x is assumed odd.
3341 Each iteration doubles the number of bits of significance in y. */
3343 unsigned HOST_WIDE_INT mask;
3344 unsigned HOST_WIDE_INT y = x;
3345 int nbit = 3;
3347 mask = (n == HOST_BITS_PER_WIDE_INT
3348 ? ~(unsigned HOST_WIDE_INT) 0
3349 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3351 while (nbit < n)
3353 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3354 nbit *= 2;
3356 return y;
3359 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3360 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3361 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3362 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3363 become signed.
3365 The result is put in TARGET if that is convenient.
3367 MODE is the mode of operation. */
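/* Concretely, for N-bit operands the two flavors of high part differ by
   the terms added or subtracted below (modulo the word size):

     high_unsigned == high_signed + (OP0 < 0 ? OP1 : 0)
                                  + (OP1 < 0 ? OP0 : 0)

   so with UNSIGNEDP nonzero the terms are added, otherwise they are
   subtracted.  */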
3370 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3371 rtx op1, rtx target, int unsignedp)
3373 rtx tem;
3374 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3376 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3377 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3378 tem = expand_and (mode, tem, op1, NULL_RTX);
3379 adj_operand
3380 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3381 adj_operand);
3383 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3384 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3385 tem = expand_and (mode, tem, op0, NULL_RTX);
3386 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3387 target);
3389 return target;
3392 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
3394 static rtx
3395 extract_high_half (enum machine_mode mode, rtx op)
3397 enum machine_mode wider_mode;
3399 if (mode == word_mode)
3400 return gen_highpart (mode, op);
3402 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3404 wider_mode = GET_MODE_WIDER_MODE (mode);
3405 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3406 GET_MODE_BITSIZE (mode), 0, 1);
3407 return convert_modes (mode, wider_mode, op, 0);
3410 /* Like expand_mult_highpart, but only consider using a multiplication
3411 optab. OP1 is an rtx for the constant operand. */
3413 static rtx
3414 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3415 rtx target, int unsignedp, int max_cost)
3417 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3418 enum machine_mode wider_mode;
3419 optab moptab;
3420 rtx tem;
3421 int size;
3422 bool speed = optimize_insn_for_speed_p ();
3424 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3426 wider_mode = GET_MODE_WIDER_MODE (mode);
3427 size = GET_MODE_BITSIZE (mode);
3429 /* Firstly, try using a multiplication insn that only generates the needed
3430 high part of the product, and in the sign flavor of unsignedp. */
3431 if (mul_highpart_cost[speed][mode] < max_cost)
3433 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3434 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3435 unsignedp, OPTAB_DIRECT);
3436 if (tem)
3437 return tem;
3440 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3441 Need to adjust the result after the multiplication. */
3442 if (size - 1 < BITS_PER_WORD
3443 && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3444 + 4 * add_cost[speed][mode] < max_cost))
3446 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3447 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3448 unsignedp, OPTAB_DIRECT);
3449 if (tem)
3450 /* We used the wrong signedness. Adjust the result. */
3451 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3452 tem, unsignedp);
3455 /* Try widening multiplication. */
3456 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3457 if (optab_handler (moptab, wider_mode) != CODE_FOR_nothing
3458 && mul_widen_cost[speed][wider_mode] < max_cost)
3460 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3461 unsignedp, OPTAB_WIDEN);
3462 if (tem)
3463 return extract_high_half (mode, tem);
3466 /* Try widening the mode and perform a non-widening multiplication. */
3467 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3468 && size - 1 < BITS_PER_WORD
3469 && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3471 rtx insns, wop0, wop1;
3473 /* We need to widen the operands, for example to ensure the
3474 constant multiplier is correctly sign or zero extended.
3475 Use a sequence to clean-up any instructions emitted by
3476 the conversions if things don't work out. */
3477 start_sequence ();
3478 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3479 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3480 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3481 unsignedp, OPTAB_WIDEN);
3482 insns = get_insns ();
3483 end_sequence ();
3485 if (tem)
3487 emit_insn (insns);
3488 return extract_high_half (mode, tem);
3492 /* Try widening multiplication of opposite signedness, and adjust. */
3493 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3494 if (optab_handler (moptab, wider_mode) != CODE_FOR_nothing
3495 && size - 1 < BITS_PER_WORD
3496 && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3497 + 4 * add_cost[speed][mode] < max_cost))
3499 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3500 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3501 if (tem != 0)
3503 tem = extract_high_half (mode, tem);
3504 /* We used the wrong signedness. Adjust the result. */
3505 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3506 target, unsignedp);
3510 return 0;
3513 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3514 putting the high half of the result in TARGET if that is convenient,
3515 and return where the result is. If the operation cannot be performed,
3516 0 is returned.
3518 MODE is the mode of operation and result.
3520 UNSIGNEDP nonzero means unsigned multiply.
3522 MAX_COST is the total allowed cost for the expanded RTL. */
3524 static rtx
3525 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3526 rtx target, int unsignedp, int max_cost)
3528 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3529 unsigned HOST_WIDE_INT cnst1;
3530 int extra_cost;
3531 bool sign_adjust = false;
3532 enum mult_variant variant;
3533 struct algorithm alg;
3534 rtx tem;
3535 bool speed = optimize_insn_for_speed_p ();
3537 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3538 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3539 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3541 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3543 /* We can't optimize modes wider than BITS_PER_WORD.
3544 ??? We might be able to perform double-word arithmetic if
3545 mode == word_mode; however, all the cost calculations in
3546 synth_mult etc. assume single-word operations. */
3547 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3548 return expand_mult_highpart_optab (mode, op0, op1, target,
3549 unsignedp, max_cost);
3551 extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3553 /* Check whether we try to multiply by a negative constant. */
3554 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3556 sign_adjust = true;
3557 extra_cost += add_cost[speed][mode];
3560 /* See whether shift/add multiplication is cheap enough. */
3561 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3562 max_cost - extra_cost))
3564 /* See whether the specialized multiplication optabs are
3565 cheaper than the shift/add version. */
3566 tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3567 alg.cost.cost + extra_cost);
3568 if (tem)
3569 return tem;
3571 tem = convert_to_mode (wider_mode, op0, unsignedp);
3572 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3573 tem = extract_high_half (mode, tem);
3575 /* Adjust result for signedness. */
3576 if (sign_adjust)
3577 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3579 return tem;
3581 return expand_mult_highpart_optab (mode, op0, op1, target,
3582 unsignedp, max_cost);
3586 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3588 static rtx
3589 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3591 unsigned HOST_WIDE_INT masklow, maskhigh;
3592 rtx result, temp, shift, label;
3593 int logd;
3595 logd = floor_log2 (d);
3596 result = gen_reg_rtx (mode);
3598 /* Avoid conditional branches when they're expensive. */
3599 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3600 && optimize_insn_for_speed_p ())
3602 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3603 mode, 0, -1);
3604 if (signmask)
3606 signmask = force_reg (mode, signmask);
3607 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3608 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3610 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3611 which instruction sequence to use. If logical right shifts
3612 are expensive then use 2 XORs, 2 SUBs and an AND; otherwise
3613 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
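/* An illustrative C-level sketch of the two variants (not from the original
   source; assume a 32-bit two's complement int x, d == 1 << logd, and
   signmask == -(x < 0), i.e. 0 or -1):

     XOR/SUB form:   r = ((x ^ signmask) - signmask) & (d - 1);
                     r = (r ^ signmask) - signmask;

     LSHIFTRT form:  bias = (unsigned) signmask >> (32 - logd);   (0 or d-1)
                     r = ((x + bias) & (d - 1)) - bias;

   Both compute x % d with the sign of x, without a branch.  */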
3615 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3616 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3617 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3618 > COSTS_N_INSNS (2)))
3620 temp = expand_binop (mode, xor_optab, op0, signmask,
3621 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3622 temp = expand_binop (mode, sub_optab, temp, signmask,
3623 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3624 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3625 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3626 temp = expand_binop (mode, xor_optab, temp, signmask,
3627 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3628 temp = expand_binop (mode, sub_optab, temp, signmask,
3629 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3631 else
3633 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3634 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3635 signmask = force_reg (mode, signmask);
3637 temp = expand_binop (mode, add_optab, op0, signmask,
3638 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3639 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3640 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3641 temp = expand_binop (mode, sub_optab, temp, signmask,
3642 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3644 return temp;
3648 /* Mask contains the mode's signbit and the significant bits of the
3649 modulus. By including the signbit in the operation, many targets
3650 can avoid an explicit compare operation in the following comparison
3651 against zero. */
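/* For example (an added illustration): for SImode and D == 8 the constant
   below is 0x80000007, so the AND keeps the sign bit together with the
   three remainder bits, and its result is negative exactly when OP0 is.  */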
3653 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3654 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3656 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3657 maskhigh = -1;
3659 else
3660 maskhigh = (HOST_WIDE_INT) -1
3661 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3663 temp = expand_binop (mode, and_optab, op0,
3664 immed_double_const (masklow, maskhigh, mode),
3665 result, 1, OPTAB_LIB_WIDEN);
3666 if (temp != result)
3667 emit_move_insn (result, temp);
3669 label = gen_label_rtx ();
3670 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3672 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3673 0, OPTAB_LIB_WIDEN);
3674 masklow = (HOST_WIDE_INT) -1 << logd;
3675 maskhigh = -1;
3676 temp = expand_binop (mode, ior_optab, temp,
3677 immed_double_const (masklow, maskhigh, mode),
3678 result, 1, OPTAB_LIB_WIDEN);
3679 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3680 0, OPTAB_LIB_WIDEN);
3681 if (temp != result)
3682 emit_move_insn (result, temp);
3683 emit_label (label);
3684 return result;
3687 /* Expand signed division of OP0 by a power of two D in mode MODE.
3688 This routine is only called for positive values of D. */
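/* A rough C-level sketch of the branch-free variants emitted below (an
   illustration only, assuming a 32-bit two's complement int x):

     d == 2:      q = (x + (int) ((unsigned) x >> 31)) >> 1;
     general d:   q = (x + ((x >> 31) & (d - 1))) >> logd;

   i.e. add d - 1 to negative dividends before the arithmetic shift so that
   the shift rounds toward zero rather than toward minus infinity.  */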
3690 static rtx
3691 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3693 rtx temp, label;
3694 int logd;
3696 logd = floor_log2 (d);
3698 if (d == 2
3699 && BRANCH_COST (optimize_insn_for_speed_p (),
3700 false) >= 1)
3702 temp = gen_reg_rtx (mode);
3703 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3704 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3705 0, OPTAB_LIB_WIDEN);
3706 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3709 #ifdef HAVE_conditional_move
3710 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3711 >= 2)
3713 rtx temp2;
3715 /* ??? emit_conditional_move forces a stack adjustment via
3716 compare_from_rtx so, if the sequence is discarded, it will
3717 be lost. Do it now instead. */
3718 do_pending_stack_adjust ();
3720 start_sequence ();
3721 temp2 = copy_to_mode_reg (mode, op0);
3722 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3723 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3724 temp = force_reg (mode, temp);
3726 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3727 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3728 mode, temp, temp2, mode, 0);
3729 if (temp2)
3731 rtx seq = get_insns ();
3732 end_sequence ();
3733 emit_insn (seq);
3734 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3736 end_sequence ();
3738 #endif
3740 if (BRANCH_COST (optimize_insn_for_speed_p (),
3741 false) >= 2)
3743 int ushift = GET_MODE_BITSIZE (mode) - logd;
3745 temp = gen_reg_rtx (mode);
3746 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3747 if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3748 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3749 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3750 else
3751 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3752 ushift, NULL_RTX, 1);
3753 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3754 0, OPTAB_LIB_WIDEN);
3755 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3758 label = gen_label_rtx ();
3759 temp = copy_to_mode_reg (mode, op0);
3760 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3761 expand_inc (temp, GEN_INT (d - 1));
3762 emit_label (label);
3763 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3766 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3767 if that is convenient, and returning where the result is.
3768 You may request either the quotient or the remainder as the result;
3769 specify REM_FLAG nonzero to get the remainder.
3771 CODE is the expression code for which kind of division this is;
3772 it controls how rounding is done. MODE is the machine mode to use.
3773 UNSIGNEDP nonzero means do unsigned division. */
3775 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3776 and then correct it by or'ing in missing high bits
3777 if result of ANDI is nonzero.
3778 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3779 This could optimize to a bfexts instruction.
3780 But C doesn't use these operations, so their optimizations are
3781 left for later. */
3782 /* ??? For modulo, we don't actually need the highpart of the first product,
3783 the low part will do nicely. And for small divisors, the second multiply
3784 can also be a low-part only multiply or even be completely left out.
3785 E.g. to calculate the remainder of a division by 3 with a 32 bit
3786 multiply, multiply with 0x55555556 and extract the upper two bits;
3787 the result is exact for inputs up to 0x1fffffff.
3788 The input range can be reduced by using cross-sum rules.
3789 For odd divisors >= 3, the following table gives right shift counts
3790 so that if a number is shifted by an integer multiple of the given
3791 amount, the remainder stays the same:
3792 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3793 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3794 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3795 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3796 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3798 Cross-sum rules for even numbers can be derived by leaving as many bits
3799 to the right alone as the divisor has zeros to the right.
3800 E.g. if x is an unsigned 32 bit number:
3801 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
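   As a worked instance of the division-by-3 remainder trick (an added
   illustration, not part of the original comment): for x == 100,
   100 * 0x55555556 == 0x2155555598; the low 32 bits are 0x55555598, whose
   top two bits are 01, i.e. 1 == 100 % 3.  */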
3805 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3806 rtx op0, rtx op1, rtx target, int unsignedp)
3808 enum machine_mode compute_mode;
3809 rtx tquotient;
3810 rtx quotient = 0, remainder = 0;
3811 rtx last;
3812 int size;
3813 rtx insn, set;
3814 optab optab1, optab2;
3815 int op1_is_constant, op1_is_pow2 = 0;
3816 int max_cost, extra_cost;
3817 static HOST_WIDE_INT last_div_const = 0;
3818 static HOST_WIDE_INT ext_op1;
3819 bool speed = optimize_insn_for_speed_p ();
3821 op1_is_constant = CONST_INT_P (op1);
3822 if (op1_is_constant)
3824 ext_op1 = INTVAL (op1);
3825 if (unsignedp)
3826 ext_op1 &= GET_MODE_MASK (mode);
3827 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3828 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3832 This is the structure of expand_divmod:
3834 First comes code to fix up the operands so we can perform the operations
3835 correctly and efficiently.
3837 Second comes a switch statement with code specific for each rounding mode.
3838 For some special operands this code emits all RTL for the desired
3839 operation, for other cases, it generates only a quotient and stores it in
3840 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3841 to indicate that it has not done anything.
3843 Last comes code that finishes the operation. If QUOTIENT is set and
3844 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3845 QUOTIENT is not set, it is computed using trunc rounding.
3847 We try to generate special code for division and remainder when OP1 is a
3848 constant. If |OP1| = 2**n we can use shifts and some other fast
3849 operations. For other values of OP1, we compute a carefully selected
3850 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3851 by m.
3853 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3854 half of the product. Different strategies for generating the product are
3855 implemented in expand_mult_highpart.
3857 If what we actually want is the remainder, we generate that by another
3858 by-constant multiplication and a subtraction. */
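/* A concrete illustration (not from the original comments) of the 32-bit
   unsigned case for OP1 == 7: the chosen multiplier is the 33-bit constant
   0x124924925, so the generated sequence is roughly

     t = (x * 0x24924925ULL) >> 32;        high half of the product
     q = (t + ((x - t) >> 1)) >> 2;        == x / 7

   which is the mh != 0 path of the TRUNC_DIV_EXPR code below.  */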
3860 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3861 code below will malfunction if we are, so check here and handle
3862 the special case if so. */
3863 if (op1 == const1_rtx)
3864 return rem_flag ? const0_rtx : op0;
3866 /* When dividing by -1, we could get an overflow.
3867 negv_optab can handle overflows. */
3868 if (! unsignedp && op1 == constm1_rtx)
3870 if (rem_flag)
3871 return const0_rtx;
3872 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3873 ? negv_optab : neg_optab, op0, target, 0);
3876 if (target
3877 /* Don't use the function value register as a target
3878 since we have to read it as well as write it,
3879 and function-inlining gets confused by this. */
3880 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3881 /* Don't clobber an operand while doing a multi-step calculation. */
3882 || ((rem_flag || op1_is_constant)
3883 && (reg_mentioned_p (target, op0)
3884 || (MEM_P (op0) && MEM_P (target))))
3885 || reg_mentioned_p (target, op1)
3886 || (MEM_P (op1) && MEM_P (target))))
3887 target = 0;
3889 /* Get the mode in which to perform this computation. Normally it will
3890 be MODE, but sometimes we can't do the desired operation in MODE.
3891 If so, pick a wider mode in which we can do the operation. Convert
3892 to that mode at the start to avoid repeated conversions.
3894 First see what operations we need. These depend on the expression
3895 we are evaluating. (We assume that divxx3 insns exist under the
3896 same conditions as modxx3 insns do, and that these insns don't normally
3897 fail. If these assumptions are not correct, we may generate less
3898 efficient code in some cases.)
3900 Then see if we find a mode in which we can open-code that operation
3901 (either a division, modulus, or shift). Finally, check for the smallest
3902 mode for which we can do the operation with a library call. */
3904 /* We might want to refine this now that we have division-by-constant
3905 optimization. Since expand_mult_highpart tries so many variants, it is
3906 not straightforward to generalize this. Maybe we should make an array
3907 of possible modes in init_expmed? Save this for GCC 2.7. */
3909 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3910 ? (unsignedp ? lshr_optab : ashr_optab)
3911 : (unsignedp ? udiv_optab : sdiv_optab));
3912 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3913 ? optab1
3914 : (unsignedp ? udivmod_optab : sdivmod_optab));
3916 for (compute_mode = mode; compute_mode != VOIDmode;
3917 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3918 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3919 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3920 break;
3922 if (compute_mode == VOIDmode)
3923 for (compute_mode = mode; compute_mode != VOIDmode;
3924 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3925 if (optab_libfunc (optab1, compute_mode)
3926 || optab_libfunc (optab2, compute_mode))
3927 break;
3929 /* If we still couldn't find a mode, use MODE, but expand_binop will
3930 probably die. */
3931 if (compute_mode == VOIDmode)
3932 compute_mode = mode;
3934 if (target && GET_MODE (target) == compute_mode)
3935 tquotient = target;
3936 else
3937 tquotient = gen_reg_rtx (compute_mode);
3939 size = GET_MODE_BITSIZE (compute_mode);
3940 #if 0
3941 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3942 (mode), and thereby get better code when OP1 is a constant. Do that
3943 later. It will require going over all usages of SIZE below. */
3944 size = GET_MODE_BITSIZE (mode);
3945 #endif
3947 /* Only deduct something for a REM if the last divide done was
3948 for a different constant. Then set the constant of the last
3949 divide. */
3950 max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
3951 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3952 && INTVAL (op1) == last_div_const))
3953 max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
3955 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3957 /* Now convert to the best mode to use. */
3958 if (compute_mode != mode)
3960 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3961 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3963 /* convert_modes may have placed op1 into a register, so we
3964 must recompute the following. */
3965 op1_is_constant = CONST_INT_P (op1);
3966 op1_is_pow2 = (op1_is_constant
3967 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3968 || (! unsignedp
3969 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3972 /* If one of the operands is a volatile MEM, copy it into a register. */
3974 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3975 op0 = force_reg (compute_mode, op0);
3976 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3977 op1 = force_reg (compute_mode, op1);
3979 /* If we need the remainder or if OP1 is constant, we need to
3980 put OP0 in a register in case it has any queued subexpressions. */
3981 if (rem_flag || op1_is_constant)
3982 op0 = force_reg (compute_mode, op0);
3984 last = get_last_insn ();
3986 /* Promote floor rounding to trunc rounding for unsigned operations. */
3987 if (unsignedp)
3989 if (code == FLOOR_DIV_EXPR)
3990 code = TRUNC_DIV_EXPR;
3991 if (code == FLOOR_MOD_EXPR)
3992 code = TRUNC_MOD_EXPR;
3993 if (code == EXACT_DIV_EXPR && op1_is_pow2)
3994 code = TRUNC_DIV_EXPR;
3997 if (op1 != const0_rtx)
3998 switch (code)
4000 case TRUNC_MOD_EXPR:
4001 case TRUNC_DIV_EXPR:
4002 if (op1_is_constant)
4004 if (unsignedp)
4006 unsigned HOST_WIDE_INT mh;
4007 int pre_shift, post_shift;
4008 int dummy;
4009 rtx ml;
4010 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4011 & GET_MODE_MASK (compute_mode));
4013 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4015 pre_shift = floor_log2 (d);
4016 if (rem_flag)
4018 remainder
4019 = expand_binop (compute_mode, and_optab, op0,
4020 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4021 remainder, 1,
4022 OPTAB_LIB_WIDEN);
4023 if (remainder)
4024 return gen_lowpart (mode, remainder);
4026 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4027 pre_shift, tquotient, 1);
4029 else if (size <= HOST_BITS_PER_WIDE_INT)
4031 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4033 /* Most significant bit of divisor is set; emit an scc
4034 insn. */
4035 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4036 compute_mode, 1, 1);
4038 else
4040 /* Find a suitable multiplier and right shift count
4041 instead of multiplying with D. */
4043 mh = choose_multiplier (d, size, size,
4044 &ml, &post_shift, &dummy);
4046 /* If the suggested multiplier is more than SIZE bits,
4047 we can do better for even divisors, using an
4048 initial right shift. */
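/* E.g. (an added illustration): for d == 14 the 32-bit multiplier would
   need 33 bits, but after a pre-shift of 1 the divisor becomes 7 and a
   32-bit multiplier (0x92492493) suffices, avoiding the extra add/shift
   fixup.  */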
4049 if (mh != 0 && (d & 1) == 0)
4051 pre_shift = floor_log2 (d & -d);
4052 mh = choose_multiplier (d >> pre_shift, size,
4053 size - pre_shift,
4054 &ml, &post_shift, &dummy);
4055 gcc_assert (!mh);
4057 else
4058 pre_shift = 0;
4060 if (mh != 0)
4062 rtx t1, t2, t3, t4;
4064 if (post_shift - 1 >= BITS_PER_WORD)
4065 goto fail1;
4067 extra_cost
4068 = (shift_cost[speed][compute_mode][post_shift - 1]
4069 + shift_cost[speed][compute_mode][1]
4070 + 2 * add_cost[speed][compute_mode]);
4071 t1 = expand_mult_highpart (compute_mode, op0, ml,
4072 NULL_RTX, 1,
4073 max_cost - extra_cost);
4074 if (t1 == 0)
4075 goto fail1;
4076 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4077 op0, t1),
4078 NULL_RTX);
4079 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4080 t2, 1, NULL_RTX, 1);
4081 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4082 t1, t3),
4083 NULL_RTX);
4084 quotient = expand_shift
4085 (RSHIFT_EXPR, compute_mode, t4,
4086 post_shift - 1, tquotient, 1);
4088 else
4090 rtx t1, t2;
4092 if (pre_shift >= BITS_PER_WORD
4093 || post_shift >= BITS_PER_WORD)
4094 goto fail1;
4096 t1 = expand_shift
4097 (RSHIFT_EXPR, compute_mode, op0,
4098 pre_shift, NULL_RTX, 1);
4099 extra_cost
4100 = (shift_cost[speed][compute_mode][pre_shift]
4101 + shift_cost[speed][compute_mode][post_shift]);
4102 t2 = expand_mult_highpart (compute_mode, t1, ml,
4103 NULL_RTX, 1,
4104 max_cost - extra_cost);
4105 if (t2 == 0)
4106 goto fail1;
4107 quotient = expand_shift
4108 (RSHIFT_EXPR, compute_mode, t2,
4109 post_shift, tquotient, 1);
4113 else /* Too wide mode to use tricky code */
4114 break;
4116 insn = get_last_insn ();
4117 if (insn != last
4118 && (set = single_set (insn)) != 0
4119 && SET_DEST (set) == quotient)
4120 set_unique_reg_note (insn,
4121 REG_EQUAL,
4122 gen_rtx_UDIV (compute_mode, op0, op1));
4124 else /* TRUNC_DIV, signed */
4126 unsigned HOST_WIDE_INT ml;
4127 int lgup, post_shift;
4128 rtx mlr;
4129 HOST_WIDE_INT d = INTVAL (op1);
4130 unsigned HOST_WIDE_INT abs_d;
4132 /* Since d might be INT_MIN, we have to cast to
4133 unsigned HOST_WIDE_INT before negating to avoid
4134 undefined signed overflow. */
4135 abs_d = (d >= 0
4136 ? (unsigned HOST_WIDE_INT) d
4137 : - (unsigned HOST_WIDE_INT) d);
4139 /* n rem d = n rem -d */
4140 if (rem_flag && d < 0)
4142 d = abs_d;
4143 op1 = gen_int_mode (abs_d, compute_mode);
4146 if (d == 1)
4147 quotient = op0;
4148 else if (d == -1)
4149 quotient = expand_unop (compute_mode, neg_optab, op0,
4150 tquotient, 0);
4151 else if (HOST_BITS_PER_WIDE_INT >= size
4152 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4154 /* This case is not handled correctly below. */
4155 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4156 compute_mode, 1, 1);
4157 if (quotient == 0)
4158 goto fail1;
4160 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4161 && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4162 : sdiv_pow2_cheap[speed][compute_mode])
4163 /* We assume that the cheap metric is true if the
4164 optab has an expander for this mode. */
4165 && ((optab_handler ((rem_flag ? smod_optab
4166 : sdiv_optab),
4167 compute_mode)
4168 != CODE_FOR_nothing)
4169 || (optab_handler (sdivmod_optab,
4170 compute_mode)
4171 != CODE_FOR_nothing)))
4173 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4175 if (rem_flag)
4177 remainder = expand_smod_pow2 (compute_mode, op0, d);
4178 if (remainder)
4179 return gen_lowpart (mode, remainder);
4182 if (sdiv_pow2_cheap[speed][compute_mode]
4183 && ((optab_handler (sdiv_optab, compute_mode)
4184 != CODE_FOR_nothing)
4185 || (optab_handler (sdivmod_optab, compute_mode)
4186 != CODE_FOR_nothing)))
4187 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4188 compute_mode, op0,
4189 gen_int_mode (abs_d,
4190 compute_mode),
4191 NULL_RTX, 0);
4192 else
4193 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4195 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4196 negate the quotient. */
4197 if (d < 0)
4199 insn = get_last_insn ();
4200 if (insn != last
4201 && (set = single_set (insn)) != 0
4202 && SET_DEST (set) == quotient
4203 && abs_d < ((unsigned HOST_WIDE_INT) 1
4204 << (HOST_BITS_PER_WIDE_INT - 1)))
4205 set_unique_reg_note (insn,
4206 REG_EQUAL,
4207 gen_rtx_DIV (compute_mode,
4208 op0,
4209 GEN_INT
4210 (trunc_int_for_mode
4211 (abs_d,
4212 compute_mode))));
4214 quotient = expand_unop (compute_mode, neg_optab,
4215 quotient, quotient, 0);
4218 else if (size <= HOST_BITS_PER_WIDE_INT)
4220 choose_multiplier (abs_d, size, size - 1,
4221 &mlr, &post_shift, &lgup);
4222 ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4223 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4225 rtx t1, t2, t3;
4227 if (post_shift >= BITS_PER_WORD
4228 || size - 1 >= BITS_PER_WORD)
4229 goto fail1;
4231 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4232 + shift_cost[speed][compute_mode][size - 1]
4233 + add_cost[speed][compute_mode]);
4234 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4235 NULL_RTX, 0,
4236 max_cost - extra_cost);
4237 if (t1 == 0)
4238 goto fail1;
4239 t2 = expand_shift
4240 (RSHIFT_EXPR, compute_mode, t1,
4241 post_shift, NULL_RTX, 0);
4242 t3 = expand_shift
4243 (RSHIFT_EXPR, compute_mode, op0,
4244 size - 1, NULL_RTX, 0);
4245 if (d < 0)
4246 quotient
4247 = force_operand (gen_rtx_MINUS (compute_mode,
4248 t3, t2),
4249 tquotient);
4250 else
4251 quotient
4252 = force_operand (gen_rtx_MINUS (compute_mode,
4253 t2, t3),
4254 tquotient);
4256 else
4258 rtx t1, t2, t3, t4;
4260 if (post_shift >= BITS_PER_WORD
4261 || size - 1 >= BITS_PER_WORD)
4262 goto fail1;
4264 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4265 mlr = gen_int_mode (ml, compute_mode);
4266 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4267 + shift_cost[speed][compute_mode][size - 1]
4268 + 2 * add_cost[speed][compute_mode]);
4269 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4270 NULL_RTX, 0,
4271 max_cost - extra_cost);
4272 if (t1 == 0)
4273 goto fail1;
4274 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4275 t1, op0),
4276 NULL_RTX);
4277 t3 = expand_shift
4278 (RSHIFT_EXPR, compute_mode, t2,
4279 post_shift, NULL_RTX, 0);
4280 t4 = expand_shift
4281 (RSHIFT_EXPR, compute_mode, op0,
4282 size - 1, NULL_RTX, 0);
4283 if (d < 0)
4284 quotient
4285 = force_operand (gen_rtx_MINUS (compute_mode,
4286 t4, t3),
4287 tquotient);
4288 else
4289 quotient
4290 = force_operand (gen_rtx_MINUS (compute_mode,
4291 t3, t4),
4292 tquotient);
4295 else /* Too wide mode to use tricky code */
4296 break;
4298 insn = get_last_insn ();
4299 if (insn != last
4300 && (set = single_set (insn)) != 0
4301 && SET_DEST (set) == quotient)
4302 set_unique_reg_note (insn,
4303 REG_EQUAL,
4304 gen_rtx_DIV (compute_mode, op0, op1));
4306 break;
4308 fail1:
4309 delete_insns_since (last);
4310 break;
4312 case FLOOR_DIV_EXPR:
4313 case FLOOR_MOD_EXPR:
4314 /* We will come here only for signed operations. */
4315 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4317 unsigned HOST_WIDE_INT mh;
4318 int pre_shift, lgup, post_shift;
4319 HOST_WIDE_INT d = INTVAL (op1);
4320 rtx ml;
4322 if (d > 0)
4324 /* We could just as easily deal with negative constants here,
4325 but it does not seem worth the trouble for GCC 2.6. */
4326 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4328 pre_shift = floor_log2 (d);
4329 if (rem_flag)
4331 remainder = expand_binop (compute_mode, and_optab, op0,
4332 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4333 remainder, 0, OPTAB_LIB_WIDEN);
4334 if (remainder)
4335 return gen_lowpart (mode, remainder);
4337 quotient = expand_shift
4338 (RSHIFT_EXPR, compute_mode, op0,
4339 pre_shift, tquotient, 0);
4341 else
4343 rtx t1, t2, t3, t4;
4345 mh = choose_multiplier (d, size, size - 1,
4346 &ml, &post_shift, &lgup);
4347 gcc_assert (!mh);
4349 if (post_shift < BITS_PER_WORD
4350 && size - 1 < BITS_PER_WORD)
4352 t1 = expand_shift
4353 (RSHIFT_EXPR, compute_mode, op0,
4354 size - 1, NULL_RTX, 0);
4355 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4356 NULL_RTX, 0, OPTAB_WIDEN);
4357 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4358 + shift_cost[speed][compute_mode][size - 1]
4359 + 2 * add_cost[speed][compute_mode]);
4360 t3 = expand_mult_highpart (compute_mode, t2, ml,
4361 NULL_RTX, 1,
4362 max_cost - extra_cost);
4363 if (t3 != 0)
4365 t4 = expand_shift
4366 (RSHIFT_EXPR, compute_mode, t3,
4367 post_shift, NULL_RTX, 1);
4368 quotient = expand_binop (compute_mode, xor_optab,
4369 t4, t1, tquotient, 0,
4370 OPTAB_WIDEN);
4375 else
4377 rtx nsign, t1, t2, t3, t4;
4378 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4379 op0, constm1_rtx), NULL_RTX);
4380 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4381 0, OPTAB_WIDEN);
4382 nsign = expand_shift
4383 (RSHIFT_EXPR, compute_mode, t2,
4384 size - 1, NULL_RTX, 0);
4385 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4386 NULL_RTX);
4387 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4388 NULL_RTX, 0);
4389 if (t4)
4391 rtx t5;
4392 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4393 NULL_RTX, 0);
4394 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4395 t4, t5),
4396 tquotient);
4401 if (quotient != 0)
4402 break;
4403 delete_insns_since (last);
4405 /* Try using an instruction that produces both the quotient and
4406 remainder, using truncation. We can easily compensate the quotient
4407 or remainder to get floor rounding, once we have the remainder.
4408 Notice that we also compute the final remainder value here,
4409 and return the result right away. */
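/* (A sketch of the compensation, added for illustration: given a truncating
   quotient q and remainder r of OP0 by OP1, floor rounding is

     if (r != 0 && (OP0 ^ OP1) < 0)    operands of opposite sign
       q -= 1, r += OP1;

   which is what the branches emitted below implement.)  */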
4410 if (target == 0 || GET_MODE (target) != compute_mode)
4411 target = gen_reg_rtx (compute_mode);
4413 if (rem_flag)
4415 remainder
4416 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4417 quotient = gen_reg_rtx (compute_mode);
4419 else
4421 quotient
4422 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4423 remainder = gen_reg_rtx (compute_mode);
4426 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4427 quotient, remainder, 0))
4429 /* This could be computed with a branch-less sequence.
4430 Save that for later. */
4431 rtx tem;
4432 rtx label = gen_label_rtx ();
4433 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4434 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4435 NULL_RTX, 0, OPTAB_WIDEN);
4436 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4437 expand_dec (quotient, const1_rtx);
4438 expand_inc (remainder, op1);
4439 emit_label (label);
4440 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4443 /* No luck with division elimination or divmod. Have to do it
4444 by conditionally adjusting op0 *and* the result. */
4446 rtx label1, label2, label3, label4, label5;
4447 rtx adjusted_op0;
4448 rtx tem;
4450 quotient = gen_reg_rtx (compute_mode);
4451 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4452 label1 = gen_label_rtx ();
4453 label2 = gen_label_rtx ();
4454 label3 = gen_label_rtx ();
4455 label4 = gen_label_rtx ();
4456 label5 = gen_label_rtx ();
4457 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4458 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4459 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4460 quotient, 0, OPTAB_LIB_WIDEN);
4461 if (tem != quotient)
4462 emit_move_insn (quotient, tem);
4463 emit_jump_insn (gen_jump (label5));
4464 emit_barrier ();
4465 emit_label (label1);
4466 expand_inc (adjusted_op0, const1_rtx);
4467 emit_jump_insn (gen_jump (label4));
4468 emit_barrier ();
4469 emit_label (label2);
4470 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4471 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4472 quotient, 0, OPTAB_LIB_WIDEN);
4473 if (tem != quotient)
4474 emit_move_insn (quotient, tem);
4475 emit_jump_insn (gen_jump (label5));
4476 emit_barrier ();
4477 emit_label (label3);
4478 expand_dec (adjusted_op0, const1_rtx);
4479 emit_label (label4);
4480 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4481 quotient, 0, OPTAB_LIB_WIDEN);
4482 if (tem != quotient)
4483 emit_move_insn (quotient, tem);
4484 expand_dec (quotient, const1_rtx);
4485 emit_label (label5);
4487 break;
4489 case CEIL_DIV_EXPR:
4490 case CEIL_MOD_EXPR:
4491 if (unsignedp)
4493 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4495 rtx t1, t2, t3;
4496 unsigned HOST_WIDE_INT d = INTVAL (op1);
4497 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4498 floor_log2 (d), tquotient, 1);
4499 t2 = expand_binop (compute_mode, and_optab, op0,
4500 GEN_INT (d - 1),
4501 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4502 t3 = gen_reg_rtx (compute_mode);
4503 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4504 compute_mode, 1, 1);
4505 if (t3 == 0)
4507 rtx lab;
4508 lab = gen_label_rtx ();
4509 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4510 expand_inc (t1, const1_rtx);
4511 emit_label (lab);
4512 quotient = t1;
4514 else
4515 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4516 t1, t3),
4517 tquotient);
4518 break;
4521 /* Try using an instruction that produces both the quotient and
4522 remainder, using truncation. We can easily compensate the
4523 quotient or remainder to get ceiling rounding, once we have the
4524 remainder. Notice that we also compute the final remainder
4525 value here, and return the result right away. */
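/* (A sketch of the compensation, added for illustration: in the unsigned
   case, given a truncating quotient q and remainder r of OP0 by OP1,

     if (r != 0)
       q += 1, r -= OP1;

   yields the ceiling quotient and matching remainder, which is what the
   branch emitted below implements.)  */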
4526 if (target == 0 || GET_MODE (target) != compute_mode)
4527 target = gen_reg_rtx (compute_mode);
4529 if (rem_flag)
4531 remainder = (REG_P (target)
4532 ? target : gen_reg_rtx (compute_mode));
4533 quotient = gen_reg_rtx (compute_mode);
4535 else
4537 quotient = (REG_P (target)
4538 ? target : gen_reg_rtx (compute_mode));
4539 remainder = gen_reg_rtx (compute_mode);
4542 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4543 remainder, 1))
4545 /* This could be computed with a branch-less sequence.
4546 Save that for later. */
4547 rtx label = gen_label_rtx ();
4548 do_cmp_and_jump (remainder, const0_rtx, EQ,
4549 compute_mode, label);
4550 expand_inc (quotient, const1_rtx);
4551 expand_dec (remainder, op1);
4552 emit_label (label);
4553 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4556 /* No luck with division elimination or divmod. Have to do it
4557 by conditionally adjusting op0 *and* the result. */
4559 rtx label1, label2;
4560 rtx adjusted_op0, tem;
4562 quotient = gen_reg_rtx (compute_mode);
4563 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4564 label1 = gen_label_rtx ();
4565 label2 = gen_label_rtx ();
4566 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4567 compute_mode, label1);
4568 emit_move_insn (quotient, const0_rtx);
4569 emit_jump_insn (gen_jump (label2));
4570 emit_barrier ();
4571 emit_label (label1);
4572 expand_dec (adjusted_op0, const1_rtx);
4573 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4574 quotient, 1, OPTAB_LIB_WIDEN);
4575 if (tem != quotient)
4576 emit_move_insn (quotient, tem);
4577 expand_inc (quotient, const1_rtx);
4578 emit_label (label2);
4581 else /* signed */
4583 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4584 && INTVAL (op1) >= 0)
4586 /* This is extremely similar to the code for the unsigned case
4587 above. For 2.7 we should merge these variants, but for
4588 2.6.1 I don't want to touch the code for unsigned since that
4589 gets used in C. The signed case will only be used by other
4590 languages (Ada). */
4592 rtx t1, t2, t3;
4593 unsigned HOST_WIDE_INT d = INTVAL (op1);
4594 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4595 floor_log2 (d), tquotient, 0);
4596 t2 = expand_binop (compute_mode, and_optab, op0,
4597 GEN_INT (d - 1),
4598 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4599 t3 = gen_reg_rtx (compute_mode);
4600 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4601 compute_mode, 1, 1);
4602 if (t3 == 0)
4604 rtx lab;
4605 lab = gen_label_rtx ();
4606 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4607 expand_inc (t1, const1_rtx);
4608 emit_label (lab);
4609 quotient = t1;
4611 else
4612 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4613 t1, t3),
4614 tquotient);
4615 break;
4618 /* Try using an instruction that produces both the quotient and
4619 remainder, using truncation. We can easily compensate the
4620 quotient or remainder to get ceiling rounding, once we have the
4621 remainder. Notice that we also compute the final remainder
4622 value here, and return the result right away. */
4623 if (target == 0 || GET_MODE (target) != compute_mode)
4624 target = gen_reg_rtx (compute_mode);
4625 if (rem_flag)
4627 remainder = (REG_P (target)
4628 ? target : gen_reg_rtx (compute_mode));
4629 quotient = gen_reg_rtx (compute_mode);
4631 else
4633 quotient = (REG_P (target)
4634 ? target : gen_reg_rtx (compute_mode));
4635 remainder = gen_reg_rtx (compute_mode);
4638 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4639 remainder, 0))
4641 /* This could be computed with a branch-less sequence.
4642 Save that for later. */
4643 rtx tem;
4644 rtx label = gen_label_rtx ();
4645 do_cmp_and_jump (remainder, const0_rtx, EQ,
4646 compute_mode, label);
4647 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4648 NULL_RTX, 0, OPTAB_WIDEN);
4649 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4650 expand_inc (quotient, const1_rtx);
4651 expand_dec (remainder, op1);
4652 emit_label (label);
4653 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4656 /* No luck with division elimination or divmod. Have to do it
4657 by conditionally adjusting op0 *and* the result. */
4659 rtx label1, label2, label3, label4, label5;
4660 rtx adjusted_op0;
4661 rtx tem;
4663 quotient = gen_reg_rtx (compute_mode);
4664 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4665 label1 = gen_label_rtx ();
4666 label2 = gen_label_rtx ();
4667 label3 = gen_label_rtx ();
4668 label4 = gen_label_rtx ();
4669 label5 = gen_label_rtx ();
4670 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4671 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4672 compute_mode, label1);
4673 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4674 quotient, 0, OPTAB_LIB_WIDEN);
4675 if (tem != quotient)
4676 emit_move_insn (quotient, tem);
4677 emit_jump_insn (gen_jump (label5));
4678 emit_barrier ();
4679 emit_label (label1);
4680 expand_dec (adjusted_op0, const1_rtx);
4681 emit_jump_insn (gen_jump (label4));
4682 emit_barrier ();
4683 emit_label (label2);
4684 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4685 compute_mode, label3);
4686 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4687 quotient, 0, OPTAB_LIB_WIDEN);
4688 if (tem != quotient)
4689 emit_move_insn (quotient, tem);
4690 emit_jump_insn (gen_jump (label5));
4691 emit_barrier ();
4692 emit_label (label3);
4693 expand_inc (adjusted_op0, const1_rtx);
4694 emit_label (label4);
4695 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4696 quotient, 0, OPTAB_LIB_WIDEN);
4697 if (tem != quotient)
4698 emit_move_insn (quotient, tem);
4699 expand_inc (quotient, const1_rtx);
4700 emit_label (label5);
4703 break;
4705 case EXACT_DIV_EXPR:
4706 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4708 HOST_WIDE_INT d = INTVAL (op1);
4709 unsigned HOST_WIDE_INT ml;
4710 int pre_shift;
4711 rtx t1;
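/* Exact division: OP0 is known to be a multiple of D, so shift out D's
   factors of two and multiply by the multiplicative inverse of the odd
   part modulo 2**size.  For instance (an added illustration), for SImode
   and D == 7 the inverse is 0xB6DB6DB7, and 21 * 0xB6DB6DB7 == 3 modulo
   2**32.  */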
4713 pre_shift = floor_log2 (d & -d);
4714 ml = invert_mod2n (d >> pre_shift, size);
4715 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4716 pre_shift, NULL_RTX, unsignedp);
4717 quotient = expand_mult (compute_mode, t1,
4718 gen_int_mode (ml, compute_mode),
4719 NULL_RTX, 1);
4721 insn = get_last_insn ();
4722 set_unique_reg_note (insn,
4723 REG_EQUAL,
4724 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4725 compute_mode,
4726 op0, op1));
4728 break;
4730 case ROUND_DIV_EXPR:
4731 case ROUND_MOD_EXPR:
4732 if (unsignedp)
4734 rtx tem;
4735 rtx label;
4736 label = gen_label_rtx ();
4737 quotient = gen_reg_rtx (compute_mode);
4738 remainder = gen_reg_rtx (compute_mode);
4739 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4741 rtx tem;
4742 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4743 quotient, 1, OPTAB_LIB_WIDEN);
4744 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4745 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4746 remainder, 1, OPTAB_LIB_WIDEN);
4748 tem = plus_constant (op1, -1);
4749 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4750 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4751 expand_inc (quotient, const1_rtx);
4752 expand_dec (remainder, op1);
4753 emit_label (label);
4755 else
4757 rtx abs_rem, abs_op1, tem, mask;
4758 rtx label;
4759 label = gen_label_rtx ();
4760 quotient = gen_reg_rtx (compute_mode);
4761 remainder = gen_reg_rtx (compute_mode);
4762 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4764 rtx tem;
4765 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4766 quotient, 0, OPTAB_LIB_WIDEN);
4767 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4768 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4769 remainder, 0, OPTAB_LIB_WIDEN);
4771 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4772 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4773 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4774 1, NULL_RTX, 1);
4775 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4776 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4777 NULL_RTX, 0, OPTAB_WIDEN);
4778 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4779 size - 1, NULL_RTX, 0);
4780 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4781 NULL_RTX, 0, OPTAB_WIDEN);
4782 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4783 NULL_RTX, 0, OPTAB_WIDEN);
4784 expand_inc (quotient, tem);
4785 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4786 NULL_RTX, 0, OPTAB_WIDEN);
4787 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4788 NULL_RTX, 0, OPTAB_WIDEN);
4789 expand_dec (remainder, tem);
4790 emit_label (label);
4792 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4794 default:
4795 gcc_unreachable ();
4798 if (quotient == 0)
4800 if (target && GET_MODE (target) != compute_mode)
4801 target = 0;
4803 if (rem_flag)
4805 /* Try to produce the remainder without producing the quotient.
4806 If we seem to have a divmod pattern that does not require widening,
4807 don't try widening here. We should really have a WIDEN argument
4808 to expand_twoval_binop, since what we'd really like to do here is
4809 1) try a mod insn in compute_mode
4810 2) try a divmod insn in compute_mode
4811 3) try a div insn in compute_mode and multiply-subtract to get
4812 remainder
4813 4) try the same things with widening allowed. */
4814 remainder
4815 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4816 op0, op1, target,
4817 unsignedp,
4818 ((optab_handler (optab2, compute_mode)
4819 != CODE_FOR_nothing)
4820 ? OPTAB_DIRECT : OPTAB_WIDEN));
4821 if (remainder == 0)
4823 /* No luck there. Can we do remainder and divide at once
4824 without a library call? */
4825 remainder = gen_reg_rtx (compute_mode);
4826 if (! expand_twoval_binop ((unsignedp
4827 ? udivmod_optab
4828 : sdivmod_optab),
4829 op0, op1,
4830 NULL_RTX, remainder, unsignedp))
4831 remainder = 0;
4834 if (remainder)
4835 return gen_lowpart (mode, remainder);
4838 /* Produce the quotient. Try a quotient insn, but not a library call.
4839 If we have a divmod in this mode, use it in preference to widening
4840 the div (for this test we assume it will not fail). Note that optab2
4841 is set to the one of the two optabs that the call below will use. */
4842 quotient
4843 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4844 op0, op1, rem_flag ? NULL_RTX : target,
4845 unsignedp,
4846 ((optab_handler (optab2, compute_mode)
4847 != CODE_FOR_nothing)
4848 ? OPTAB_DIRECT : OPTAB_WIDEN));
4850 if (quotient == 0)
4852 /* No luck there. Try a quotient-and-remainder insn,
4853 keeping the quotient alone. */
4854 quotient = gen_reg_rtx (compute_mode);
4855 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4856 op0, op1,
4857 quotient, NULL_RTX, unsignedp))
4859 quotient = 0;
4860 if (! rem_flag)
4861 /* Still no luck. If we are not computing the remainder,
4862 use a library call for the quotient. */
4863 quotient = sign_expand_binop (compute_mode,
4864 udiv_optab, sdiv_optab,
4865 op0, op1, target,
4866 unsignedp, OPTAB_LIB_WIDEN);
4871 if (rem_flag)
4873 if (target && GET_MODE (target) != compute_mode)
4874 target = 0;
4876 if (quotient == 0)
4878 /* No divide instruction either. Use library for remainder. */
4879 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4880 op0, op1, target,
4881 unsignedp, OPTAB_LIB_WIDEN);
4882 /* No remainder function. Try a quotient-and-remainder
4883 function, keeping the remainder. */
4884 if (!remainder)
4886 remainder = gen_reg_rtx (compute_mode);
4887 if (!expand_twoval_binop_libfunc
4888 (unsignedp ? udivmod_optab : sdivmod_optab,
4889 op0, op1,
4890 NULL_RTX, remainder,
4891 unsignedp ? UMOD : MOD))
4892 remainder = NULL_RTX;
4895 else
4897 /* We divided. Now finish doing X - Y * (X / Y). */
4898 remainder = expand_mult (compute_mode, quotient, op1,
4899 NULL_RTX, unsignedp);
4900 remainder = expand_binop (compute_mode, sub_optab, op0,
4901 remainder, target, unsignedp,
4902 OPTAB_LIB_WIDEN);
4906 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4909 /* Return a tree node with data type TYPE, describing the value of X.
4910 Usually this is a VAR_DECL, if there is no obvious better choice.
4911 X may be an expression; however, we only support those expressions
4912 generated by loop.c. */
4914 tree
4915 make_tree (tree type, rtx x)
4917 tree t;
4919 switch (GET_CODE (x))
4921 case CONST_INT:
4923 HOST_WIDE_INT hi = 0;
4925 if (INTVAL (x) < 0
4926 && !(TYPE_UNSIGNED (type)
4927 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4928 < HOST_BITS_PER_WIDE_INT)))
4929 hi = -1;
4931 t = build_int_cst_wide (type, INTVAL (x), hi);
4933 return t;
4936 case CONST_DOUBLE:
4937 if (GET_MODE (x) == VOIDmode)
4938 t = build_int_cst_wide (type,
4939 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4940 else
4942 REAL_VALUE_TYPE d;
4944 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4945 t = build_real (type, d);
4948 return t;
4950 case CONST_VECTOR:
4952 int units = CONST_VECTOR_NUNITS (x);
4953 tree itype = TREE_TYPE (type);
4954 tree t = NULL_TREE;
4955 int i;
4958 /* Build a tree with vector elements. */
4959 for (i = units - 1; i >= 0; --i)
4961 rtx elt = CONST_VECTOR_ELT (x, i);
4962 t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
4965 return build_vector (type, t);
4968 case PLUS:
4969 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4970 make_tree (type, XEXP (x, 1)));
4972 case MINUS:
4973 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4974 make_tree (type, XEXP (x, 1)));
4976 case NEG:
4977 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4979 case MULT:
4980 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4981 make_tree (type, XEXP (x, 1)));
4983 case ASHIFT:
4984 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4985 make_tree (type, XEXP (x, 1)));
4987 case LSHIFTRT:
4988 t = unsigned_type_for (type);
4989 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4990 make_tree (t, XEXP (x, 0)),
4991 make_tree (type, XEXP (x, 1))));
4993 case ASHIFTRT:
4994 t = signed_type_for (type);
4995 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4996 make_tree (t, XEXP (x, 0)),
4997 make_tree (type, XEXP (x, 1))));
4999 case DIV:
5000 if (TREE_CODE (type) != REAL_TYPE)
5001 t = signed_type_for (type);
5002 else
5003 t = type;
5005 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5006 make_tree (t, XEXP (x, 0)),
5007 make_tree (t, XEXP (x, 1))));
5008 case UDIV:
5009 t = unsigned_type_for (type);
5010 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5011 make_tree (t, XEXP (x, 0)),
5012 make_tree (t, XEXP (x, 1))));
5014 case SIGN_EXTEND:
5015 case ZERO_EXTEND:
5016 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5017 GET_CODE (x) == ZERO_EXTEND);
5018 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5020 case CONST:
5021 return make_tree (type, XEXP (x, 0));
5023 case SYMBOL_REF:
5024 t = SYMBOL_REF_DECL (x);
5025 if (t)
5026 return fold_convert (type, build_fold_addr_expr (t));
5027 /* else fall through. */
5029 default:
5030 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5032 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5033 address mode to pointer mode. */
5034 if (POINTER_TYPE_P (type))
5035 x = convert_memory_address_addr_space
5036 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5038 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5039 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5040 t->decl_with_rtl.rtl = x;
5042 return t;
5046 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5047 and returning TARGET.
5049 If TARGET is 0, a pseudo-register or constant is returned. */
5052 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5054 rtx tem = 0;
5056 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5057 tem = simplify_binary_operation (AND, mode, op0, op1);
5058 if (tem == 0)
5059 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5061 if (target == 0)
5062 target = tem;
5063 else if (tem != target)
5064 emit_move_insn (target, tem);
5065 return target;
5068 /* Helper function for emit_store_flag. */
5069 static rtx
5070 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5071 enum machine_mode mode, enum machine_mode compare_mode,
5072 int unsignedp, rtx x, rtx y, int normalizep,
5073 enum machine_mode target_mode)
5075 struct expand_operand ops[4];
5076 rtx op0, last, comparison, subtarget;
5077 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5079 last = get_last_insn ();
5080 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5081 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5082 if (!x || !y)
5084 delete_insns_since (last);
5085 return NULL_RTX;
5088 if (target_mode == VOIDmode)
5089 target_mode = result_mode;
5090 if (!target)
5091 target = gen_reg_rtx (target_mode);
5093 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5095 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5096 create_fixed_operand (&ops[1], comparison);
5097 create_fixed_operand (&ops[2], x);
5098 create_fixed_operand (&ops[3], y);
5099 if (!maybe_expand_insn (icode, 4, ops))
5101 delete_insns_since (last);
5102 return NULL_RTX;
5104 subtarget = ops[0].value;
5106 /* If we are converting to a wider mode, first convert to
5107 TARGET_MODE, then normalize. This produces better combining
5108 opportunities on machines that have a SIGN_EXTRACT when we are
5109 testing a single bit. This mostly benefits the 68k.
5111 If STORE_FLAG_VALUE does not have the sign bit set when
5112 interpreted in MODE, we can do this conversion as unsigned, which
5113 is usually more efficient. */
5114 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5116 convert_move (target, subtarget,
5117 val_signbit_known_clear_p (result_mode,
5118 STORE_FLAG_VALUE));
5119 op0 = target;
5120 result_mode = target_mode;
5122 else
5123 op0 = subtarget;
5125 /* If we want to keep subexpressions around, don't reuse our last
5126 target. */
5127 if (optimize)
5128 subtarget = 0;
5130 /* Now normalize to the proper value in MODE. Sometimes we don't
5131 have to do anything. */
5132 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5134 /* STORE_FLAG_VALUE might be the most negative number, so write
5135 the comparison this way to avoid a compile-time warning. */
5136 else if (- normalizep == STORE_FLAG_VALUE)
5137 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5139 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5140 it hard to use a value of just the sign bit due to ANSI integer
5141 constant typing rules. */
5142 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5143 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5144 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5145 normalizep == 1);
5146 else
5148 gcc_assert (STORE_FLAG_VALUE & 1);
5150 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5151 if (normalizep == -1)
5152 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5155 /* If we were converting to a smaller mode, do the conversion now. */
5156 if (target_mode != result_mode)
5158 convert_move (target, op0, 0);
5159 return target;
5161 else
5162 return op0;
5166 /* A subroutine of emit_store_flag only including "tricks" that do not
5167 need a recursive call. These are kept separate to avoid infinite
5168 loops. */
5170 static rtx
5171 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5172 enum machine_mode mode, int unsignedp, int normalizep,
5173 enum machine_mode target_mode)
5175 rtx subtarget;
5176 enum insn_code icode;
5177 enum machine_mode compare_mode;
5178 enum mode_class mclass;
5179 enum rtx_code scode;
5180 rtx tem;
5182 if (unsignedp)
5183 code = unsigned_condition (code);
5184 scode = swap_condition (code);
5186 /* If one operand is constant, make it the second one. Only do this
5187 if the other operand is not constant as well. */
5189 if (swap_commutative_operands_p (op0, op1))
5191 tem = op0;
5192 op0 = op1;
5193 op1 = tem;
5194 code = swap_condition (code);
5197 if (mode == VOIDmode)
5198 mode = GET_MODE (op0);
5200 /* For some comparisons with 1 and -1, we can convert this to
5201 comparisons with zero. This will often produce more opportunities for
5202 store-flag insns. */
5204 switch (code)
5206 case LT:
5207 if (op1 == const1_rtx)
5208 op1 = const0_rtx, code = LE;
5209 break;
5210 case LE:
5211 if (op1 == constm1_rtx)
5212 op1 = const0_rtx, code = LT;
5213 break;
5214 case GE:
5215 if (op1 == const1_rtx)
5216 op1 = const0_rtx, code = GT;
5217 break;
5218 case GT:
5219 if (op1 == constm1_rtx)
5220 op1 = const0_rtx, code = GE;
5221 break;
5222 case GEU:
5223 if (op1 == const1_rtx)
5224 op1 = const0_rtx, code = NE;
5225 break;
5226 case LTU:
5227 if (op1 == const1_rtx)
5228 op1 = const0_rtx, code = EQ;
5229 break;
5230 default:
5231 break;
5234 /* If we are comparing a double-word integer with zero or -1, we can
5235 convert the comparison into one involving a single word. */
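/* For instance (an added illustration), on a 32-bit target a DImode test
   "x == 0" becomes "(xlow | xhigh) == 0", "x == -1" becomes
   "(xlow & xhigh) == -1", and "x < 0" or "x >= 0" only needs to look at
   the sign bit of the high word.  */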
5236 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5237 && GET_MODE_CLASS (mode) == MODE_INT
5238 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5240 if ((code == EQ || code == NE)
5241 && (op1 == const0_rtx || op1 == constm1_rtx))
5243 rtx op00, op01;
5245 /* Do a logical OR or AND of the two words and compare the
5246 result. */
5247 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5248 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5249 tem = expand_binop (word_mode,
5250 op1 == const0_rtx ? ior_optab : and_optab,
5251 op00, op01, NULL_RTX, unsignedp,
5252 OPTAB_DIRECT);
5254 if (tem != 0)
5255 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5256 unsignedp, normalizep);
5258 else if ((code == LT || code == GE) && op1 == const0_rtx)
5260 rtx op0h;
5262 /* If testing the sign bit, can just test on high word. */
5263 op0h = simplify_gen_subreg (word_mode, op0, mode,
5264 subreg_highpart_offset (word_mode,
5265 mode));
5266 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5267 unsignedp, normalizep);
5269 else
5270 tem = NULL_RTX;
5272 if (tem)
5274 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5275 return tem;
5276 if (!target)
5277 target = gen_reg_rtx (target_mode);
5279 convert_move (target, tem,
5280 !val_signbit_known_set_p (word_mode,
5281 (normalizep ? normalizep
5282 : STORE_FLAG_VALUE)));
5283 return target;
5287 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5288 complement of A (for GE) and shifting the sign bit to the low bit. */
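/* E.g. (an added illustration) for a 32-bit x:

     x < 0   is  (unsigned) x >> 31    (or x >> 31 for a -1/0 result),
     x >= 0  is  (unsigned) ~x >> 31   (likewise with an arithmetic shift),

   which is the one's complement (for GE) plus sign-bit shift used below.  */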
5289 if (op1 == const0_rtx && (code == LT || code == GE)
5290 && GET_MODE_CLASS (mode) == MODE_INT
5291 && (normalizep || STORE_FLAG_VALUE == 1
5292 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5294 subtarget = target;
5296 if (!target)
5297 target_mode = mode;
5299 /* If the result is to be wider than OP0, it is best to convert it
5300 first. If it is to be narrower, it is *incorrect* to convert it
5301 first. */
5302 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5304 op0 = convert_modes (target_mode, mode, op0, 0);
5305 mode = target_mode;
5308 if (target_mode != mode)
5309 subtarget = 0;
5311 if (code == GE)
5312 op0 = expand_unop (mode, one_cmpl_optab, op0,
5313 ((STORE_FLAG_VALUE == 1 || normalizep)
5314 ? 0 : subtarget), 0);
5316 if (STORE_FLAG_VALUE == 1 || normalizep)
5317 /* If we are supposed to produce a 0/1 value, we want to do
5318 a logical shift from the sign bit to the low-order bit; for
5319 a -1/0 value, we do an arithmetic shift. */
5320 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5321 GET_MODE_BITSIZE (mode) - 1,
5322 subtarget, normalizep != -1);
5324 if (mode != target_mode)
5325 op0 = convert_modes (target_mode, mode, op0, 0);
5327 return op0;
5330 mclass = GET_MODE_CLASS (mode);
5331 for (compare_mode = mode; compare_mode != VOIDmode;
5332 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5334 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5335 icode = optab_handler (cstore_optab, optab_mode);
5336 if (icode != CODE_FOR_nothing)
5338 do_pending_stack_adjust ();
5339 tem = emit_cstore (target, icode, code, mode, compare_mode,
5340 unsignedp, op0, op1, normalizep, target_mode);
5341 if (tem)
5342 return tem;
5344 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5346 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5347 unsignedp, op1, op0, normalizep, target_mode);
5348 if (tem)
5349 return tem;
5351 break;
5355 return 0;
5358 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1,
5359 storing the result in TARGET. Normally return TARGET.
5360 Return 0 if that cannot be done.
5362 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5363 it is VOIDmode, they cannot both be CONST_INT.
5365 UNSIGNEDP is for the case where we have to widen the operands
5366 to perform the operation. It says to use zero-extension.
5368 NORMALIZEP is 1 if we should convert the result to be either zero
5369 or one. NORMALIZEP is -1 if we should convert the result to be
5370 either zero or -1. If NORMALIZEP is zero, the result will be left
5371 "raw" out of the scc insn. */
5373 rtx
5374 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5375 enum machine_mode mode, int unsignedp, int normalizep)
5377 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5378 enum rtx_code rcode;
5379 rtx subtarget;
5380 rtx tem, last, trueval;
5382 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5383 target_mode);
5384 if (tem)
5385 return tem;
5387 /* If we reached here, we can't do this with a scc insn; however, there
5388 are some comparisons that can be done in other ways. Don't do any
5389 of these cases if branches are very cheap. */
5390 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5391 return 0;
5393 /* See what we need to return. We can only return a 1, -1, or the
5394 sign bit. */
5396 if (normalizep == 0)
5398 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5399 normalizep = STORE_FLAG_VALUE;
5401 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5402 ;
5403 else
5404 return 0;
5407 last = get_last_insn ();
5409 /* If optimizing, use different pseudo registers for each insn, instead
5410 of reusing the same pseudo. This leads to better CSE, but slows
5411 down the compiler, since there are more pseudos. */
5412 subtarget = (!optimize
5413 && (target_mode == mode)) ? target : NULL_RTX;
5414 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5416 /* For floating-point comparisons, try the reverse comparison or try
5417 changing the "orderedness" of the comparison. */
5418 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5420 enum rtx_code first_code;
5421 bool and_them;
5423 rcode = reverse_condition_maybe_unordered (code);
5424 if (can_compare_p (rcode, mode, ccp_store_flag)
5425 && (code == ORDERED || code == UNORDERED
5426 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5427 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5429 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5430 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5432 /* For the reverse comparison, use either an addition or a XOR. */
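/* The reverse comparison yields the opposite truth value, so a cheap
   fix-up recovers the original one: e.g. with STORE_FLAG_VALUE == 1
   and normalizep == -1, the reversed result is 1/0 and adding -1
   turns it into the desired 0/-1; the XOR variant flips a 0/TRUEVAL
   result to TRUEVAL/0 instead.  */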
5433 if (want_add
5434 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5435 optimize_insn_for_speed_p ()) == 0)
5437 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5438 STORE_FLAG_VALUE, target_mode);
5439 if (tem)
5440 return expand_binop (target_mode, add_optab, tem,
5441 GEN_INT (normalizep),
5442 target, 0, OPTAB_WIDEN);
5444 else if (!want_add
5445 && rtx_cost (trueval, XOR, 1,
5446 optimize_insn_for_speed_p ()) == 0)
5448 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5449 normalizep, target_mode);
5450 if (tem)
5451 return expand_binop (target_mode, xor_optab, tem, trueval,
5452 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5456 delete_insns_since (last);
5458 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5459 if (code == ORDERED || code == UNORDERED)
5460 return 0;
5462 and_them = split_comparison (code, mode, &first_code, &code);
5464 /* If there are no NaNs, the first comparison should always fall through.
5465 Effectively change the comparison to the other one. */
5466 if (!HONOR_NANS (mode))
5468 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5469 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5470 target_mode);
5473 #ifdef HAVE_conditional_move
5474 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5475 conditional move. */
5476 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5477 normalizep, target_mode);
5478 if (tem == 0)
5479 return 0;
5481 if (and_them)
5482 tem = emit_conditional_move (target, code, op0, op1, mode,
5483 tem, const0_rtx, GET_MODE (tem), 0);
5484 else
5485 tem = emit_conditional_move (target, code, op0, op1, mode,
5486 trueval, tem, GET_MODE (tem), 0);
5488 if (tem == 0)
5489 delete_insns_since (last);
5490 return tem;
5491 #else
5492 return 0;
5493 #endif
5496 /* The remaining tricks only apply to integer comparisons. */
5498 if (GET_MODE_CLASS (mode) != MODE_INT)
5499 return 0;
5501 /* If this is an equality comparison of integers, we can try to exclusive-or
5502 (or subtract) the two operands and use a recursive call to try the
5503 comparison with zero. Don't do any of these cases if branches are
5504 very cheap. */
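/* E.g. "a == b" is tried as "(a ^ b) == 0" (or "(a - b) == 0" if no
   XOR pattern exists); the recursive emit_store_flag call may then
   use one of the zero-comparison tricks below.  */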
5506 if ((code == EQ || code == NE) && op1 != const0_rtx)
5508 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5509 OPTAB_WIDEN);
5511 if (tem == 0)
5512 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5513 OPTAB_WIDEN);
5514 if (tem != 0)
5515 tem = emit_store_flag (target, code, tem, const0_rtx,
5516 mode, unsignedp, normalizep);
5517 if (tem != 0)
5518 return tem;
5520 delete_insns_since (last);
5523 /* For integer comparisons, try the reverse comparison. However, for
5524 small X, where we would have to extend anyway, implementing "X != 0"
5525 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5526 rcode = reverse_condition (code);
5527 if (can_compare_p (rcode, mode, ccp_store_flag)
5528 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5529 && code == NE
5530 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5531 && op1 == const0_rtx))
5533 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5534 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5536 /* Again, for the reverse comparison, use either an addition or a XOR. */
5537 if (want_add
5538 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5539 optimize_insn_for_speed_p ()) == 0)
5541 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5542 STORE_FLAG_VALUE, target_mode);
5543 if (tem != 0)
5544 tem = expand_binop (target_mode, add_optab, tem,
5545 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5547 else if (!want_add
5548 && rtx_cost (trueval, XOR, 1,
5549 optimize_insn_for_speed_p ()) == 0)
5551 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5552 normalizep, target_mode);
5553 if (tem != 0)
5554 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5555 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5558 if (tem != 0)
5559 return tem;
5560 delete_insns_since (last);
5563 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5564 the constant zero. Reject all other comparisons at this point. Only
5565 do LE and GT if branches are expensive, since the sequences they need
5566 are expensive on 2-operand machines. */
5568 if (op1 != const0_rtx
5569 || (code != EQ && code != NE
5570 && (BRANCH_COST (optimize_insn_for_speed_p (),
5571 false) <= 1 || (code != LE && code != GT))))
5572 return 0;
5574 /* Try to put the result of the comparison in the sign bit. Assume we can't
5575 do the necessary operation below. */
5577 tem = 0;
5579 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5580 the sign bit set. */
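/* Worked example, assuming a 32-bit int: A = 0 gives 0 | -1 = -1
   (sign bit set); A = -4 gives -4 | -5 = -1 (set); A = 6 gives
   6 | 5 = 7 (clear).  */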
5582 if (code == LE)
5584 /* This is destructive, so SUBTARGET can't be OP0. */
5585 if (rtx_equal_p (subtarget, op0))
5586 subtarget = 0;
5588 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5589 OPTAB_WIDEN);
5590 if (tem)
5591 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5592 OPTAB_WIDEN);
5595 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5596 number of bits in the mode of OP0, minus one. */
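/* Worked example, 32-bit int, BITS = 31: A = 6 gives (6 >> 31) - 6 = -6
   (sign bit set, so A > 0); A = 0 gives 0 - 0 = 0 (clear); A = -3 gives
   -1 - (-3) = 2 (clear).  */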
5598 if (code == GT)
5600 if (rtx_equal_p (subtarget, op0))
5601 subtarget = 0;
5603 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5604 GET_MODE_BITSIZE (mode) - 1,
5605 subtarget, 0);
5606 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5607 OPTAB_WIDEN);
5610 if (code == EQ || code == NE)
5612 /* For EQ or NE, one way to do the comparison is to apply an operation
5613 that converts the operand into a positive number if it is nonzero
5614 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5615 for NE we negate. This puts the result in the sign bit. Then we
5616 normalize with a shift, if needed.
5618 Two operations that can do the above actions are ABS and FFS, so try
5619 them. If that doesn't work, and MODE is smaller than a full word,
5620 we can use zero-extension to the wider mode (an unsigned conversion)
5621 as the operation. */
5623 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5624 that is compensated by the subsequent overflow when subtracting
5625 one / negating. */
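/* E.g. for NE via abs: -abs(x) is negative exactly when x != 0 (for
   x == INT_MIN both operations wrap, but the sign bit still ends up
   set), so the sign bit is the NE result; for EQ, abs(x) - 1 has the
   sign bit set only for x == 0.  */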
5627 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5628 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5629 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5630 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5631 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5633 tem = convert_modes (word_mode, mode, op0, 1);
5634 mode = word_mode;
5637 if (tem != 0)
5639 if (code == EQ)
5640 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5641 0, OPTAB_WIDEN);
5642 else
5643 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5646 /* If we couldn't do it that way, for NE we can "or" the two's complement
5647 of the value with itself. For EQ, we take the one's complement of
5648 that "or", which is an extra insn, so we only handle EQ if branches
5649 are expensive. */
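/* I.e. (-x | x) has the sign bit set exactly when x != 0; taking the
   one's complement of that result instead gives a sign bit that is
   set exactly when x == 0.  */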
5651 if (tem == 0
5652 && (code == NE
5653 || BRANCH_COST (optimize_insn_for_speed_p (),
5654 false) > 1))
5656 if (rtx_equal_p (subtarget, op0))
5657 subtarget = 0;
5659 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5660 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5661 OPTAB_WIDEN);
5663 if (tem && code == EQ)
5664 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5668 if (tem && normalizep)
5669 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5670 GET_MODE_BITSIZE (mode) - 1,
5671 subtarget, normalizep == 1);
5673 if (tem)
5675 if (!target)
5676 ;
5677 else if (GET_MODE (tem) != target_mode)
5679 convert_move (target, tem, 0);
5680 tem = target;
5682 else if (!subtarget)
5684 emit_move_insn (target, tem);
5685 tem = target;
5688 else
5689 delete_insns_since (last);
5691 return tem;
5694 /* Like emit_store_flag, but always succeeds. */
5696 rtx
5697 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5698 enum machine_mode mode, int unsignedp, int normalizep)
5700 rtx tem, label;
5701 rtx trueval, falseval;
5703 /* First see if emit_store_flag can do the job. */
5704 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5705 if (tem != 0)
5706 return tem;
5708 if (!target)
5709 target = gen_reg_rtx (word_mode);
5711 /* If this failed, we have to do this with set/compare/jump/set code. For
5712 foo != 0, if foo is already in TARGET, just replace it with TRUEVAL when nonzero. */
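/* Shape of the fallback emitted below (illustrative only):
     for "target = (target != 0)":   if (target == 0) goto L;
                                     target = TRUEVAL;
                                   L:
     in general:                     target = TRUEVAL;
                                     if (OP0 <CODE> OP1) goto L;
                                     target = FALSEVAL;
                                   L:                              */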
5713 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5714 if (code == NE
5715 && GET_MODE_CLASS (mode) == MODE_INT
5716 && REG_P (target)
5717 && op0 == target
5718 && op1 == const0_rtx)
5720 label = gen_label_rtx ();
5721 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5722 mode, NULL_RTX, NULL_RTX, label, -1);
5723 emit_move_insn (target, trueval);
5724 emit_label (label);
5725 return target;
5728 if (!REG_P (target)
5729 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5730 target = gen_reg_rtx (GET_MODE (target));
5732 /* Jump in the right direction if the target cannot implement CODE
5733 but can jump on its reverse condition. */
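/* E.g. if the backend cannot branch on UNLT but can branch on GE (its
   maybe-unordered reverse), swap TRUEVAL and FALSEVAL and branch on GE
   instead; the stored values then still match the original UNLT
   condition.  */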
5734 falseval = const0_rtx;
5735 if (! can_compare_p (code, mode, ccp_jump)
5736 && (! FLOAT_MODE_P (mode)
5737 || code == ORDERED || code == UNORDERED
5738 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5739 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5741 enum rtx_code rcode;
5742 if (FLOAT_MODE_P (mode))
5743 rcode = reverse_condition_maybe_unordered (code);
5744 else
5745 rcode = reverse_condition (code);
5747 /* Canonicalize to UNORDERED for the libcall. */
5748 if (can_compare_p (rcode, mode, ccp_jump)
5749 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5751 falseval = trueval;
5752 trueval = const0_rtx;
5753 code = rcode;
5757 emit_move_insn (target, trueval);
5758 label = gen_label_rtx ();
5759 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5760 NULL_RTX, label, -1);
5762 emit_move_insn (target, falseval);
5763 emit_label (label);
5765 return target;
5768 /* Perform a possibly multi-word comparison and conditional jump to LABEL
5769 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5770 now a thin wrapper around do_compare_rtx_and_jump. */
5772 static void
5773 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5774 rtx label)
5776 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5777 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5778 NULL_RTX, NULL_RTX, label, -1);