gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   5    Free Software Foundation, Inc.
   6
   7 This file is part of GCC.
   8
   9 GCC is free software; you can redistribute it and/or modify it under
  10 the terms of the GNU General Public License as published by the Free
  11 Software Foundation; either version 3, or (at your option) any later
  12 version.
  13
  14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  17 for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with GCC; see the file COPYING3.  If not see
  21 <http://www.gnu.org/licenses/>.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "tm.h"
  28 #include "toplev.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "tm_p.h"
  32 #include "flags.h"
  33 #include "insn-config.h"
  34 #include "expr.h"
  35 #include "optabs.h"
  36 #include "real.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39 #include "df.h"
  40 #include "target.h"
  41
  42 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  43                                    unsigned HOST_WIDE_INT,
  44                                    unsigned HOST_WIDE_INT, rtx);
  45 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  46                                    unsigned HOST_WIDE_INT, rtx);
  47 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  48                                     unsigned HOST_WIDE_INT,
  49                                     unsigned HOST_WIDE_INT,
  50                                     unsigned HOST_WIDE_INT, rtx, int);
  51 static rtx mask_rtx (enum machine_mode, int, int, int);
  52 static rtx lshift_value (enum machine_mode, rtx, int, int);
  53 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  54                                     unsigned HOST_WIDE_INT, int);
  55 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  56 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  57 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  58
  59 /* Test whether a value is zero of a power of two.  */
  60 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
  61
  62 /* Nonzero means divides or modulus operations are relatively cheap for
  63    powers of two, so don't use branches; emit the operation instead.
  64    Usually, this will mean that the MD file will emit non-branch
  65    sequences.  */
  66
  67 static bool sdiv_pow2_cheap[2][NUM_MACHINE_MODES];
  68 static bool smod_pow2_cheap[2][NUM_MACHINE_MODES];
  69
  70 #ifndef SLOW_UNALIGNED_ACCESS
  71 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  72 #endif
  73
  74 /* For compilers that support multiple targets with different word sizes,
  75    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  76    is the H8/300(H) compiler.  */
  77
  78 #ifndef MAX_BITS_PER_WORD
  79 #define MAX_BITS_PER_WORD BITS_PER_WORD
  80 #endif
  81
  82 /* Reduce conditional compilation elsewhere.  */
  83 #ifndef HAVE_insv
  84 #define HAVE_insv       0
  85 #define CODE_FOR_insv   CODE_FOR_nothing
  86 #define gen_insv(a,b,c,d) NULL_RTX
  87 #endif
  88 #ifndef HAVE_extv
  89 #define HAVE_extv       0
  90 #define CODE_FOR_extv   CODE_FOR_nothing
  91 #define gen_extv(a,b,c,d) NULL_RTX
  92 #endif
  93 #ifndef HAVE_extzv
  94 #define HAVE_extzv      0
  95 #define CODE_FOR_extzv  CODE_FOR_nothing
  96 #define gen_extzv(a,b,c,d) NULL_RTX
  97 #endif
  98
  99 /* Cost of various pieces of RTL.  Note that some of these are indexed by
 100    shift count and some by mode.  */
 101 static int zero_cost[2];
 102 static int add_cost[2][NUM_MACHINE_MODES];
 103 static int neg_cost[2][NUM_MACHINE_MODES];
 104 static int shift_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 105 static int shiftadd_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 106 static int shiftsub0_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 107 static int shiftsub1_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 108 static int mul_cost[2][NUM_MACHINE_MODES];
 109 static int sdiv_cost[2][NUM_MACHINE_MODES];
 110 static int udiv_cost[2][NUM_MACHINE_MODES];
 111 static int mul_widen_cost[2][NUM_MACHINE_MODES];
 112 static int mul_highpart_cost[2][NUM_MACHINE_MODES];
 113
 114 void
 115 init_expmed (void)
 116 {
 117   struct
 118   {
 119     struct rtx_def reg;         rtunion reg_fld[2];
 120     struct rtx_def plus;        rtunion plus_fld1;
 121     struct rtx_def neg;
 122     struct rtx_def mult;        rtunion mult_fld1;
 123     struct rtx_def sdiv;        rtunion sdiv_fld1;
 124     struct rtx_def udiv;        rtunion udiv_fld1;
 125     struct rtx_def zext;
 126     struct rtx_def sdiv_32;     rtunion sdiv_32_fld1;
 127     struct rtx_def smod_32;     rtunion smod_32_fld1;
 128     struct rtx_def wide_mult;   rtunion wide_mult_fld1;
 129     struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
 130     struct rtx_def wide_trunc;
 131     struct rtx_def shift;       rtunion shift_fld1;
 132     struct rtx_def shift_mult;  rtunion shift_mult_fld1;
 133     struct rtx_def shift_add;   rtunion shift_add_fld1;
 134     struct rtx_def shift_sub0;  rtunion shift_sub0_fld1;
 135     struct rtx_def shift_sub1;  rtunion shift_sub1_fld1;
 136   } all;
 137
 138   rtx pow2[MAX_BITS_PER_WORD];
 139   rtx cint[MAX_BITS_PER_WORD];
 140   int m, n;
 141   enum machine_mode mode, wider_mode;
 142   int speed;
 143
 144
 145   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 146     {
 147       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 148       cint[m] = GEN_INT (m);
 149     }
 150   memset (&all, 0, sizeof all);
 151
 152   PUT_CODE (&all.reg, REG);
 153   /* Avoid using hard regs in ways which may be unsupported.  */
 154   SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
 155
 156   PUT_CODE (&all.plus, PLUS);
 157   XEXP (&all.plus, 0) = &all.reg;
 158   XEXP (&all.plus, 1) = &all.reg;
 159
 160   PUT_CODE (&all.neg, NEG);
 161   XEXP (&all.neg, 0) = &all.reg;
 162
 163   PUT_CODE (&all.mult, MULT);
 164   XEXP (&all.mult, 0) = &all.reg;
 165   XEXP (&all.mult, 1) = &all.reg;
 166
 167   PUT_CODE (&all.sdiv, DIV);
 168   XEXP (&all.sdiv, 0) = &all.reg;
 169   XEXP (&all.sdiv, 1) = &all.reg;
 170
 171   PUT_CODE (&all.udiv, UDIV);
 172   XEXP (&all.udiv, 0) = &all.reg;
 173   XEXP (&all.udiv, 1) = &all.reg;
 174
 175   PUT_CODE (&all.sdiv_32, DIV);
 176   XEXP (&all.sdiv_32, 0) = &all.reg;
 177   XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
 178
 179   PUT_CODE (&all.smod_32, MOD);
 180   XEXP (&all.smod_32, 0) = &all.reg;
 181   XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
 182
 183   PUT_CODE (&all.zext, ZERO_EXTEND);
 184   XEXP (&all.zext, 0) = &all.reg;
 185
 186   PUT_CODE (&all.wide_mult, MULT);
 187   XEXP (&all.wide_mult, 0) = &all.zext;
 188   XEXP (&all.wide_mult, 1) = &all.zext;
 189
 190   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 191   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 192
 193   PUT_CODE (&all.wide_trunc, TRUNCATE);
 194   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 195
 196   PUT_CODE (&all.shift, ASHIFT);
 197   XEXP (&all.shift, 0) = &all.reg;
 198
 199   PUT_CODE (&all.shift_mult, MULT);
 200   XEXP (&all.shift_mult, 0) = &all.reg;
 201
 202   PUT_CODE (&all.shift_add, PLUS);
 203   XEXP (&all.shift_add, 0) = &all.shift_mult;
 204   XEXP (&all.shift_add, 1) = &all.reg;
 205
 206   PUT_CODE (&all.shift_sub0, MINUS);
 207   XEXP (&all.shift_sub0, 0) = &all.shift_mult;
 208   XEXP (&all.shift_sub0, 1) = &all.reg;
 209
 210   PUT_CODE (&all.shift_sub1, MINUS);
 211   XEXP (&all.shift_sub1, 0) = &all.reg;
 212   XEXP (&all.shift_sub1, 1) = &all.shift_mult;
 213
 214   for (speed = 0; speed < 2; speed++)
 215     {
 216       crtl->maybe_hot_insn_p = speed;
 217       zero_cost[speed] = rtx_cost (const0_rtx, SET, speed);
 218
 219       for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 220            mode != VOIDmode;
 221            mode = GET_MODE_WIDER_MODE (mode))
 222         {
 223           PUT_MODE (&all.reg, mode);
 224           PUT_MODE (&all.plus, mode);
 225           PUT_MODE (&all.neg, mode);
 226           PUT_MODE (&all.mult, mode);
 227           PUT_MODE (&all.sdiv, mode);
 228           PUT_MODE (&all.udiv, mode);
 229           PUT_MODE (&all.sdiv_32, mode);
 230           PUT_MODE (&all.smod_32, mode);
 231           PUT_MODE (&all.wide_trunc, mode);
 232           PUT_MODE (&all.shift, mode);
 233           PUT_MODE (&all.shift_mult, mode);
 234           PUT_MODE (&all.shift_add, mode);
 235           PUT_MODE (&all.shift_sub0, mode);
 236           PUT_MODE (&all.shift_sub1, mode);
 237
 238           add_cost[speed][mode] = rtx_cost (&all.plus, SET, speed);
 239           neg_cost[speed][mode] = rtx_cost (&all.neg, SET, speed);
 240           mul_cost[speed][mode] = rtx_cost (&all.mult, SET, speed);
 241           sdiv_cost[speed][mode] = rtx_cost (&all.sdiv, SET, speed);
 242           udiv_cost[speed][mode] = rtx_cost (&all.udiv, SET, speed);
 243
 244           sdiv_pow2_cheap[speed][mode] = (rtx_cost (&all.sdiv_32, SET, speed)
 245                                           <= 2 * add_cost[speed][mode]);
 246           smod_pow2_cheap[speed][mode] = (rtx_cost (&all.smod_32, SET, speed)
 247                                           <= 4 * add_cost[speed][mode]);
 248
 249           wider_mode = GET_MODE_WIDER_MODE (mode);
 250           if (wider_mode != VOIDmode)
 251             {
 252               PUT_MODE (&all.zext, wider_mode);
 253               PUT_MODE (&all.wide_mult, wider_mode);
 254               PUT_MODE (&all.wide_lshr, wider_mode);
 255               XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
 256
 257               mul_widen_cost[speed][wider_mode]
 258                 = rtx_cost (&all.wide_mult, SET, speed);
 259               mul_highpart_cost[speed][mode]
 260                 = rtx_cost (&all.wide_trunc, SET, speed);
 261             }
 262
 263           shift_cost[speed][mode][0] = 0;
 264           shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
 265             = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];
 266
 267           n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
 268           for (m = 1; m < n; m++)
 269             {
 270               XEXP (&all.shift, 1) = cint[m];
 271               XEXP (&all.shift_mult, 1) = pow2[m];
 272
 273               shift_cost[speed][mode][m] = rtx_cost (&all.shift, SET, speed);
 274               shiftadd_cost[speed][mode][m] = rtx_cost (&all.shift_add, SET, speed);
 275               shiftsub0_cost[speed][mode][m] = rtx_cost (&all.shift_sub0, SET, speed);
 276               shiftsub1_cost[speed][mode][m] = rtx_cost (&all.shift_sub1, SET, speed);
 277             }
 278         }
 279     }
 280   default_rtl_profile ();
 281 }
 282
 283 /* Return an rtx representing minus the value of X.
 284    MODE is the intended mode of the result,
 285    useful if X is a CONST_INT.  */
 286
 287 rtx
 288 negate_rtx (enum machine_mode mode, rtx x)
 289 {
 290   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 291
 292   if (result == 0)
 293     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 294
 295   return result;
 296 }
 297
 298 /* Report on the availability of insv/extv/extzv and the desired mode
 299    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 300    is false; else the mode of the specified operand.  If OPNO is -1,
 301    all the caller cares about is whether the insn is available.  */
 302 enum machine_mode
 303 mode_for_extraction (enum extraction_pattern pattern, int opno)
 304 {
 305   const struct insn_data *data;
 306
 307   switch (pattern)
 308     {
 309     case EP_insv:
 310       if (HAVE_insv)
 311         {
 312           data = &insn_data[CODE_FOR_insv];
 313           break;
 314         }
 315       return MAX_MACHINE_MODE;
 316
 317     case EP_extv:
 318       if (HAVE_extv)
 319         {
 320           data = &insn_data[CODE_FOR_extv];
 321           break;
 322         }
 323       return MAX_MACHINE_MODE;
 324
 325     case EP_extzv:
 326       if (HAVE_extzv)
 327         {
 328           data = &insn_data[CODE_FOR_extzv];
 329           break;
 330         }
 331       return MAX_MACHINE_MODE;
 332
 333     default:
 334       gcc_unreachable ();
 335     }
 336
 337   if (opno == -1)
 338     return VOIDmode;
 339
 340   /* Everyone who uses this function used to follow it with
 341      if (result == VOIDmode) result = word_mode; */
 342   if (data->operand[opno].mode == VOIDmode)
 343     return word_mode;
 344   return data->operand[opno].mode;
 345 }
 346
 347 /* Return true if X, of mode MODE, matches the predicate for operand
 348    OPNO of instruction ICODE.  Allow volatile memories, regardless of
 349    the ambient volatile_ok setting.  */
 350
 351 static bool
 352 check_predicate_volatile_ok (enum insn_code icode, int opno,
 353                              rtx x, enum machine_mode mode)
 354 {
 355   bool save_volatile_ok, result;
 356
 357   save_volatile_ok = volatile_ok;
 358   result = insn_data[(int) icode].operand[opno].predicate (x, mode);
 359   volatile_ok = save_volatile_ok;
 360   return result;
 361 }
 362 \f
 363 /* A subroutine of store_bit_field, with the same arguments.  Return true
 364    if the operation could be implemented.
 365
 366    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 367    no other way of implementing the operation.  If FALLBACK_P is false,
 368    return false instead.  */
 369
 370 static bool
 371 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 372                    unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 373                    rtx value, bool fallback_p)
 374 {
 375   unsigned int unit
 376     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
 377   unsigned HOST_WIDE_INT offset, bitpos;
 378   rtx op0 = str_rtx;
 379   int byte_offset;
 380   rtx orig_value;
 381
 382   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 383
 384   while (GET_CODE (op0) == SUBREG)
 385     {
 386       /* The following line once was done only if WORDS_BIG_ENDIAN,
 387          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 388          meaningful at a much higher level; when structures are copied
 389          between memory and regs, the higher-numbered regs
 390          always get higher addresses.  */
 391       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 392       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 393
 394       byte_offset = 0;
 395
 396       /* Paradoxical subregs need special handling on big endian machines.  */
 397       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 398         {
 399           int difference = inner_mode_size - outer_mode_size;
 400
 401           if (WORDS_BIG_ENDIAN)
 402             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 403           if (BYTES_BIG_ENDIAN)
 404             byte_offset += difference % UNITS_PER_WORD;
 405         }
 406       else
 407         byte_offset = SUBREG_BYTE (op0);
 408
 409       bitnum += byte_offset * BITS_PER_UNIT;
 410       op0 = SUBREG_REG (op0);
 411     }
 412
 413   /* No action is needed if the target is a register and if the field
 414      lies completely outside that register.  This can occur if the source
 415      code contains an out-of-bounds access to a small array.  */
 416   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 417     return true;
 418
 419   /* Use vec_set patterns for inserting parts of vectors whenever
 420      available.  */
 421   if (VECTOR_MODE_P (GET_MODE (op0))
 422       && !MEM_P (op0)
 423       && (optab_handler (vec_set_optab, GET_MODE (op0))->insn_code
 424           != CODE_FOR_nothing)
 425       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 426       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 427       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 428     {
 429       enum machine_mode outermode = GET_MODE (op0);
 430       enum machine_mode innermode = GET_MODE_INNER (outermode);
 431       int icode = (int) optab_handler (vec_set_optab, outermode)->insn_code;
 432       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 433       rtx rtxpos = GEN_INT (pos);
 434       rtx src = value;
 435       rtx dest = op0;
 436       rtx pat, seq;
 437       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
 438       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
 439       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
 440
 441       start_sequence ();
 442
 443       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
 444         src = copy_to_mode_reg (mode1, src);
 445
 446       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 447         rtxpos = copy_to_mode_reg (mode1, rtxpos);
 448
 449       /* We could handle this, but we should always be called with a pseudo
 450          for our targets and all insns should take them as outputs.  */
 451       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
 452                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
 453                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
 454       pat = GEN_FCN (icode) (dest, src, rtxpos);
 455       seq = get_insns ();
 456       end_sequence ();
 457       if (pat)
 458         {
 459           emit_insn (seq);
 460           emit_insn (pat);
 461           return true;
 462         }
 463     }
 464
 465   /* If the target is a register, overwriting the entire object, or storing
 466      a full-word or multi-word field can be done with just a SUBREG.
 467
 468      If the target is memory, storing any naturally aligned field can be
 469      done with a simple store.  For targets that support fast unaligned
 470      memory, any naturally sized, unit aligned field can be done directly.  */
 471
 472   offset = bitnum / unit;
 473   bitpos = bitnum % unit;
 474   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 475                 + (offset * UNITS_PER_WORD);
 476
 477   if (bitpos == 0
 478       && bitsize == GET_MODE_BITSIZE (fieldmode)
 479       && (!MEM_P (op0)
 480           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 481              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 482              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 483           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 484              || (offset * BITS_PER_UNIT % bitsize == 0
 485                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 486     {
 487       if (MEM_P (op0))
 488         op0 = adjust_address (op0, fieldmode, offset);
 489       else if (GET_MODE (op0) != fieldmode)
 490         op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 491                                    byte_offset);
 492       emit_move_insn (op0, value);
 493       return true;
 494     }
 495
 496   /* Make sure we are playing with integral modes.  Pun with subregs
 497      if we aren't.  This must come after the entire register case above,
 498      since that case is valid for any mode.  The following cases are only
 499      valid for integral modes.  */
 500   {
 501     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 502     if (imode != GET_MODE (op0))
 503       {
 504         if (MEM_P (op0))
 505           op0 = adjust_address (op0, imode, 0);
 506         else
 507           {
 508             gcc_assert (imode != BLKmode);
 509             op0 = gen_lowpart (imode, op0);
 510           }
 511       }
 512   }
 513
 514   /* We may be accessing data outside the field, which means
 515      we can alias adjacent data.  */
 516   if (MEM_P (op0))
 517     {
 518       op0 = shallow_copy_rtx (op0);
 519       set_mem_alias_set (op0, 0);
 520       set_mem_expr (op0, 0);
 521     }
 522
 523   /* If OP0 is a register, BITPOS must count within a word.
 524      But as we have it, it counts within whatever size OP0 now has.
 525      On a bigendian machine, these are not the same, so convert.  */
 526   if (BYTES_BIG_ENDIAN
 527       && !MEM_P (op0)
 528       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 529     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 530
 531   /* Storing an lsb-aligned field in a register
 532      can be done with a movestrict instruction.  */
 533
 534   if (!MEM_P (op0)
 535       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 536       && bitsize == GET_MODE_BITSIZE (fieldmode)
 537       && (optab_handler (movstrict_optab, fieldmode)->insn_code
 538           != CODE_FOR_nothing))
 539     {
 540       int icode = optab_handler (movstrict_optab, fieldmode)->insn_code;
 541       rtx insn;
 542       rtx start = get_last_insn ();
 543       rtx arg0 = op0;
 544
 545       /* Get appropriate low part of the value being stored.  */
 546       if (CONST_INT_P (value) || REG_P (value))
 547         value = gen_lowpart (fieldmode, value);
 548       else if (!(GET_CODE (value) == SYMBOL_REF
 549                  || GET_CODE (value) == LABEL_REF
 550                  || GET_CODE (value) == CONST))
 551         value = convert_to_mode (fieldmode, value, 0);
 552
 553       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 554         value = copy_to_mode_reg (fieldmode, value);
 555
 556       if (GET_CODE (op0) == SUBREG)
 557         {
 558           /* Else we've got some float mode source being extracted into
 559              a different float mode destination -- this combination of
 560              subregs results in Severe Tire Damage.  */
 561           gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
 562                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 563                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 564           arg0 = SUBREG_REG (op0);
 565         }
 566
 567       insn = (GEN_FCN (icode)
 568                  (gen_rtx_SUBREG (fieldmode, arg0,
 569                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 570                                   + (offset * UNITS_PER_WORD)),
 571                                   value));
 572       if (insn)
 573         {
 574           emit_insn (insn);
 575           return true;
 576         }
 577       delete_insns_since (start);
 578     }
 579
 580   /* Handle fields bigger than a word.  */
 581
 582   if (bitsize > BITS_PER_WORD)
 583     {
 584       /* Here we transfer the words of the field
 585          in the order least significant first.
 586          This is because the most significant word is the one which may
 587          be less than full.
 588          However, only do that if the value is not BLKmode.  */
 589
 590       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 591       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 592       unsigned int i;
 593       rtx last;
 594
 595       /* This is the mode we must force value to, so that there will be enough
 596          subwords to extract.  Note that fieldmode will often (always?) be
 597          VOIDmode, because that is what store_field uses to indicate that this
 598          is a bit field, but passing VOIDmode to operand_subword_force
 599          is not allowed.  */
 600       fieldmode = GET_MODE (value);
 601       if (fieldmode == VOIDmode)
 602         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 603
 604       last = get_last_insn ();
 605       for (i = 0; i < nwords; i++)
 606         {
 607           /* If I is 0, use the low-order word in both field and target;
 608              if I is 1, use the next to lowest word; and so on.  */
 609           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 610           unsigned int bit_offset = (backwards
 611                                      ? MAX ((int) bitsize - ((int) i + 1)
 612                                             * BITS_PER_WORD,
 613                                             0)
 614                                      : (int) i * BITS_PER_WORD);
 615           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 616
 617           if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD,
 618                                             bitsize - i * BITS_PER_WORD),
 619                                   bitnum + bit_offset, word_mode,
 620                                   value_word, fallback_p))
 621             {
 622               delete_insns_since (last);
 623               return false;
 624             }
 625         }
 626       return true;
 627     }
 628
 629   /* From here on we can assume that the field to be stored in is
 630      a full-word (whatever type that is), since it is shorter than a word.  */
 631
 632   /* OFFSET is the number of words or bytes (UNIT says which)
 633      from STR_RTX to the first word or byte containing part of the field.  */
 634
 635   if (!MEM_P (op0))
 636     {
 637       if (offset != 0
 638           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 639         {
 640           if (!REG_P (op0))
 641             {
 642               /* Since this is a destination (lvalue), we can't copy
 643                  it to a pseudo.  We can remove a SUBREG that does not
 644                  change the size of the operand.  Such a SUBREG may
 645                  have been added above.  */
 646               gcc_assert (GET_CODE (op0) == SUBREG
 647                           && (GET_MODE_SIZE (GET_MODE (op0))
 648                               == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
 649               op0 = SUBREG_REG (op0);
 650             }
 651           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 652                                 op0, (offset * UNITS_PER_WORD));
 653         }
 654       offset = 0;
 655     }
 656
 657   /* If VALUE has a floating-point or complex mode, access it as an
 658      integer of the corresponding size.  This can occur on a machine
 659      with 64 bit registers that uses SFmode for float.  It can also
 660      occur for unaligned float or complex fields.  */
 661   orig_value = value;
 662   if (GET_MODE (value) != VOIDmode
 663       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 664       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 665     {
 666       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 667       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 668     }
 669
 670   /* Now OFFSET is nonzero only if OP0 is memory
 671      and is therefore always measured in bytes.  */
 672
 673   if (HAVE_insv
 674       && GET_MODE (value) != BLKmode
 675       && bitsize > 0
 676       && GET_MODE_BITSIZE (op_mode) >= bitsize
 677       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
 678             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
 679       && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
 680                                                         VOIDmode)
 681       && check_predicate_volatile_ok (CODE_FOR_insv, 0, op0, VOIDmode))
 682     {
 683       int xbitpos = bitpos;
 684       rtx value1;
 685       rtx xop0 = op0;
 686       rtx last = get_last_insn ();
 687       rtx pat;
 688       bool copy_back = false;
 689
 690       /* Add OFFSET into OP0's address.  */
 691       if (MEM_P (xop0))
 692         xop0 = adjust_address (xop0, byte_mode, offset);
 693
 694       /* If xop0 is a register, we need it in OP_MODE
 695          to make it acceptable to the format of insv.  */
 696       if (GET_CODE (xop0) == SUBREG)
 697         /* We can't just change the mode, because this might clobber op0,
 698            and we will need the original value of op0 if insv fails.  */
 699         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 700       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 701         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 702
 703       /* If the destination is a paradoxical subreg such that we need a
 704          truncate to the inner mode, perform the insertion on a temporary and
 705          truncate the result to the original destination.  Note that we can't
 706          just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 707          X) 0)) is (reg:N X).  */
 708       if (GET_CODE (xop0) == SUBREG
 709           && REG_P (SUBREG_REG (xop0))
 710           && (!TRULY_NOOP_TRUNCATION
 711               (GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (xop0))),
 712                GET_MODE_BITSIZE (op_mode))))
 713         {
 714           rtx tem = gen_reg_rtx (op_mode);
 715           emit_move_insn (tem, xop0);
 716           xop0 = tem;
 717           copy_back = true;
 718         }
 719
 720       /* On big-endian machines, we count bits from the most significant.
 721          If the bit field insn does not, we must invert.  */
 722
 723       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 724         xbitpos = unit - bitsize - xbitpos;
 725
 726       /* We have been counting XBITPOS within UNIT.
 727          Count instead within the size of the register.  */
 728       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
 729         xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
 730
 731       unit = GET_MODE_BITSIZE (op_mode);
 732
 733       /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 734       value1 = value;
 735       if (GET_MODE (value) != op_mode)
 736         {
 737           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 738             {
 739               /* Optimization: Don't bother really extending VALUE
 740                  if it has all the bits we will actually use.  However,
 741                  if we must narrow it, be sure we do it correctly.  */
 742
 743               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 744                 {
 745                   rtx tmp;
 746
 747                   tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 748                   if (! tmp)
 749                     tmp = simplify_gen_subreg (op_mode,
 750                                                force_reg (GET_MODE (value),
 751                                                           value1),
 752                                                GET_MODE (value), 0);
 753                   value1 = tmp;
 754                 }
 755               else
 756                 value1 = gen_lowpart (op_mode, value1);
 757             }
 758           else if (CONST_INT_P (value))
 759             value1 = gen_int_mode (INTVAL (value), op_mode);
 760           else
 761             /* Parse phase is supposed to make VALUE's data type
 762                match that of the component reference, which is a type
 763                at least as wide as the field; so VALUE should have
 764                a mode that corresponds to that type.  */
 765             gcc_assert (CONSTANT_P (value));
 766         }
 767
 768       /* If this machine's insv insists on a register,
 769          get VALUE1 into a register.  */
 770       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 771              (value1, op_mode)))
 772         value1 = force_reg (op_mode, value1);
 773
 774       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 775       if (pat)
 776         {
 777           emit_insn (pat);
 778
 779           if (copy_back)
 780             convert_move (op0, xop0, true);
 781           return true;
 782         }
 783       delete_insns_since (last);
 784     }
 785
 786   /* If OP0 is a memory, try copying it to a register and seeing if a
 787      cheap register alternative is available.  */
 788   if (HAVE_insv && MEM_P (op0))
 789     {
 790       enum machine_mode bestmode;
 791
 792       /* Get the mode to use for inserting into this field.  If OP0 is
 793          BLKmode, get the smallest mode consistent with the alignment. If
 794          OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
 795          mode. Otherwise, use the smallest mode containing the field.  */
 796
 797       if (GET_MODE (op0) == BLKmode
 798           || (op_mode != MAX_MACHINE_MODE
 799               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
 800         bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
 801                                   (op_mode == MAX_MACHINE_MODE
 802                                    ? VOIDmode : op_mode),
 803                                   MEM_VOLATILE_P (op0));
 804       else
 805         bestmode = GET_MODE (op0);
 806
 807       if (bestmode != VOIDmode
 808           && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
 809           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 810                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 811         {
 812           rtx last, tempreg, xop0;
 813           unsigned HOST_WIDE_INT xoffset, xbitpos;
 814
 815           last = get_last_insn ();
 816
 817           /* Adjust address to point to the containing unit of
 818              that mode.  Compute the offset as a multiple of this unit,
 819              counting in bytes.  */
 820           unit = GET_MODE_BITSIZE (bestmode);
 821           xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 822           xbitpos = bitnum % unit;
 823           xop0 = adjust_address (op0, bestmode, xoffset);
 824
 825           /* Fetch that unit, store the bitfield in it, then store
 826              the unit.  */
 827           tempreg = copy_to_reg (xop0);
 828           if (store_bit_field_1 (tempreg, bitsize, xbitpos,
 829                                  fieldmode, orig_value, false))
 830             {
 831               emit_move_insn (xop0, tempreg);
 832               return true;
 833             }
 834           delete_insns_since (last);
 835         }
 836     }
 837
 838   if (!fallback_p)
 839     return false;
 840
 841   store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 842   return true;
 843 }
 844
 845 /* Generate code to store value from rtx VALUE
 846    into a bit-field within structure STR_RTX
 847    containing BITSIZE bits starting at bit BITNUM.
 848    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */
 849
 850 void
 851 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 852                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 853                  rtx value)
 854 {
 855   if (!store_bit_field_1 (str_rtx, bitsize, bitnum, fieldmode, value, true))
 856     gcc_unreachable ();
 857 }
 858 \f
 859 /* Use shifts and boolean operations to store VALUE
 860    into a bit field of width BITSIZE
 861    in a memory location specified by OP0 except offset by OFFSET bytes.
 862      (OFFSET must be 0 if OP0 is a register.)
 863    The field starts at position BITPOS within the byte.
 864     (If OP0 is a register, it may be a full word or a narrower mode,
 865      but BITPOS still counts within a full word,
 866      which is significant on bigendian machines.)  */
 867
 868 static void
 869 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 870                        unsigned HOST_WIDE_INT bitsize,
 871                        unsigned HOST_WIDE_INT bitpos, rtx value)
 872 {
 873   enum machine_mode mode;
 874   unsigned int total_bits = BITS_PER_WORD;
 875   rtx temp;
 876   int all_zero = 0;
 877   int all_one = 0;
 878
 879   /* There is a case not handled here:
 880      a structure with a known alignment of just a halfword
 881      and a field split across two aligned halfwords within the structure.
 882      Or likewise a structure with a known alignment of just a byte
 883      and a field split across two bytes.
 884      Such cases are not supposed to be able to occur.  */
 885
 886   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 887     {
 888       gcc_assert (!offset);
 889       /* Special treatment for a bit field split across two registers.  */
 890       if (bitsize + bitpos > BITS_PER_WORD)
 891         {
 892           store_split_bit_field (op0, bitsize, bitpos, value);
 893           return;
 894         }
 895     }
 896   else
 897     {
 898       /* Get the proper mode to use for this field.  We want a mode that
 899          includes the entire field.  If such a mode would be larger than
 900          a word, we won't be doing the extraction the normal way.
 901          We don't want a mode bigger than the destination.  */
 902
 903       mode = GET_MODE (op0);
 904       if (GET_MODE_BITSIZE (mode) == 0
 905           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 906         mode = word_mode;
 907       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 908                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 909
 910       if (mode == VOIDmode)
 911         {
 912           /* The only way this should occur is if the field spans word
 913              boundaries.  */
 914           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 915                                  value);
 916           return;
 917         }
 918
 919       total_bits = GET_MODE_BITSIZE (mode);
 920
 921       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 922          be in the range 0 to total_bits-1, and put any excess bytes in
 923          OFFSET.  */
 924       if (bitpos >= total_bits)
 925         {
 926           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 927           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 928                      * BITS_PER_UNIT);
 929         }
 930
 931       /* Get ref to an aligned byte, halfword, or word containing the field.
 932          Adjust BITPOS to be position within a word,
 933          and OFFSET to be the offset of that word.
 934          Then alter OP0 to refer to that word.  */
 935       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 936       offset -= (offset % (total_bits / BITS_PER_UNIT));
 937       op0 = adjust_address (op0, mode, offset);
 938     }
 939
 940   mode = GET_MODE (op0);
 941
 942   /* Now MODE is either some integral mode for a MEM as OP0,
 943      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 944      The bit field is contained entirely within OP0.
 945      BITPOS is the starting bit number within OP0.
 946      (OP0's mode may actually be narrower than MODE.)  */
 947
 948   if (BYTES_BIG_ENDIAN)
 949       /* BITPOS is the distance between our msb
 950          and that of the containing datum.
 951          Convert it to the distance from the lsb.  */
 952       bitpos = total_bits - bitsize - bitpos;
 953
 954   /* Now BITPOS is always the distance between our lsb
 955      and that of OP0.  */
 956
 957   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 958      we must first convert its mode to MODE.  */
 959
 960   if (CONST_INT_P (value))
 961     {
 962       HOST_WIDE_INT v = INTVAL (value);
 963
 964       if (bitsize < HOST_BITS_PER_WIDE_INT)
 965         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 966
 967       if (v == 0)
 968         all_zero = 1;
 969       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 970                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 971                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 972         all_one = 1;
 973
 974       value = lshift_value (mode, value, bitpos, bitsize);
 975     }
 976   else
 977     {
 978       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 979                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 980
 981       if (GET_MODE (value) != mode)
 982         value = convert_to_mode (mode, value, 1);
 983
 984       if (must_and)
 985         value = expand_binop (mode, and_optab, value,
 986                               mask_rtx (mode, 0, bitsize, 0),
 987                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 988       if (bitpos > 0)
 989         value = expand_shift (LSHIFT_EXPR, mode, value,
 990                               build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
 991     }
 992
 993   /* Now clear the chosen bits in OP0,
 994      except that if VALUE is -1 we need not bother.  */
 995   /* We keep the intermediates in registers to allow CSE to combine
 996      consecutive bitfield assignments.  */
 997
 998   temp = force_reg (mode, op0);
 999
1000   if (! all_one)
1001     {
1002       temp = expand_binop (mode, and_optab, temp,
1003                            mask_rtx (mode, bitpos, bitsize, 1),
1004                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1005       temp = force_reg (mode, temp);
1006     }
1007
1008   /* Now logical-or VALUE into OP0, unless it is zero.  */
1009
1010   if (! all_zero)
1011     {
1012       temp = expand_binop (mode, ior_optab, temp, value,
1013                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1014       temp = force_reg (mode, temp);
1015     }
1016
1017   if (op0 != temp)
1018     {
1019       op0 = copy_rtx (op0);
1020       emit_move_insn (op0, temp);
1021     }
1022 }
1023 \f
1024 /* Store a bit field that is split across multiple accessible memory objects.
1025
1026    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1027    BITSIZE is the field width; BITPOS the position of its first bit
1028    (within the word).
1029    VALUE is the value to store.
1030
1031    This does not yet handle fields wider than BITS_PER_WORD.  */
1032
1033 static void
1034 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1035                        unsigned HOST_WIDE_INT bitpos, rtx value)
1036 {
1037   unsigned int unit;
1038   unsigned int bitsdone = 0;
1039
1040   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1041      much at a time.  */
1042   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1043     unit = BITS_PER_WORD;
1044   else
1045     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1046
1047   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1048      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1049      that VALUE might be a floating-point constant.  */
1050   if (CONSTANT_P (value) && !CONST_INT_P (value))
1051     {
1052       rtx word = gen_lowpart_common (word_mode, value);
1053
1054       if (word && (value != word))
1055         value = word;
1056       else
1057         value = gen_lowpart_common (word_mode,
1058                                     force_reg (GET_MODE (value) != VOIDmode
1059                                                ? GET_MODE (value)
1060                                                : word_mode, value));
1061     }
1062
1063   while (bitsdone < bitsize)
1064     {
1065       unsigned HOST_WIDE_INT thissize;
1066       rtx part, word;
1067       unsigned HOST_WIDE_INT thispos;
1068       unsigned HOST_WIDE_INT offset;
1069
1070       offset = (bitpos + bitsdone) / unit;
1071       thispos = (bitpos + bitsdone) % unit;
1072
1073       /* THISSIZE must not overrun a word boundary.  Otherwise,
1074          store_fixed_bit_field will call us again, and we will mutually
1075          recurse forever.  */
1076       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1077       thissize = MIN (thissize, unit - thispos);
1078
1079       if (BYTES_BIG_ENDIAN)
1080         {
1081           int total_bits;
1082
1083           /* We must do an endian conversion exactly the same way as it is
1084              done in extract_bit_field, so that the two calls to
1085              extract_fixed_bit_field will have comparable arguments.  */
1086           if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1087             total_bits = BITS_PER_WORD;
1088           else
1089             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1090
1091           /* Fetch successively less significant portions.  */
1092           if (CONST_INT_P (value))
1093             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1094                              >> (bitsize - bitsdone - thissize))
1095                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1096           else
1097             /* The args are chosen so that the last part includes the
1098                lsb.  Give extract_bit_field the value it needs (with
1099                endianness compensation) to fetch the piece we want.  */
1100             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1101                                             total_bits - bitsize + bitsdone,
1102                                             NULL_RTX, 1);
1103         }
1104       else
1105         {
1106           /* Fetch successively more significant portions.  */
1107           if (CONST_INT_P (value))
1108             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1109                              >> bitsdone)
1110                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1111           else
1112             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1113                                             bitsdone, NULL_RTX, 1);
1114         }
1115
1116       /* If OP0 is a register, then handle OFFSET here.
1117
1118          When handling multiword bitfields, extract_bit_field may pass
1119          down a word_mode SUBREG of a larger REG for a bitfield that actually
1120          crosses a word boundary.  Thus, for a SUBREG, we must find
1121          the current word starting from the base register.  */
1122       if (GET_CODE (op0) == SUBREG)
1123         {
1124           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1125           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1126                                         GET_MODE (SUBREG_REG (op0)));
1127           offset = 0;
1128         }
1129       else if (REG_P (op0))
1130         {
1131           word = operand_subword_force (op0, offset, GET_MODE (op0));
1132           offset = 0;
1133         }
1134       else
1135         word = op0;
1136
1137       /* OFFSET is in UNITs, and UNIT is in bits.
1138          store_fixed_bit_field wants offset in bytes.  */
1139       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1140                              thispos, part);
1141       bitsdone += thissize;
1142     }
1143 }
1144 \f
1145 /* A subroutine of extract_bit_field_1 that converts return value X
1146    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1147    to extract_bit_field.  */
1148
1149 static rtx
1150 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1151                              enum machine_mode tmode, bool unsignedp)
1152 {
1153   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1154     return x;
1155
1156   /* If the x mode is not a scalar integral, first convert to the
1157      integer mode of that size and then access it as a floating-point
1158      value via a SUBREG.  */
1159   if (!SCALAR_INT_MODE_P (tmode))
1160     {
1161       enum machine_mode smode;
1162
1163       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1164       x = convert_to_mode (smode, x, unsignedp);
1165       x = force_reg (smode, x);
1166       return gen_lowpart (tmode, x);
1167     }
1168
1169   return convert_to_mode (tmode, x, unsignedp);
1170 }
1171
1172 /* A subroutine of extract_bit_field, with the same arguments.
1173    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1174    if we can find no other means of implementing the operation.
1175    if FALLBACK_P is false, return NULL instead.  */
1176
1177 static rtx
1178 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1179                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1180                      enum machine_mode mode, enum machine_mode tmode,
1181                      bool fallback_p)
1182 {
1183   unsigned int unit
1184     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1185   unsigned HOST_WIDE_INT offset, bitpos;
1186   rtx op0 = str_rtx;
1187   enum machine_mode int_mode;
1188   enum machine_mode ext_mode;
1189   enum machine_mode mode1;
1190   enum insn_code icode;
1191   int byte_offset;
1192
1193   if (tmode == VOIDmode)
1194     tmode = mode;
1195
1196   while (GET_CODE (op0) == SUBREG)
1197     {
1198       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1199       op0 = SUBREG_REG (op0);
1200     }
1201
1202   /* If we have an out-of-bounds access to a register, just return an
1203      uninitialized register of the required mode.  This can occur if the
1204      source code contains an out-of-bounds access to a small array.  */
1205   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1206     return gen_reg_rtx (tmode);
1207
1208   if (REG_P (op0)
1209       && mode == GET_MODE (op0)
1210       && bitnum == 0
1211       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1212     {
1213       /* We're trying to extract a full register from itself.  */
1214       return op0;
1215     }
1216
1217   /* See if we can get a better vector mode before extracting.  */
1218   if (VECTOR_MODE_P (GET_MODE (op0))
1219       && !MEM_P (op0)
1220       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1221     {
1222       enum machine_mode new_mode;
1223       int nunits = GET_MODE_NUNITS (GET_MODE (op0));
1224
1225       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1226         new_mode = MIN_MODE_VECTOR_FLOAT;
1227       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1228         new_mode = MIN_MODE_VECTOR_FRACT;
1229       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1230         new_mode = MIN_MODE_VECTOR_UFRACT;
1231       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1232         new_mode = MIN_MODE_VECTOR_ACCUM;
1233       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1234         new_mode = MIN_MODE_VECTOR_UACCUM;
1235       else
1236         new_mode = MIN_MODE_VECTOR_INT;
1237
1238       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1239         if (GET_MODE_NUNITS (new_mode) == nunits
1240             && GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1241             && targetm.vector_mode_supported_p (new_mode))
1242           break;
1243       if (new_mode != VOIDmode)
1244         op0 = gen_lowpart (new_mode, op0);
1245     }
1246
1247   /* Use vec_extract patterns for extracting parts of vectors whenever
1248      available.  */
1249   if (VECTOR_MODE_P (GET_MODE (op0))
1250       && !MEM_P (op0)
1251       && (optab_handler (vec_extract_optab, GET_MODE (op0))->insn_code
1252           != CODE_FOR_nothing)
1253       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1254           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1255     {
1256       enum machine_mode outermode = GET_MODE (op0);
1257       enum machine_mode innermode = GET_MODE_INNER (outermode);
1258       int icode = (int) optab_handler (vec_extract_optab, outermode)->insn_code;
1259       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1260       rtx rtxpos = GEN_INT (pos);
1261       rtx src = op0;
1262       rtx dest = NULL, pat, seq;
1263       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1264       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1265       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1266
1267       if (innermode == tmode || innermode == mode)
1268         dest = target;
1269
1270       if (!dest)
1271         dest = gen_reg_rtx (innermode);
1272
1273       start_sequence ();
1274
1275       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1276         dest = copy_to_mode_reg (mode0, dest);
1277
1278       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1279         src = copy_to_mode_reg (mode1, src);
1280
1281       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1282         rtxpos = copy_to_mode_reg (mode1, rtxpos);
1283
1284       /* We could handle this, but we should always be called with a pseudo
1285          for our targets and all insns should take them as outputs.  */
1286       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1287                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
1288                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1289
1290       pat = GEN_FCN (icode) (dest, src, rtxpos);
1291       seq = get_insns ();
1292       end_sequence ();
1293       if (pat)
1294         {
1295           emit_insn (seq);
1296           emit_insn (pat);
1297           if (mode0 != mode)
1298             return gen_lowpart (tmode, dest);
1299           return dest;
1300         }
1301     }
1302
1303   /* Make sure we are playing with integral modes.  Pun with subregs
1304      if we aren't.  */
1305   {
1306     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1307     if (imode != GET_MODE (op0))
1308       {
1309         if (MEM_P (op0))
1310           op0 = adjust_address (op0, imode, 0);
1311         else if (imode != BLKmode)
1312           {
1313             op0 = gen_lowpart (imode, op0);
1314
1315             /* If we got a SUBREG, force it into a register since we
1316                aren't going to be able to do another SUBREG on it.  */
1317             if (GET_CODE (op0) == SUBREG)
1318               op0 = force_reg (imode, op0);
1319           }
1320         else if (REG_P (op0))
1321           {
1322             rtx reg, subreg;
1323             imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1324                                             MODE_INT);
1325             reg = gen_reg_rtx (imode);
1326             subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1327             emit_move_insn (subreg, op0);
1328             op0 = reg;
1329             bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1330           }
1331         else
1332           {
1333             rtx mem = assign_stack_temp (GET_MODE (op0),
1334                                          GET_MODE_SIZE (GET_MODE (op0)), 0);
1335             emit_move_insn (mem, op0);
1336             op0 = adjust_address (mem, BLKmode, 0);
1337           }
1338       }
1339   }
1340
1341   /* We may be accessing data outside the field, which means
1342      we can alias adjacent data.  */
1343   if (MEM_P (op0))
1344     {
1345       op0 = shallow_copy_rtx (op0);
1346       set_mem_alias_set (op0, 0);
1347       set_mem_expr (op0, 0);
1348     }
1349
1350   /* Extraction of a full-word or multi-word value from a structure
1351      in a register or aligned memory can be done with just a SUBREG.
1352      A subword value in the least significant part of a register
1353      can also be extracted with a SUBREG.  For this, we need the
1354      byte offset of the value in op0.  */
1355
1356   bitpos = bitnum % unit;
1357   offset = bitnum / unit;
1358   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1359
1360   /* If OP0 is a register, BITPOS must count within a word.
1361      But as we have it, it counts within whatever size OP0 now has.
1362      On a bigendian machine, these are not the same, so convert.  */
1363   if (BYTES_BIG_ENDIAN
1364       && !MEM_P (op0)
1365       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1366     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1367
1368   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1369      If that's wrong, the solution is to test for it and set TARGET to 0
1370      if needed.  */
1371
1372   /* Only scalar integer modes can be converted via subregs.  There is an
1373      additional problem for FP modes here in that they can have a precision
1374      which is different from the size.  mode_for_size uses precision, but
1375      we want a mode based on the size, so we must avoid calling it for FP
1376      modes.  */
1377   mode1  = (SCALAR_INT_MODE_P (tmode)
1378             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1379             : mode);
1380
1381   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1382         && bitpos % BITS_PER_WORD == 0)
1383        || (mode1 != BLKmode
1384            /* ??? The big endian test here is wrong.  This is correct
1385               if the value is in a register, and if mode_for_size is not
1386               the same mode as op0.  This causes us to get unnecessarily
1387               inefficient code from the Thumb port when -mbig-endian.  */
1388            && (BYTES_BIG_ENDIAN
1389                ? bitpos + bitsize == BITS_PER_WORD
1390                : bitpos == 0)))
1391       && ((!MEM_P (op0)
1392            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode1),
1393                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1394            && GET_MODE_SIZE (mode1) != 0
1395            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1396           || (MEM_P (op0)
1397               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1398                   || (offset * BITS_PER_UNIT % bitsize == 0
1399                       && MEM_ALIGN (op0) % bitsize == 0)))))
1400     {
1401       if (MEM_P (op0))
1402         op0 = adjust_address (op0, mode1, offset);
1403       else if (mode1 != GET_MODE (op0))
1404         {
1405           rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1406                                          byte_offset);
1407           if (sub == NULL)
1408             goto no_subreg_mode_swap;
1409           op0 = sub;
1410         }
1411       if (mode1 != mode)
1412         return convert_to_mode (tmode, op0, unsignedp);
1413       return op0;
1414     }
1415  no_subreg_mode_swap:
1416
1417   /* Handle fields bigger than a word.  */
1418
1419   if (bitsize > BITS_PER_WORD)
1420     {
1421       /* Here we transfer the words of the field
1422          in the order least significant first.
1423          This is because the most significant word is the one which may
1424          be less than full.  */
1425
1426       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1427       unsigned int i;
1428
1429       if (target == 0 || !REG_P (target))
1430         target = gen_reg_rtx (mode);
1431
1432       /* Indicate for flow that the entire target reg is being set.  */
1433       emit_clobber (target);
1434
1435       for (i = 0; i < nwords; i++)
1436         {
1437           /* If I is 0, use the low-order word in both field and target;
1438              if I is 1, use the next to lowest word; and so on.  */
1439           /* Word number in TARGET to use.  */
1440           unsigned int wordnum
1441             = (WORDS_BIG_ENDIAN
1442                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1443                : i);
1444           /* Offset from start of field in OP0.  */
1445           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1446                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1447                                                 * (int) BITS_PER_WORD))
1448                                      : (int) i * BITS_PER_WORD);
1449           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1450           rtx result_part
1451             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1452                                            bitsize - i * BITS_PER_WORD),
1453                                  bitnum + bit_offset, 1, target_part, mode,
1454                                  word_mode);
1455
1456           gcc_assert (target_part);
1457
1458           if (result_part != target_part)
1459             emit_move_insn (target_part, result_part);
1460         }
1461
1462       if (unsignedp)
1463         {
1464           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1465              need to be zero'd out.  */
1466           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1467             {
1468               unsigned int i, total_words;
1469
1470               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1471               for (i = nwords; i < total_words; i++)
1472                 emit_move_insn
1473                   (operand_subword (target,
1474                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1475                                     1, VOIDmode),
1476                    const0_rtx);
1477             }
1478           return target;
1479         }
1480
1481       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1482       target = expand_shift (LSHIFT_EXPR, mode, target,
1483                              build_int_cst (NULL_TREE,
1484                                             GET_MODE_BITSIZE (mode) - bitsize),
1485                              NULL_RTX, 0);
1486       return expand_shift (RSHIFT_EXPR, mode, target,
1487                            build_int_cst (NULL_TREE,
1488                                           GET_MODE_BITSIZE (mode) - bitsize),
1489                            NULL_RTX, 0);
1490     }
1491
1492   /* From here on we know the desired field is smaller than a word.  */
1493
1494   /* Check if there is a correspondingly-sized integer field, so we can
1495      safely extract it as one size of integer, if necessary; then
1496      truncate or extend to the size that is wanted; then use SUBREGs or
1497      convert_to_mode to get one of the modes we really wanted.  */
1498
1499   int_mode = int_mode_for_mode (tmode);
1500   if (int_mode == BLKmode)
1501     int_mode = int_mode_for_mode (mode);
1502   /* Should probably push op0 out to memory and then do a load.  */
1503   gcc_assert (int_mode != BLKmode);
1504
1505   /* OFFSET is the number of words or bytes (UNIT says which)
1506      from STR_RTX to the first word or byte containing part of the field.  */
1507   if (!MEM_P (op0))
1508     {
1509       if (offset != 0
1510           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1511         {
1512           if (!REG_P (op0))
1513             op0 = copy_to_reg (op0);
1514           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1515                                 op0, (offset * UNITS_PER_WORD));
1516         }
1517       offset = 0;
1518     }
1519
1520   /* Now OFFSET is nonzero only for memory operands.  */
1521   ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1522   icode = unsignedp ? CODE_FOR_extzv : CODE_FOR_extv;
1523   if (ext_mode != MAX_MACHINE_MODE
1524       && bitsize > 0
1525       && GET_MODE_BITSIZE (ext_mode) >= bitsize
1526       /* If op0 is a register, we need it in EXT_MODE to make it
1527          acceptable to the format of ext(z)v.  */
1528       && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1529       && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1530            && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode)))
1531       && check_predicate_volatile_ok (icode, 1, op0, GET_MODE (op0)))
1532     {
1533       unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1534       rtx bitsize_rtx, bitpos_rtx;
1535       rtx last = get_last_insn ();
1536       rtx xop0 = op0;
1537       rtx xtarget = target;
1538       rtx xspec_target = target;
1539       rtx xspec_target_subreg = 0;
1540       rtx pat;
1541
1542       /* If op0 is a register, we need it in EXT_MODE to make it
1543          acceptable to the format of ext(z)v.  */
1544       if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1545         xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1546       if (MEM_P (xop0))
1547         /* Get ref to first byte containing part of the field.  */
1548         xop0 = adjust_address (xop0, byte_mode, xoffset);
1549
1550       /* On big-endian machines, we count bits from the most significant.
1551          If the bit field insn does not, we must invert.  */
1552       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1553         xbitpos = unit - bitsize - xbitpos;
1554
1555       /* Now convert from counting within UNIT to counting in EXT_MODE.  */
1556       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1557         xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1558
1559       unit = GET_MODE_BITSIZE (ext_mode);
1560
1561       if (xtarget == 0)
1562         xtarget = xspec_target = gen_reg_rtx (tmode);
1563
1564       if (GET_MODE (xtarget) != ext_mode)
1565         {
1566           /* Don't use LHS paradoxical subreg if explicit truncation is needed
1567              between the mode of the extraction (word_mode) and the target
1568              mode.  Instead, create a temporary and use convert_move to set
1569              the target.  */
1570           if (REG_P (xtarget)
1571               && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (GET_MODE (xtarget)),
1572                                         GET_MODE_BITSIZE (ext_mode)))
1573             {
1574               xtarget = gen_lowpart (ext_mode, xtarget);
1575               if (GET_MODE_SIZE (ext_mode)
1576                   > GET_MODE_SIZE (GET_MODE (xspec_target)))
1577                 xspec_target_subreg = xtarget;
1578             }
1579           else
1580             xtarget = gen_reg_rtx (ext_mode);
1581         }
1582
1583       /* If this machine's ext(z)v insists on a register target,
1584          make sure we have one.  */
1585       if (!insn_data[(int) icode].operand[0].predicate (xtarget, ext_mode))
1586         xtarget = gen_reg_rtx (ext_mode);
1587
1588       bitsize_rtx = GEN_INT (bitsize);
1589       bitpos_rtx = GEN_INT (xbitpos);
1590
1591       pat = (unsignedp
1592              ? gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx)
1593              : gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx));
1594       if (pat)
1595         {
1596           emit_insn (pat);
1597           if (xtarget == xspec_target)
1598             return xtarget;
1599           if (xtarget == xspec_target_subreg)
1600             return xspec_target;
1601           return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1602         }
1603       delete_insns_since (last);
1604     }
1605
1606   /* If OP0 is a memory, try copying it to a register and seeing if a
1607      cheap register alternative is available.  */
1608   if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1609     {
1610       enum machine_mode bestmode;
1611
1612       /* Get the mode to use for inserting into this field.  If
1613          OP0 is BLKmode, get the smallest mode consistent with the
1614          alignment. If OP0 is a non-BLKmode object that is no
1615          wider than EXT_MODE, use its mode. Otherwise, use the
1616          smallest mode containing the field.  */
1617
1618       if (GET_MODE (op0) == BLKmode
1619           || (ext_mode != MAX_MACHINE_MODE
1620               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1621         bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
1622                                   (ext_mode == MAX_MACHINE_MODE
1623                                    ? VOIDmode : ext_mode),
1624                                   MEM_VOLATILE_P (op0));
1625       else
1626         bestmode = GET_MODE (op0);
1627
1628       if (bestmode != VOIDmode
1629           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1630                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1631         {
1632           unsigned HOST_WIDE_INT xoffset, xbitpos;
1633
1634           /* Compute the offset as a multiple of this unit,
1635              counting in bytes.  */
1636           unit = GET_MODE_BITSIZE (bestmode);
1637           xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1638           xbitpos = bitnum % unit;
1639
1640           /* Make sure the register is big enough for the whole field.  */
1641           if (xoffset * BITS_PER_UNIT + unit
1642               >= offset * BITS_PER_UNIT + bitsize)
1643             {
1644               rtx last, result, xop0;
1645
1646               last = get_last_insn ();
1647
1648               /* Fetch it to a register in that size.  */
1649               xop0 = adjust_address (op0, bestmode, xoffset);
1650               xop0 = force_reg (bestmode, xop0);
1651               result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1652                                             unsignedp, target,
1653                                             mode, tmode, false);
1654               if (result)
1655                 return result;
1656
1657               delete_insns_since (last);
1658             }
1659         }
1660     }
1661
1662   if (!fallback_p)
1663     return NULL;
1664
1665   target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1666                                     bitpos, target, unsignedp);
1667   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1668 }
1669
1670 /* Generate code to extract a bit-field of BITSIZE bits, starting at
1671    bit BITNUM of STR_RTX, and return the rtx where the value is placed.
1672
1673    STR_RTX is the structure containing the field (a REG or MEM).
1674    UNSIGNEDP is nonzero if this is an unsigned bit field.
1675    MODE is the natural mode of the field value once extracted.
1676    TMODE is the mode the caller would like the value to have,
1677    although the value may be returned in MODE instead.
1678
1679    When TARGET is nonzero and the value can be stored there at no extra
1680    cost, that is done and TARGET is returned.  Otherwise the result is a
1681    REG of mode TMODE or MODE, TMODE being preferred if equally easy.  */
1682
1683 rtx
1684 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1685                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1686                    enum machine_mode mode, enum machine_mode tmode)
1687 {
1688   /* Delegate to the worker routine, passing true as its last argument
1689      (presumably the flag that permits its final fallback path).  */
1690   rtx value = extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1691                                    target, mode, tmode, true);
1692   return value;
1693 }
1694 \f
1695 /* Extract a bit field using shifts and boolean operations.
1696    Returns an rtx to represent the value.
1697    OP0 addresses a register (word) or memory (byte).
1698    BITPOS says which bit within the word or byte the bit field starts in.
1699    OFFSET says how many bytes farther the bit field starts;
1700     it is 0 if OP0 is a register.
1701    BITSIZE says how many bits long the bit field is.
1702     (If OP0 is a register, it may be narrower than a full word,
1703      but BITPOS still counts within a full word,
1704      which is significant on big-endian machines.)
1705    A field that crosses a word boundary goes to extract_split_bit_field.
1706    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1707    If TARGET is nonzero, attempts to store the value there
1708    and return TARGET, but this is not guaranteed.
1709    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1710
1711 static rtx
1712 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1713                          unsigned HOST_WIDE_INT offset,
1714                          unsigned HOST_WIDE_INT bitsize,
1715                          unsigned HOST_WIDE_INT bitpos, rtx target,
1716                          int unsignedp)
1717 {
1718   unsigned int total_bits = BITS_PER_WORD;  /* Width of containing unit.  */
1719   enum machine_mode mode;
1720
1721   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1722     {
1723       /* Special treatment for a bit field split across two registers.  */
1724       if (bitsize + bitpos > BITS_PER_WORD)
1725         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1726     }
1727   else
1728     {
1729       /* Get the proper mode to use for this field.  We want a mode that
1730          includes the entire field.  If such a mode would be larger than
1731          a word, we won't be doing the extraction the normal way.  */
1732
1733       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1734                             MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1735
1736       if (mode == VOIDmode)
1737         /* The only way this should occur is if the field spans word
1738            boundaries.  */
1739         return extract_split_bit_field (op0, bitsize,
1740                                         bitpos + offset * BITS_PER_UNIT,
1741                                         unsignedp);
1742
1743       total_bits = GET_MODE_BITSIZE (mode);
1744
1745       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1746          be in the range 0 to total_bits-1, and put any excess bytes in
1747          OFFSET.  */
1748       if (bitpos >= total_bits)
1749         {
1750           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1751           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1752                      * BITS_PER_UNIT);
1753         }
1754
1755       /* Get ref to an aligned byte, halfword, or word containing the field.
1756          Adjust BITPOS to be position within a word,
1757          and OFFSET to be the offset of that word.
1758          Then alter OP0 to refer to that word.  */
1759       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1760       offset -= (offset % (total_bits / BITS_PER_UNIT));
1761       op0 = adjust_address (op0, mode, offset);
1762     }
1763
1764   mode = GET_MODE (op0);
1765
1766   if (BYTES_BIG_ENDIAN)
1767     /* BITPOS is the distance between our msb and that of OP0.
1768        Convert it to the distance from the lsb.  */
1769     bitpos = total_bits - bitsize - bitpos;
1770
1771   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1772      We have reduced the big-endian case to the little-endian case.  */
1773
1774   if (unsignedp)
1775     {
1776       if (bitpos)
1777         {
1778           /* If the field does not already start at the lsb,
1779              shift it so it does.  */
1780           tree amount = build_int_cst (NULL_TREE, bitpos);
1781           /* Maybe propagate the target for the shift.  */
1782           /* But not if we will return it--could confuse integrate.c.  */
1783           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1784           if (tmode != mode) subtarget = 0;
1785           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1786         }
1787       /* Convert the value to the desired mode.  */
1788       if (mode != tmode)
1789         op0 = convert_to_mode (tmode, op0, 1);
1790
1791       /* Unless the msb of the field used to be the msb when we shifted,
1792          mask out the upper bits.  */
1793
1794       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1795         return expand_binop (GET_MODE (op0), and_optab, op0,
1796                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1797                              target, 1, OPTAB_LIB_WIDEN);
1798       return op0;
1799     }
1800
1801   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1802      then arithmetic-shift its lsb to the lsb of the word.  */
1803   op0 = force_reg (mode, op0);
1804   if (mode != tmode)
1805     target = 0;
1806   /* NOTE(review): this test precedes the loop that re-picks MODE.  */
1807   /* Find the narrowest integer mode that contains the field.  */
1808
1809   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1810        mode = GET_MODE_WIDER_MODE (mode))
1811     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1812       {
1813         op0 = convert_to_mode (mode, op0, 0);
1814         break;
1815       }
1816
1817   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1818     {
1819       tree amount
1820         = build_int_cst (NULL_TREE,
1821                          GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1822       /* Maybe propagate the target for the shift.  */
1823       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1824       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1825     }
1826
1827   return expand_shift (RSHIFT_EXPR, mode, op0,
1828                        build_int_cst (NULL_TREE,
1829                                       GET_MODE_BITSIZE (mode) - bitsize),
1830                        target, 0);
1831 }
1832 \f
1833 /* Build the constant rtx (CONST_INT or CONST_DOUBLE) for a mask of mode
1834    MODE made of BITSIZE ones followed by BITPOS zeros.  If COMPLEMENT is
1835    nonzero, the bitwise complement of that mask is produced instead.
1836    The mask is truncated if necessary to the width of MODE, and it is
1837    zero-extended if BITSIZE+BITPOS is too small for MODE.  */
1838
1839 static rtx
1840 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1841 {
1842   /* Start from BITSIZE low-order ones, then move them up by BITPOS.  */
1843   double_int ones = double_int_mask (bitsize);
1844   double_int bits
1845     = double_int_lshift (ones, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1846
1847   if (complement)
1848     bits = double_int_not (bits);
1849
1850   return immed_double_const (bits.low, bits.high, mode);
1851 }
1852
1853 /* Return a constant rtx (CONST_INT or CONST_DOUBLE) of mode MODE holding
1854    VALUE truncated to BITSIZE bits and then shifted left by BITPOS bits.  */
1855
1856 static rtx
1857 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1858 {
1859   double_int wide = uhwi_to_double_int (INTVAL (value));
1860   double_int field = double_int_zext (wide, bitsize);
1861   double_int placed
1862     = double_int_lshift (field, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1863
1864   return immed_double_const (placed.low, placed.high, mode);
1865 }
1866 \f
1867 /* Extract a bit field that is split across two words
1868    and return an RTX for the result, which is computed in word_mode.
1869
1870    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1871    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1872    UNSIGNEDP is 1 if we should zero-extend the contents; else sign-extend.  */
1873
1874 static rtx
1875 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1876                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1877 {
1878   unsigned int unit;
1879   unsigned int bitsdone = 0;  /* Bits extracted so far.  */
1880   rtx result = NULL_RTX;
1881   int first = 1;  /* Nonzero until RESULT holds a part.  */
1882
1883   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1884      much at a time.  */
1885   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1886     unit = BITS_PER_WORD;
1887   else
1888     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1889
1890   while (bitsdone < bitsize)
1891     {
1892       unsigned HOST_WIDE_INT thissize;
1893       rtx part, word;
1894       unsigned HOST_WIDE_INT thispos;
1895       unsigned HOST_WIDE_INT offset;
1896
1897       offset = (bitpos + bitsdone) / unit;
1898       thispos = (bitpos + bitsdone) % unit;
1899
1900       /* THISSIZE must not overrun a word boundary.  Otherwise,
1901          extract_fixed_bit_field will call us again, and we will mutually
1902          recurse forever.  */
1903       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1904       thissize = MIN (thissize, unit - thispos);
1905
1906       /* If OP0 is a register, then handle OFFSET here.
1907
1908          When handling multiword bitfields, extract_bit_field may pass
1909          down a word_mode SUBREG of a larger REG for a bitfield that actually
1910          crosses a word boundary.  Thus, for a SUBREG, we must find
1911          the current word starting from the base register.  */
1912       if (GET_CODE (op0) == SUBREG)
1913         {
1914           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1915           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1916                                         GET_MODE (SUBREG_REG (op0)));
1917           offset = 0;
1918         }
1919       else if (REG_P (op0))
1920         {
1921           word = operand_subword_force (op0, offset, GET_MODE (op0));
1922           offset = 0;
1923         }
1924       else
1925         word = op0;
1926
1927       /* Extract the parts in bit-counting order,
1928          whose meaning is determined by BYTES_BIG_ENDIAN.
1929          OFFSET is in UNITs, and UNIT is in bits.
1930          extract_fixed_bit_field wants offset in bytes.  */
1931       part = extract_fixed_bit_field (word_mode, word,
1932                                       offset * unit / BITS_PER_UNIT,
1933                                       thissize, thispos, 0, 1);
1934       bitsdone += thissize;
1935
1936       /* Shift this part into place for the result.  */
1937       if (BYTES_BIG_ENDIAN)
1938         {
1939           if (bitsize != bitsdone)
1940             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1941                                  build_int_cst (NULL_TREE, bitsize - bitsdone),
1942                                  0, 1);
1943         }
1944       else
1945         {
1946           if (bitsdone != thissize)
1947             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1948                                  build_int_cst (NULL_TREE,
1949                                                 bitsdone - thissize), 0, 1);
1950         }
1951
1952       if (first)
1953         result = part;
1954       else
1955         /* Combine the parts with bitwise or.  This works
1956            because we extracted each part as an unsigned bit field.  */
1957         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1958                                OPTAB_LIB_WIDEN);
1959
1960       first = 0;
1961     }
1962
1963   /* Unsigned bit field: we are done.  */
1964   if (unsignedp)
1965     return result;
1966   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1967   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1968                          build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1969                          NULL_RTX, 0);
1970   return expand_shift (RSHIFT_EXPR, word_mode, result,
1971                        build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1972                        NULL_RTX, 0);
1973 }
1974 \f
1975 /* Read the low-order bits of SRC, whose mode is SRC_MODE, as an rvalue
1976    of mode MODE without altering the bit pattern.  When SRC_MODE is the
1977    narrower mode the upper bits of the result are filled with zeros.
1978    Null is returned instead of a value if the layout of either mode is
1979    unknown (as for CC modes) or if the extraction would require
1980    unprofitable mode punning.
1981
1982    How this differs from the gen_lowpart* routines:
1983      - the result must always be treated as an rvalue
1984      - widening (MODE wider than SRC_MODE) performs a zero extension
1985      - narrowing (MODE smaller than SRC_MODE) performs a truncation,
1986        and is therefore subject to TRULY_NOOP_TRUNCATION.
1987
1988    Put differently, this routine carries out a computation, while the
1989    gen_lowpart* routines are conceptually lvalue or rvalue subreg
1990    operations.  */
1991
1992 rtx
1993 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
1994 {
1995   enum machine_mode int_mode, src_int_mode;
1996   rtx value;
1997
1998   /* Identical modes: SRC already is the answer.  */
1999   if (src_mode == mode)
2000     return src;
2001
2002   if (CONSTANT_P (src))
2003     {
2004       /* Call simplify_subreg directly.  simplify_gen_subreg is no use
2005          here: should the simplification fail, it would happily create
2006          (subreg (symbol_ref)) or similar invalid SUBREGs.  */
2007       unsigned int lowpart_byte = subreg_lowpart_offset (mode, src_mode);
2008
2009       value = simplify_subreg (mode, src, src_mode, lowpart_byte);
2010       if (value != NULL_RTX)
2011         return value;
2012
2013       /* Otherwise load the constant into a register and take a SUBREG
2014          of that, provided such a SUBREG would be valid.  */
2015       if (GET_MODE (src) != VOIDmode
2016           && validate_subreg (mode, src_mode, src, lowpart_byte))
2017         {
2018           src = force_reg (GET_MODE (src), src);
2019           return gen_rtx_SUBREG (mode, src, lowpart_byte);
2020         }
2021       return NULL_RTX;
2022     }
2023
2024   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2025     return NULL_RTX;
2026
2027   /* Same-sized, tieable modes: a plain lowpart may be enough.  */
2028   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2029       && MODES_TIEABLE_P (mode, src_mode))
2030     {
2031       value = gen_lowpart_common (mode, src);
2032       if (value != NULL_RTX)
2033         return value;
2034     }
2035
2036   /* Go through the integer modes corresponding to both modes.  */
2037   src_int_mode = int_mode_for_mode (src_mode);
2038   int_mode = int_mode_for_mode (mode);
2039   if (src_int_mode == BLKmode || int_mode == BLKmode
2040       || !MODES_TIEABLE_P (src_int_mode, src_mode)
2041       || !MODES_TIEABLE_P (int_mode, mode))
2042     return NULL_RTX;
2043
2044   value = gen_lowpart (src_int_mode, src);
2045   value = convert_modes (int_mode, src_int_mode, value, true);
2046   return gen_lowpart (mode, value);
2047 }
2048 \f
2049 /* Add INC into TARGET, copying the sum back if it landed elsewhere.  */
2050
2051 void
2052 expand_inc (rtx target, rtx inc)
2053 {
2054   enum machine_mode mode = GET_MODE (target);
2055   rtx sum = expand_binop (mode, add_optab, target, inc, target, 0,
2056                           OPTAB_LIB_WIDEN);
2057   if (sum != target)
2058     emit_move_insn (target, sum);
2059 }
2060
2061 /* Subtract DEC from TARGET, copying the difference back if needed.  */
2062
2063 void
2064 expand_dec (rtx target, rtx dec)
2065 {
2066   enum machine_mode mode = GET_MODE (target);
2067   rtx diff = expand_binop (mode, sub_optab, target, dec, target, 0,
2068                            OPTAB_LIB_WIDEN);
2069   if (diff != target)
2070     emit_move_insn (target, diff);
2071 }
2072 \f
2073 /* Output a shift or rotate instruction for expression code CODE
2074    ([LR]SHIFT_EXPR or [LR]ROTATE_EXPR), operating in mode MODE, with
2075    SHIFTED being the rtx for the value to shift, and AMOUNT the tree
2076    for the amount to shift by.  Store the result in the rtx TARGET, if
2077    that is convenient.  If UNSIGNEDP is nonzero, do a logical shift;
2078    otherwise, arithmetic.  Return the rtx for where the value is.  */
2079
2080 rtx
2081 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2082               tree amount, rtx target, int unsignedp)
2083 {
2084   rtx op1, temp = 0;
2085   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2086   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2087   optab lshift_optab = ashl_optab;
2088   optab rshift_arith_optab = ashr_optab;
2089   optab rshift_uns_optab = lshr_optab;
2090   optab lrotate_optab = rotl_optab;
2091   optab rrotate_optab = rotr_optab;
2092   enum machine_mode op1_mode;
2093   int attempt;
2094   bool speed = optimize_insn_for_speed_p ();
2095
2096   op1 = expand_normal (amount);
2097   op1_mode = GET_MODE (op1);
2098
2099   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2100      shift amount is a vector, use the vector/vector shift patterns.  */
2101   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2102     {
2103       lshift_optab = vashl_optab;
2104       rshift_arith_optab = vashr_optab;
2105       rshift_uns_optab = vlshr_optab;
2106       lrotate_optab = vrotl_optab;
2107       rrotate_optab = vrotr_optab;
2108     }
2109
2110   /* Previously detected shift-counts computed by NEGATE_EXPR
2111      and shifted in the other direction; but that does not work
2112      on all machines.  */
2113
2114   if (SHIFT_COUNT_TRUNCATED)
2115     {
2116       if (CONST_INT_P (op1)
2117           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2118               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2119         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2120                        % GET_MODE_BITSIZE (mode));
2121       else if (GET_CODE (op1) == SUBREG
2122                && subreg_lowpart_p (op1)
2123                && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2124         op1 = SUBREG_REG (op1);
2125     }
2126
2127   if (op1 == const0_rtx)
2128     return shifted;
2129
2130   /* Check whether it is cheaper to implement a left shift by a constant
2131      bit count by a sequence of additions.  */
2132   if (code == LSHIFT_EXPR
2133       && CONST_INT_P (op1)
2134       && INTVAL (op1) > 0
2135       && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2136       && INTVAL (op1) < MAX_BITS_PER_WORD
2137       && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2138       && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2139     {
2140       int i;
2141       for (i = 0; i < INTVAL (op1); i++)
2142         {
2143           temp = force_reg (mode, shifted);
2144           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2145                                   unsignedp, OPTAB_LIB_WIDEN);
2146         }
2147       return shifted;
2148     }
2149   /* Try OPTAB_DIRECT, OPTAB_WIDEN, then OPTAB_LIB_WIDEN until one succeeds.  */
2150   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2151     {
2152       enum optab_methods methods;
2153
2154       if (attempt == 0)
2155         methods = OPTAB_DIRECT;
2156       else if (attempt == 1)
2157         methods = OPTAB_WIDEN;
2158       else
2159         methods = OPTAB_LIB_WIDEN;
2160
2161       if (rotate)
2162         {
2163           /* Widening does not work for rotation.  */
2164           if (methods == OPTAB_WIDEN)
2165             continue;
2166           else if (methods == OPTAB_LIB_WIDEN)
2167             {
2168               /* If we have been unable to open-code this by a rotation,
2169                  do it as the IOR of two shifts.  I.e., to rotate A
2170                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2171                  where C is the bitsize of A.
2172
2173                  It is theoretically possible that the target machine might
2174                  not be able to perform either shift and hence we would
2175                  be making two libcalls rather than just the one for the
2176                  shift (similarly if IOR could not be done).  We will allow
2177                  this extremely unlikely lossage to avoid complicating the
2178                  code below.  */
2179
2180               rtx subtarget = target == shifted ? 0 : target;
2181               tree new_amount, other_amount;
2182               rtx temp1;
2183               tree type = TREE_TYPE (amount);
2184               if (GET_MODE (op1) != TYPE_MODE (type)
2185                   && GET_MODE (op1) != VOIDmode)
2186                 op1 = convert_to_mode (TYPE_MODE (type), op1, 1);
2187               new_amount = make_tree (type, op1);
2188               other_amount
2189                 = fold_build2 (MINUS_EXPR, type,
2190                                build_int_cst (type, GET_MODE_BITSIZE (mode)),
2191                                new_amount);
2192
2193               shifted = force_reg (mode, shifted);
2194
2195               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2196                                    mode, shifted, new_amount, 0, 1);
2197               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2198                                     mode, shifted, other_amount, subtarget, 1);
2199               return expand_binop (mode, ior_optab, temp, temp1, target,
2200                                    unsignedp, methods);
2201             }
2202
2203           temp = expand_binop (mode,
2204                                left ? lrotate_optab : rrotate_optab,
2205                                shifted, op1, target, unsignedp, methods);
2206         }
2207       else if (unsignedp)
2208         temp = expand_binop (mode,
2209                              left ? lshift_optab : rshift_uns_optab,
2210                              shifted, op1, target, unsignedp, methods);
2211
2212       /* Do arithmetic shifts.
2213          Also, if we are going to widen the operand, we can just as well
2214          use an arithmetic right-shift instead of a logical one.  */
2215       if (temp == 0 && ! rotate
2216           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2217         {
2218           enum optab_methods methods1 = methods;
2219
2220           /* If trying to widen a log shift to an arithmetic shift,
2221              don't accept an arithmetic shift of the same size.  */
2222           if (unsignedp)
2223             methods1 = OPTAB_MUST_WIDEN;
2224
2225           /* Arithmetic shift */
2226
2227           temp = expand_binop (mode,
2228                                left ? lshift_optab : rshift_arith_optab,
2229                                shifted, op1, target, unsignedp, methods1);
2230         }
2231
2232       /* We used to try extzv here for logical right shifts, but that was
2233          only useful for one machine, the VAX, and caused poor code
2234          generation there for lshrdi3, so the code was deleted and a
2235          define_expand for lshrsi3 was added to vax.md.  */
2236     }
2237
2238   gcc_assert (temp);
2239   return temp;
2240 }
2241 \f
2242 enum alg_code {  /* Operation codes recorded in struct algorithm.  */
2243   alg_unknown,
2244   alg_zero,             /* total := 0 */
2245   alg_m, alg_shift,     /* total := multiplicand; total := total * coeff */
2246   alg_add_t_m2,         /* total := total + multiplicand * coeff */
2247   alg_sub_t_m2,         /* total := total - multiplicand * coeff */
2248   alg_add_factor,       /* total := total * coeff + total */
2249   alg_sub_factor,       /* total := total * coeff - total */
2250   alg_add_t2_m,         /* total := total * coeff + multiplicand */
2251   alg_sub_t2_m,         /* total := total * coeff - multiplicand */
2252   alg_impossible        /* no multiplication possible within a given cost */
2253 };
2254
2255 /* This structure holds the "cost" of a multiply sequence.  The
2256    "cost" field holds the total rtx_cost of every operator in the
2257    synthetic multiplication sequence, hence cost(a op b) is defined
2258    as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2259    The "latency" field holds the minimum possible latency of the
2260    synthetic multiply, on a hypothetical infinitely parallel CPU.
2261    This is the critical path, or the maximum height, of the expression
2262    tree which is the sum of rtx_costs on the most expensive path from
2263    any leaf to the root.  Hence latency(a op b) is defined as zero for
2264    leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise.  */
2265
2266 struct mult_cost {
2267   short cost;     /* Total rtx_cost of the multiplication sequence.  */
2268   short latency;  /* The latency of the multiplication sequence.  */
2269 };
2270
2271 /* Compare a pointer to a mult_cost against a single integer "rtx_cost"
2272    value; equivalent to CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}.  Both
2273    arguments are evaluated more than once, so avoid side effects.  */
2274 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y)    \
2275                              || ((X)->cost == (Y) && (X)->latency < (Y)))
2276
2277 /* This macro is used to compare two pointers to mult_costs against
2278    each other.  The macro returns true if X is cheaper than Y: the
2279    lower "cost" wins, and if "cost"s are tied, the lower latency is
2280    cheaper.  X and Y are each evaluated more than once.  */
2281 #define CHEAPER_MULT_COST(X,Y)  ((X)->cost < (Y)->cost          \
2282                                  || ((X)->cost == (Y)->cost     \
2283                                      && (X)->latency < (Y)->latency))
2284
2285 /* This structure records a sequence of operations.
2286    `ops' is the number of operations recorded.
2287    `cost' is their total cost and latency (a struct mult_cost).
2288    The operations are stored in `op' and the corresponding
2289    logarithms of the integer coefficients in `log'.
2290
2291    These are the operations:
2292    alg_zero             total := 0;
2293    alg_m                total := multiplicand;
2294    alg_shift            total := total * coeff;
2295    alg_add_t_m2         total := total + multiplicand * coeff;
2296    alg_sub_t_m2         total := total - multiplicand * coeff;
2297    alg_add_factor       total := total * coeff + total;
2298    alg_sub_factor       total := total * coeff - total;
2299    alg_add_t2_m         total := total * coeff + multiplicand;
2300    alg_sub_t2_m         total := total * coeff - multiplicand;
2301
2302    The first operation must be either alg_zero or alg_m.  */
2303
2304 struct algorithm
2305 {
2306   struct mult_cost cost;
2307   short ops;
2308   /* The sizes of the OP and LOG fields are not directly related to the
2309      word size, but the worst-case algorithms will be if we have few
2310      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2311      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2312      in total wordsize operations.  */
2313   enum alg_code op[MAX_BITS_PER_WORD];
2314   char log[MAX_BITS_PER_WORD];
2315 };
2316
2317 /* The entry for our multiplication cache/hash table.  */
2318 struct alg_hash_entry {
2319   /* The number we are multiplying by.  */
2320   unsigned HOST_WIDE_INT t;
2321
2322   /* The mode in which we are multiplying something by T.  */
2323   enum machine_mode mode;
2324
2325   /* The best multiplication algorithm for t.  */
2326   enum alg_code alg;
2327
2328   /* The cost of multiplication if ALG_CODE is not alg_impossible.
2329      Otherwise, the cost within which multiplication by T is
2330      impossible.  */
2331   struct mult_cost cost;
2332
2333   /* OPtimized for speed? */
2334   bool speed;
2335 };
2336
2337 /* The number of cache/hash entries.  */
2338 #if HOST_BITS_PER_WIDE_INT == 64
2339 #define NUM_ALG_HASH_ENTRIES 1031
2340 #else
2341 #define NUM_ALG_HASH_ENTRIES 307
2342 #endif
2343
2344 /* Each entry of ALG_HASH caches alg_code for some integer.  This is
2345    actually a hash table.  If we have a collision, that the older
2346    entry is kicked out.  */
2347 static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2348
/* The kind of fixup required after a constant multiplication
   sequence has been emitted.  */
enum mult_variant
{
  /* No fixup is needed.  */
  basic_variant,

  /* The result must be negated.  */
  negate_variant,

  /* The multiplicand must be added to the result.  */
  add_variant
};
2354
2355 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2356                         const struct mult_cost *, enum machine_mode mode);
2357 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2358                                  struct algorithm *, enum mult_variant *, int);
2359 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2360                               const struct algorithm *, enum mult_variant);
2361 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2362                                                  int, rtx *, int *, int *);
2363 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2364 static rtx extract_high_half (enum machine_mode, rtx);
2365 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2366 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2367                                        int, int);
2368 /* Compute and return the best algorithm for multiplying by T.
2369    The algorithm must cost less than cost_limit
2370    If retval.cost >= COST_LIMIT, no algorithm was found and all
2371    other field of the returned struct are undefined.
2372    MODE is the machine mode of the multiplication.  */
2373
2374 static void
2375 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2376             const struct mult_cost *cost_limit, enum machine_mode mode)
2377 {
2378   int m;
2379   struct algorithm *alg_in, *best_alg;
2380   struct mult_cost best_cost;
2381   struct mult_cost new_limit;
2382   int op_cost, op_latency;
2383   unsigned HOST_WIDE_INT orig_t = t;
2384   unsigned HOST_WIDE_INT q;
2385   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2386   int hash_index;
2387   bool cache_hit = false;
2388   enum alg_code cache_alg = alg_zero;
2389   bool speed = optimize_insn_for_speed_p ();
2390
2391   /* Indicate that no algorithm is yet found.  If no algorithm
2392      is found, this value will be returned and indicate failure.  */
2393   alg_out->cost.cost = cost_limit->cost + 1;
2394   alg_out->cost.latency = cost_limit->latency + 1;
2395
2396   if (cost_limit->cost < 0
2397       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2398     return;
2399
2400   /* Restrict the bits of "t" to the multiplication's mode.  */
2401   t &= GET_MODE_MASK (mode);
2402
2403   /* t == 1 can be done in zero cost.  */
2404   if (t == 1)
2405     {
2406       alg_out->ops = 1;
2407       alg_out->cost.cost = 0;
2408       alg_out->cost.latency = 0;
2409       alg_out->op[0] = alg_m;
2410       return;
2411     }
2412
2413   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2414      fail now.  */
2415   if (t == 0)
2416     {
2417       if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2418         return;
2419       else
2420         {
2421           alg_out->ops = 1;
2422           alg_out->cost.cost = zero_cost[speed];
2423           alg_out->cost.latency = zero_cost[speed];
2424           alg_out->op[0] = alg_zero;
2425           return;
2426         }
2427     }
2428
2429   /* We'll be needing a couple extra algorithm structures now.  */
2430
2431   alg_in = XALLOCA (struct algorithm);
2432   best_alg = XALLOCA (struct algorithm);
2433   best_cost = *cost_limit;
2434
2435   /* Compute the hash index.  */
2436   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2437
2438   /* See if we already know what to do for T.  */
2439   if (alg_hash[hash_index].t == t
2440       && alg_hash[hash_index].mode == mode
2441       && alg_hash[hash_index].mode == mode
2442       && alg_hash[hash_index].speed == speed
2443       && alg_hash[hash_index].alg != alg_unknown)
2444     {
2445       cache_alg = alg_hash[hash_index].alg;
2446
2447       if (cache_alg == alg_impossible)
2448         {
2449           /* The cache tells us that it's impossible to synthesize
2450              multiplication by T within alg_hash[hash_index].cost.  */
2451           if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2452             /* COST_LIMIT is at least as restrictive as the one
2453                recorded in the hash table, in which case we have no
2454                hope of synthesizing a multiplication.  Just
2455                return.  */
2456             return;
2457
2458           /* If we get here, COST_LIMIT is less restrictive than the
2459              one recorded in the hash table, so we may be able to
2460              synthesize a multiplication.  Proceed as if we didn't
2461              have the cache entry.  */
2462         }
2463       else
2464         {
2465           if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2466             /* The cached algorithm shows that this multiplication
2467                requires more cost than COST_LIMIT.  Just return.  This
2468                way, we don't clobber this cache entry with
2469                alg_impossible but retain useful information.  */
2470             return;
2471
2472           cache_hit = true;
2473
2474           switch (cache_alg)
2475             {
2476             case alg_shift:
2477               goto do_alg_shift;
2478
2479             case alg_add_t_m2:
2480             case alg_sub_t_m2:
2481               goto do_alg_addsub_t_m2;
2482
2483             case alg_add_factor:
2484             case alg_sub_factor:
2485               goto do_alg_addsub_factor;
2486
2487             case alg_add_t2_m:
2488               goto do_alg_add_t2_m;
2489
2490             case alg_sub_t2_m:
2491               goto do_alg_sub_t2_m;
2492
2493             default:
2494               gcc_unreachable ();
2495             }
2496         }
2497     }
2498
2499   /* If we have a group of zero bits at the low-order part of T, try
2500      multiplying by the remaining bits and then doing a shift.  */
2501
2502   if ((t & 1) == 0)
2503     {
2504     do_alg_shift:
2505       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2506       if (m < maxm)
2507         {
2508           q = t >> m;
2509           /* The function expand_shift will choose between a shift and
2510              a sequence of additions, so the observed cost is given as
2511              MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]).  */
2512           op_cost = m * add_cost[speed][mode];
2513           if (shift_cost[speed][mode][m] < op_cost)
2514             op_cost = shift_cost[speed][mode][m];
2515           new_limit.cost = best_cost.cost - op_cost;
2516           new_limit.latency = best_cost.latency - op_cost;
2517           synth_mult (alg_in, q, &new_limit, mode);
2518
2519           alg_in->cost.cost += op_cost;
2520           alg_in->cost.latency += op_cost;
2521           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2522             {
2523               struct algorithm *x;
2524               best_cost = alg_in->cost;
2525               x = alg_in, alg_in = best_alg, best_alg = x;
2526               best_alg->log[best_alg->ops] = m;
2527               best_alg->op[best_alg->ops] = alg_shift;
2528             }
2529
2530           /* See if treating ORIG_T as a signed number yields a better
2531              sequence.  Try this sequence only for a negative ORIG_T
2532              as it would be useless for a non-negative ORIG_T.  */
2533           if ((HOST_WIDE_INT) orig_t < 0)
2534             {
2535               /* Shift ORIG_T as follows because a right shift of a
2536                  negative-valued signed type is implementation
2537                  defined.  */
2538               q = ~(~orig_t >> m);
2539               /* The function expand_shift will choose between a shift
2540                  and a sequence of additions, so the observed cost is
2541                  given as MIN (m * add_cost[speed][mode],
2542                  shift_cost[speed][mode][m]).  */
2543               op_cost = m * add_cost[speed][mode];
2544               if (shift_cost[speed][mode][m] < op_cost)
2545                 op_cost = shift_cost[speed][mode][m];
2546               new_limit.cost = best_cost.cost - op_cost;
2547               new_limit.latency = best_cost.latency - op_cost;
2548               synth_mult (alg_in, q, &new_limit, mode);
2549
2550               alg_in->cost.cost += op_cost;
2551               alg_in->cost.latency += op_cost;
2552               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2553                 {
2554                   struct algorithm *x;
2555                   best_cost = alg_in->cost;
2556                   x = alg_in, alg_in = best_alg, best_alg = x;
2557                   best_alg->log[best_alg->ops] = m;
2558                   best_alg->op[best_alg->ops] = alg_shift;
2559                 }
2560             }
2561         }
2562       if (cache_hit)
2563         goto done;
2564     }
2565
2566   /* If we have an odd number, add or subtract one.  */
2567   if ((t & 1) != 0)
2568     {
2569       unsigned HOST_WIDE_INT w;
2570
2571     do_alg_addsub_t_m2:
2572       for (w = 1; (w & t) != 0; w <<= 1)
2573         ;
2574       /* If T was -1, then W will be zero after the loop.  This is another
2575          case where T ends with ...111.  Handling this with (T + 1) and
2576          subtract 1 produces slightly better code and results in algorithm
2577          selection much faster than treating it like the ...0111 case
2578          below.  */
2579       if (w == 0
2580           || (w > 2
2581               /* Reject the case where t is 3.
2582                  Thus we prefer addition in that case.  */
2583               && t != 3))
2584         {
2585           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2586
2587           op_cost = add_cost[speed][mode];
2588           new_limit.cost = best_cost.cost - op_cost;
2589           new_limit.latency = best_cost.latency - op_cost;
2590           synth_mult (alg_in, t + 1, &new_limit, mode);
2591
2592           alg_in->cost.cost += op_cost;
2593           alg_in->cost.latency += op_cost;
2594           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2595             {
2596               struct algorithm *x;
2597               best_cost = alg_in->cost;
2598               x = alg_in, alg_in = best_alg, best_alg = x;
2599               best_alg->log[best_alg->ops] = 0;
2600               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2601             }
2602         }
2603       else
2604         {
2605           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2606
2607           op_cost = add_cost[speed][mode];
2608           new_limit.cost = best_cost.cost - op_cost;
2609           new_limit.latency = best_cost.latency - op_cost;
2610           synth_mult (alg_in, t - 1, &new_limit, mode);
2611
2612           alg_in->cost.cost += op_cost;
2613           alg_in->cost.latency += op_cost;
2614           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2615             {
2616               struct algorithm *x;
2617               best_cost = alg_in->cost;
2618               x = alg_in, alg_in = best_alg, best_alg = x;
2619               best_alg->log[best_alg->ops] = 0;
2620               best_alg->op[best_alg->ops] = alg_add_t_m2;
2621             }
2622         }
2623
2624       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2625          quickly with a - a * n for some appropriate constant n.  */
2626       m = exact_log2 (-orig_t + 1);
2627       if (m >= 0 && m < maxm)
2628         {
2629           op_cost = shiftsub1_cost[speed][mode][m];
2630           new_limit.cost = best_cost.cost - op_cost;
2631           new_limit.latency = best_cost.latency - op_cost;
2632           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode);
2633
2634           alg_in->cost.cost += op_cost;
2635           alg_in->cost.latency += op_cost;
2636           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2637             {
2638               struct algorithm *x;
2639               best_cost = alg_in->cost;
2640               x = alg_in, alg_in = best_alg, best_alg = x;
2641               best_alg->log[best_alg->ops] = m;
2642               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2643             }
2644         }
2645
2646       if (cache_hit)
2647         goto done;
2648     }
2649
2650   /* Look for factors of t of the form
2651      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2652      If we find such a factor, we can multiply by t using an algorithm that
2653      multiplies by q, shift the result by m and add/subtract it to itself.
2654
2655      We search for large factors first and loop down, even if large factors
2656      are less probable than small; if we find a large factor we will find a
2657      good sequence quickly, and therefore be able to prune (by decreasing
2658      COST_LIMIT) the search.  */
2659
2660  do_alg_addsub_factor:
2661   for (m = floor_log2 (t - 1); m >= 2; m--)
2662     {
2663       unsigned HOST_WIDE_INT d;
2664
2665       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2666       if (t % d == 0 && t > d && m < maxm
2667           && (!cache_hit || cache_alg == alg_add_factor))
2668         {
2669           /* If the target has a cheap shift-and-add instruction use
2670              that in preference to a shift insn followed by an add insn.
2671              Assume that the shift-and-add is "atomic" with a latency
2672              equal to its cost, otherwise assume that on superscalar
2673              hardware the shift may be executed concurrently with the
2674              earlier steps in the algorithm.  */
2675           op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2676           if (shiftadd_cost[speed][mode][m] < op_cost)
2677             {
2678               op_cost = shiftadd_cost[speed][mode][m];
2679               op_latency = op_cost;
2680             }
2681           else
2682             op_latency = add_cost[speed][mode];
2683
2684           new_limit.cost = best_cost.cost - op_cost;
2685           new_limit.latency = best_cost.latency - op_latency;
2686           synth_mult (alg_in, t / d, &new_limit, mode);
2687
2688           alg_in->cost.cost += op_cost;
2689           alg_in->cost.latency += op_latency;
2690           if (alg_in->cost.latency < op_cost)
2691             alg_in->cost.latency = op_cost;
2692           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2693             {
2694               struct algorithm *x;
2695               best_cost = alg_in->cost;
2696               x = alg_in, alg_in = best_alg, best_alg = x;
2697               best_alg->log[best_alg->ops] = m;
2698               best_alg->op[best_alg->ops] = alg_add_factor;
2699             }
2700           /* Other factors will have been taken care of in the recursion.  */
2701           break;
2702         }
2703
2704       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2705       if (t % d == 0 && t > d && m < maxm
2706           && (!cache_hit || cache_alg == alg_sub_factor))
2707         {
2708           /* If the target has a cheap shift-and-subtract insn use
2709              that in preference to a shift insn followed by a sub insn.
2710              Assume that the shift-and-sub is "atomic" with a latency
2711              equal to it's cost, otherwise assume that on superscalar
2712              hardware the shift may be executed concurrently with the
2713              earlier steps in the algorithm.  */
2714           op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2715           if (shiftsub0_cost[speed][mode][m] < op_cost)
2716             {
2717               op_cost = shiftsub0_cost[speed][mode][m];
2718               op_latency = op_cost;
2719             }
2720           else
2721             op_latency = add_cost[speed][mode];
2722
2723           new_limit.cost = best_cost.cost - op_cost;
2724           new_limit.latency = best_cost.latency - op_latency;
2725           synth_mult (alg_in, t / d, &new_limit, mode);
2726
2727           alg_in->cost.cost += op_cost;
2728           alg_in->cost.latency += op_latency;
2729           if (alg_in->cost.latency < op_cost)
2730             alg_in->cost.latency = op_cost;
2731           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2732             {
2733               struct algorithm *x;
2734               best_cost = alg_in->cost;
2735               x = alg_in, alg_in = best_alg, best_alg = x;
2736               best_alg->log[best_alg->ops] = m;
2737               best_alg->op[best_alg->ops] = alg_sub_factor;
2738             }
2739           break;
2740         }
2741     }
2742   if (cache_hit)
2743     goto done;
2744
2745   /* Try shift-and-add (load effective address) instructions,
2746      i.e. do a*3, a*5, a*9.  */
2747   if ((t & 1) != 0)
2748     {
2749     do_alg_add_t2_m:
2750       q = t - 1;
2751       q = q & -q;
2752       m = exact_log2 (q);
2753       if (m >= 0 && m < maxm)
2754         {
2755           op_cost = shiftadd_cost[speed][mode][m];
2756           new_limit.cost = best_cost.cost - op_cost;
2757           new_limit.latency = best_cost.latency - op_cost;
2758           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2759
2760           alg_in->cost.cost += op_cost;
2761           alg_in->cost.latency += op_cost;
2762           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2763             {
2764               struct algorithm *x;
2765               best_cost = alg_in->cost;
2766               x = alg_in, alg_in = best_alg, best_alg = x;
2767               best_alg->log[best_alg->ops] = m;
2768               best_alg->op[best_alg->ops] = alg_add_t2_m;
2769             }
2770         }
2771       if (cache_hit)
2772         goto done;
2773
2774     do_alg_sub_t2_m:
2775       q = t + 1;
2776       q = q & -q;
2777       m = exact_log2 (q);
2778       if (m >= 0 && m < maxm)
2779         {
2780           op_cost = shiftsub0_cost[speed][mode][m];
2781           new_limit.cost = best_cost.cost - op_cost;
2782           new_limit.latency = best_cost.latency - op_cost;
2783           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2784
2785           alg_in->cost.cost += op_cost;
2786           alg_in->cost.latency += op_cost;
2787           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2788             {
2789               struct algorithm *x;
2790               best_cost = alg_in->cost;
2791               x = alg_in, alg_in = best_alg, best_alg = x;
2792               best_alg->log[best_alg->ops] = m;
2793               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2794             }
2795         }
2796       if (cache_hit)
2797         goto done;
2798     }
2799
2800  done:
2801   /* If best_cost has not decreased, we have not found any algorithm.  */
2802   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2803     {
2804       /* We failed to find an algorithm.  Record alg_impossible for
2805          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2806          we are asked to find an algorithm for T within the same or
2807          lower COST_LIMIT, we can immediately return to the
2808          caller.  */
2809       alg_hash[hash_index].t = t;
2810       alg_hash[hash_index].mode = mode;
2811       alg_hash[hash_index].speed = speed;
2812       alg_hash[hash_index].alg = alg_impossible;
2813       alg_hash[hash_index].cost = *cost_limit;
2814       return;
2815     }
2816
2817   /* Cache the result.  */
2818   if (!cache_hit)
2819     {
2820       alg_hash[hash_index].t = t;
2821       alg_hash[hash_index].mode = mode;
2822       alg_hash[hash_index].speed = speed;
2823       alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2824       alg_hash[hash_index].cost.cost = best_cost.cost;
2825       alg_hash[hash_index].cost.latency = best_cost.latency;
2826     }
2827
2828   /* If we are getting a too long sequence for `struct algorithm'
2829      to record, make this search fail.  */
2830   if (best_alg->ops == MAX_BITS_PER_WORD)
2831     return;
2832
2833   /* Copy the algorithm from temporary space to the space at alg_out.
2834      We avoid using structure assignment because the majority of
2835      best_alg is normally undefined, and this is a critical function.  */
2836   alg_out->ops = best_alg->ops + 1;
2837   alg_out->cost = best_cost;
2838   memcpy (alg_out->op, best_alg->op,
2839           alg_out->ops * sizeof *alg_out->op);
2840   memcpy (alg_out->log, best_alg->log,
2841           alg_out->ops * sizeof *alg_out->log);
2842 }
2843 \f
2844 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2845    Try three variations:
2846
2847        - a shift/add sequence based on VAL itself
2848        - a shift/add sequence based on -VAL, followed by a negation
2849        - a shift/add sequence based on VAL - 1, followed by an addition.
2850
2851    Return true if the cheapest of these cost less than MULT_COST,
2852    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2853
2854 static bool
2855 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2856                      struct algorithm *alg, enum mult_variant *variant,
2857                      int mult_cost)
2858 {
2859   struct algorithm alg2;
2860   struct mult_cost limit;
2861   int op_cost;
2862   bool speed = optimize_insn_for_speed_p ();
2863
2864   /* Fail quickly for impossible bounds.  */
2865   if (mult_cost < 0)
2866     return false;
2867
2868   /* Ensure that mult_cost provides a reasonable upper bound.
2869      Any constant multiplication can be performed with less
2870      than 2 * bits additions.  */
2871   op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2872   if (mult_cost > op_cost)
2873     mult_cost = op_cost;
2874
2875   *variant = basic_variant;
2876   limit.cost = mult_cost;
2877   limit.latency = mult_cost;
2878   synth_mult (alg, val, &limit, mode);
2879
2880   /* This works only if the inverted value actually fits in an
2881      `unsigned int' */
2882   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2883     {
2884       op_cost = neg_cost[speed][mode];
2885       if (MULT_COST_LESS (&alg->cost, mult_cost))
2886         {
2887           limit.cost = alg->cost.cost - op_cost;
2888           limit.latency = alg->cost.latency - op_cost;
2889         }
2890       else
2891         {
2892           limit.cost = mult_cost - op_cost;
2893           limit.latency = mult_cost - op_cost;
2894         }
2895
2896       synth_mult (&alg2, -val, &limit, mode);
2897       alg2.cost.cost += op_cost;
2898       alg2.cost.latency += op_cost;
2899       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2900         *alg = alg2, *variant = negate_variant;
2901     }
2902
2903   /* This proves very useful for division-by-constant.  */
2904   op_cost = add_cost[speed][mode];
2905   if (MULT_COST_LESS (&alg->cost, mult_cost))
2906     {
2907       limit.cost = alg->cost.cost - op_cost;
2908       limit.latency = alg->cost.latency - op_cost;
2909     }
2910   else
2911     {
2912       limit.cost = mult_cost - op_cost;
2913       limit.latency = mult_cost - op_cost;
2914     }
2915
2916   synth_mult (&alg2, val - 1, &limit, mode);
2917   alg2.cost.cost += op_cost;
2918   alg2.cost.latency += op_cost;
2919   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2920     *alg = alg2, *variant = add_variant;
2921
2922   return MULT_COST_LESS (&alg->cost, mult_cost);
2923 }
2924
2925 /* A subroutine of expand_mult, used for constant multiplications.
2926    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2927    convenient.  Use the shift/add sequence described by ALG and apply
2928    the final fixup specified by VARIANT.  */
2929
2930 static rtx
2931 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2932                    rtx target, const struct algorithm *alg,
2933                    enum mult_variant variant)
2934 {
2935   HOST_WIDE_INT val_so_far;
2936   rtx insn, accum, tem;
2937   int opno;
2938   enum machine_mode nmode;
2939
2940   /* Avoid referencing memory over and over and invalid sharing
2941      on SUBREGs.  */
2942   op0 = force_reg (mode, op0);
2943
2944   /* ACCUM starts out either as OP0 or as a zero, depending on
2945      the first operation.  */
2946
2947   if (alg->op[0] == alg_zero)
2948     {
2949       accum = copy_to_mode_reg (mode, const0_rtx);
2950       val_so_far = 0;
2951     }
2952   else if (alg->op[0] == alg_m)
2953     {
2954       accum = copy_to_mode_reg (mode, op0);
2955       val_so_far = 1;
2956     }
2957   else
2958     gcc_unreachable ();
2959
2960   for (opno = 1; opno < alg->ops; opno++)
2961     {
2962       int log = alg->log[opno];
2963       rtx shift_subtarget = optimize ? 0 : accum;
2964       rtx add_target
2965         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2966            && !optimize)
2967           ? target : 0;
2968       rtx accum_target = optimize ? 0 : accum;
2969
2970       switch (alg->op[opno])
2971         {
2972         case alg_shift:
2973           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2974                                 build_int_cst (NULL_TREE, log),
2975                                 NULL_RTX, 0);
2976           val_so_far <<= log;
2977           break;
2978
2979         case alg_add_t_m2:
2980           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2981                               build_int_cst (NULL_TREE, log),
2982                               NULL_RTX, 0);
2983           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2984                                  add_target ? add_target : accum_target);
2985           val_so_far += (HOST_WIDE_INT) 1 << log;
2986           break;
2987
2988         case alg_sub_t_m2:
2989           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2990                               build_int_cst (NULL_TREE, log),
2991                               NULL_RTX, 0);
2992           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2993                                  add_target ? add_target : accum_target);
2994           val_so_far -= (HOST_WIDE_INT) 1 << log;
2995           break;
2996
2997         case alg_add_t2_m:
2998           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2999                                 build_int_cst (NULL_TREE, log),
3000                                 shift_subtarget,
3001                                 0);
3002           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3003                                  add_target ? add_target : accum_target);
3004           val_so_far = (val_so_far << log) + 1;
3005           break;
3006
3007         case alg_sub_t2_m:
3008           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3009                                 build_int_cst (NULL_TREE, log),
3010                                 shift_subtarget, 0);
3011           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3012                                  add_target ? add_target : accum_target);
3013           val_so_far = (val_so_far << log) - 1;
3014           break;
3015
3016         case alg_add_factor:
3017           tem = expand_shift (LSHIFT_EXPR, mode, accum,
3018                               build_int_cst (NULL_TREE, log),
3019                               NULL_RTX, 0);
3020           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3021                                  add_target ? add_target : accum_target);
3022           val_so_far += val_so_far << log;
3023           break;
3024
3025         case alg_sub_factor:
3026           tem = expand_shift (LSHIFT_EXPR, mode, accum,
3027                               build_int_cst (NULL_TREE, log),
3028                               NULL_RTX, 0);
3029           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3030                                  (add_target
3031                                   ? add_target : (optimize ? 0 : tem)));
3032           val_so_far = (val_so_far << log) - val_so_far;
3033           break;
3034
3035         default:
3036           gcc_unreachable ();
3037         }
3038
3039       /* Write a REG_EQUAL note on the last insn so that we can cse
3040          multiplication sequences.  Note that if ACCUM is a SUBREG,
3041          we've set the inner register and must properly indicate
3042          that.  */
3043
3044       tem = op0, nmode = mode;
3045       if (GET_CODE (accum) == SUBREG)
3046         {
3047           nmode = GET_MODE (SUBREG_REG (accum));
3048           tem = gen_lowpart (nmode, op0);
3049         }
3050
3051       insn = get_last_insn ();
3052       set_unique_reg_note (insn, REG_EQUAL,
3053                            gen_rtx_MULT (nmode, tem,
3054                                          GEN_INT (val_so_far)));
3055     }
3056
3057   if (variant == negate_variant)
3058     {
3059       val_so_far = -val_so_far;
3060       accum = expand_unop (mode, neg_optab, accum, target, 0);
3061     }
3062   else if (variant == add_variant)
3063     {
3064       val_so_far = val_so_far + 1;
3065       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3066     }
3067
3068   /* Compare only the bits of val and val_so_far that are significant
3069      in the result mode, to avoid sign-/zero-extension confusion.  */
3070   val &= GET_MODE_MASK (mode);
3071   val_so_far &= GET_MODE_MASK (mode);
3072   gcc_assert (val == val_so_far);
3073
3074   return accum;
3075 }
3076
3077 /* Perform a multiplication and return an rtx for the result.
3078    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3079    TARGET is a suggestion for where to store the result (an rtx).
3080
3081    We check specially for a constant integer as OP1.
3082    If you want this check for OP0 as well, then before calling
3083    you should swap the two operands if OP0 would be constant.  */
3084
3085 rtx
3086 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3087              int unsignedp)
3088 {
3089   enum mult_variant variant;
3090   struct algorithm algorithm;
3091   int max_cost;
3092   bool speed = optimize_insn_for_speed_p ();
3093
3094   /* Handling const0_rtx here allows us to use zero as a rogue value for
3095      coeff below.  */
3096   if (op1 == const0_rtx)
3097     return const0_rtx;
3098   if (op1 == const1_rtx)
3099     return op0;
3100   if (op1 == constm1_rtx)
3101     return expand_unop (mode,
3102                         GET_MODE_CLASS (mode) == MODE_INT
3103                         && !unsignedp && flag_trapv
3104                         ? negv_optab : neg_optab,
3105                         op0, target, 0);
3106
3107   /* These are the operations that are potentially turned into a sequence
3108      of shifts and additions.  */
3109   if (SCALAR_INT_MODE_P (mode)
3110       && (unsignedp || !flag_trapv))
3111     {
3112       HOST_WIDE_INT coeff = 0;
3113       rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3114
3115       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3116          less than or equal in size to `unsigned int' this doesn't matter.
3117          If the mode is larger than `unsigned int', then synth_mult works
3118          only if the constant value exactly fits in an `unsigned int' without
3119          any truncation.  This means that multiplying by negative values does
3120          not work; results are off by 2^32 on a 32 bit machine.  */
3121
3122       if (CONST_INT_P (op1))
3123         {
3124           /* Attempt to handle multiplication of DImode values by negative
3125              coefficients, by performing the multiplication by a positive
3126              multiplier and then inverting the result.  */
3127           if (INTVAL (op1) < 0
3128               && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3129             {
3130               /* Its safe to use -INTVAL (op1) even for INT_MIN, as the
3131                  result is interpreted as an unsigned coefficient.
3132                  Exclude cost of op0 from max_cost to match the cost
3133                  calculation of the synth_mult.  */
3134               max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed)
3135                          - neg_cost[speed][mode];
3136               if (max_cost > 0
3137                   && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3138                                           &variant, max_cost))
3139                 {
3140                   rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3141                                                 NULL_RTX, &algorithm,
3142                                                 variant);
3143                   return expand_unop (mode, neg_optab, temp, target, 0);
3144                 }
3145             }
3146           else coeff = INTVAL (op1);
3147         }
3148       else if (GET_CODE (op1) == CONST_DOUBLE)
3149         {
3150           /* If we are multiplying in DImode, it may still be a win
3151              to try to work with shifts and adds.  */
3152           if (CONST_DOUBLE_HIGH (op1) == 0
3153               && CONST_DOUBLE_LOW (op1) > 0)
3154             coeff = CONST_DOUBLE_LOW (op1);
3155           else if (CONST_DOUBLE_LOW (op1) == 0
3156                    && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3157             {
3158               int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3159                           + HOST_BITS_PER_WIDE_INT;
3160               return expand_shift (LSHIFT_EXPR, mode, op0,
3161                                    build_int_cst (NULL_TREE, shift),
3162                                    target, unsignedp);
3163             }
3164         }
3165
3166       /* We used to test optimize here, on the grounds that it's better to
3167          produce a smaller program when -O is not used.  But this causes
3168          such a terrible slowdown sometimes that it seems better to always
3169          use synth_mult.  */
3170       if (coeff != 0)
3171         {
3172           /* Special case powers of two.  */
3173           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3174             return expand_shift (LSHIFT_EXPR, mode, op0,
3175                                  build_int_cst (NULL_TREE, floor_log2 (coeff)),
3176                                  target, unsignedp);
3177
3178           /* Exclude cost of op0 from max_cost to match the cost
3179              calculation of the synth_mult.  */
3180           max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed);
3181           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3182                                    max_cost))
3183             return expand_mult_const (mode, op0, coeff, target,
3184                                       &algorithm, variant);
3185         }
3186     }
3187
3188   if (GET_CODE (op0) == CONST_DOUBLE)
3189     {
3190       rtx temp = op0;
3191       op0 = op1;
3192       op1 = temp;
3193     }
3194
3195   /* Expand x*2.0 as x+x.  */
3196   if (GET_CODE (op1) == CONST_DOUBLE
3197       && SCALAR_FLOAT_MODE_P (mode))
3198     {
3199       REAL_VALUE_TYPE d;
3200       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3201
3202       if (REAL_VALUES_EQUAL (d, dconst2))
3203         {
3204           op0 = force_reg (GET_MODE (op0), op0);
3205           return expand_binop (mode, add_optab, op0, op0,
3206                                target, unsignedp, OPTAB_LIB_WIDEN);
3207         }
3208     }
3209
3210   /* This used to use umul_optab if unsigned, but for non-widening multiply
3211      there is no difference between signed and unsigned.  */
3212   op0 = expand_binop (mode,
3213                       ! unsignedp
3214                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3215                       ? smulv_optab : smul_optab,
3216                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3217   gcc_assert (op0);
3218   return op0;
3219 }
3220 \f
3221 /* Return the smallest n such that 2**n >= X.  */
3222
3223 int
3224 ceil_log2 (unsigned HOST_WIDE_INT x)
3225 {
3226   return floor_log2 (x - 1) + 1;
3227 }
3228
3229 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3230    replace division by D, and put the least significant N bits of the result
3231    in *MULTIPLIER_PTR and return the most significant bit.
3232
3233    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3234    needed precision is in PRECISION (should be <= N).
3235
3236    PRECISION should be as small as possible so this function can choose
3237    multiplier more freely.
3238
3239    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3240    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3241
3242    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3243    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3244
3245 static
3246 unsigned HOST_WIDE_INT
3247 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3248                    rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3249 {
3250   HOST_WIDE_INT mhigh_hi, mlow_hi;
3251   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3252   int lgup, post_shift;
3253   int pow, pow2;
3254   unsigned HOST_WIDE_INT nl, dummy1;
3255   HOST_WIDE_INT nh, dummy2;
3256
3257   /* lgup = ceil(log2(divisor)); */
3258   lgup = ceil_log2 (d);
3259
3260   gcc_assert (lgup <= n);
3261
3262   pow = n + lgup;
3263   pow2 = n + lgup - precision;
3264
3265   /* We could handle this with some effort, but this case is much
3266      better handled directly with a scc insn, so rely on caller using
3267      that.  */
3268   gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3269
3270   /* mlow = 2^(N + lgup)/d */
3271  if (pow >= HOST_BITS_PER_WIDE_INT)
3272     {
3273       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3274       nl = 0;
3275     }
3276   else
3277     {
3278       nh = 0;
3279       nl = (unsigned HOST_WIDE_INT) 1 << pow;
3280     }
3281   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3282                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3283
3284   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
3285   if (pow2 >= HOST_BITS_PER_WIDE_INT)
3286     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3287   else
3288     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3289   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3290                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3291
3292   gcc_assert (!mhigh_hi || nh - d < d);
3293   gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3294   /* Assert that mlow < mhigh.  */
3295   gcc_assert (mlow_hi < mhigh_hi
3296               || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3297
3298   /* If precision == N, then mlow, mhigh exceed 2^N
3299      (but they do not exceed 2^(N+1)).  */
3300
3301   /* Reduce to lowest terms.  */
3302   for (post_shift = lgup; post_shift > 0; post_shift--)
3303     {
3304       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3305       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3306       if (ml_lo >= mh_lo)
3307         break;
3308
3309       mlow_hi = 0;
3310       mlow_lo = ml_lo;
3311       mhigh_hi = 0;
3312       mhigh_lo = mh_lo;
3313     }
3314
3315   *post_shift_ptr = post_shift;
3316   *lgup_ptr = lgup;
3317   if (n < HOST_BITS_PER_WIDE_INT)
3318     {
3319       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3320       *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3321       return mhigh_lo >= mask;
3322     }
3323   else
3324     {
3325       *multiplier_ptr = GEN_INT (mhigh_lo);
3326       return mhigh_hi;
3327     }
3328 }
3329
3330 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3331    congruent to 1 (mod 2**N).  */
3332
3333 static unsigned HOST_WIDE_INT
3334 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3335 {
3336   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3337
3338   /* The algorithm notes that the choice y = x satisfies
3339      x*y == 1 mod 2^3, since x is assumed odd.
3340      Each iteration doubles the number of bits of significance in y.  */
3341
3342   unsigned HOST_WIDE_INT mask;
3343   unsigned HOST_WIDE_INT y = x;
3344   int nbit = 3;
3345
3346   mask = (n == HOST_BITS_PER_WIDE_INT
3347           ? ~(unsigned HOST_WIDE_INT) 0
3348           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3349
3350   while (nbit < n)
3351     {
3352       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3353       nbit *= 2;
3354     }
3355   return y;
3356 }
3357
3358 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3359    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3360    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3361    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3362    become signed.
3363
3364    The result is put in TARGET if that is convenient.
3365
3366    MODE is the mode of operation.  */
3367
3368 rtx
3369 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3370                              rtx op1, rtx target, int unsignedp)
3371 {
3372   rtx tem;
3373   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3374
3375   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3376                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3377                       NULL_RTX, 0);
3378   tem = expand_and (mode, tem, op1, NULL_RTX);
3379   adj_operand
3380     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3381                      adj_operand);
3382
3383   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3384                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3385                       NULL_RTX, 0);
3386   tem = expand_and (mode, tem, op0, NULL_RTX);
3387   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3388                           target);
3389
3390   return target;
3391 }
3392
3393 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
3394
3395 static rtx
3396 extract_high_half (enum machine_mode mode, rtx op)
3397 {
3398   enum machine_mode wider_mode;
3399
3400   if (mode == word_mode)
3401     return gen_highpart (mode, op);
3402
3403   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3404
3405   wider_mode = GET_MODE_WIDER_MODE (mode);
3406   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3407                      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3408   return convert_modes (mode, wider_mode, op, 0);
3409 }
3410
/* Like expand_mult_highpart, but only consider using a multiplication
   optab.  OP1 is an rtx for the constant operand.

   MODE is the mode of OP0 and of the result, TARGET is a suggestion for
   where to put the result, UNSIGNEDP nonzero means unsigned multiply and
   MAX_COST is the cost limit each candidate sequence has to stay under.

   The strategies are tried in a fixed order; the first one that is both
   cheap enough and succeeds provides the return value.  If none does,
   0 is returned.  */

static rtx
expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
                            rtx target, int unsignedp, int max_cost)
{
  /* The constant re-expressed as a MODE constant, used whenever an
     operand of mode MODE is needed.  */
  rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
  enum machine_mode wider_mode;
  optab moptab;
  rtx tem;
  int size;
  bool speed = optimize_insn_for_speed_p ();

  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));

  wider_mode = GET_MODE_WIDER_MODE (mode);
  size = GET_MODE_BITSIZE (mode);

  /* Firstly, try using a multiplication insn that only generates the needed
     high part of the product, and in the sign flavor of unsignedp.  */
  if (mul_highpart_cost[speed][mode] < max_cost)
    {
      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
                          unsignedp, OPTAB_DIRECT);
      if (tem)
        return tem;
    }

  /* Secondly, same as above, but use sign flavor opposite of unsignedp.
     Need to adjust the result after the multiplication.  The size test
     comes first so that shift_cost is only indexed with a count below
     BITS_PER_WORD.  The budget covers the two shifts and four add-class
     operations charged for the adjustment.  */
  if (size - 1 < BITS_PER_WORD
      && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
          + 4 * add_cost[speed][mode] < max_cost))
    {
      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
                          unsignedp, OPTAB_DIRECT);
      if (tem)
        /* We used the wrong signedness.  Adjust the result.  */
        return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
                                            tem, unsignedp);
    }

  /* Try widening multiplication.  */
  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
  if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
      && mul_widen_cost[speed][wider_mode] < max_cost)
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
                          unsignedp, OPTAB_WIDEN);
      if (tem)
        return extract_high_half (mode, tem);
    }

  /* Try widening the mode and perform a non-widening multiplication.  */
  if (optab_handler (smul_optab, wider_mode)->insn_code != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
    {
      rtx insns, wop0, wop1;

      /* We need to widen the operands, for example to ensure the
         constant multiplier is correctly sign or zero extended.
         Use a sequence to clean-up any instructions emitted by
         the conversions if things don't work out.  */
      start_sequence ();
      wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
      wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
      tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
                          unsignedp, OPTAB_WIDEN);
      insns = get_insns ();
      end_sequence ();

      /* Only commit the collected insns when the multiply succeeded.  */
      if (tem)
        {
          emit_insn (insns);
          return extract_high_half (mode, tem);
        }
    }

  /* Try widening multiplication of opposite signedness, and adjust.  */
  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
  if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
          + 4 * add_cost[speed][mode] < max_cost))
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
                          NULL_RTX, ! unsignedp, OPTAB_WIDEN);
      if (tem != 0)
        {
          tem = extract_high_half (mode, tem);
          /* We used the wrong signedness.  Adjust the result.  */
          return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
                                              target, unsignedp);
        }
    }

  /* Nothing was both affordable and available.  */
  return 0;
}
3513
3514 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3515    putting the high half of the result in TARGET if that is convenient,
3516    and return where the result is.  If the operation can not be performed,
3517    0 is returned.
3518
3519    MODE is the mode of operation and result.
3520
3521    UNSIGNEDP nonzero means unsigned multiply.
3522
3523    MAX_COST is the total allowed cost for the expanded RTL.  */
3524
3525 static rtx
3526 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3527                       rtx target, int unsignedp, int max_cost)
3528 {
3529   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3530   unsigned HOST_WIDE_INT cnst1;
3531   int extra_cost;
3532   bool sign_adjust = false;
3533   enum mult_variant variant;
3534   struct algorithm alg;
3535   rtx tem;
3536   bool speed = optimize_insn_for_speed_p ();
3537
3538   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3539   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3540   gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3541
3542   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3543
3544   /* We can't optimize modes wider than BITS_PER_WORD.
3545      ??? We might be able to perform double-word arithmetic if
3546      mode == word_mode, however all the cost calculations in
3547      synth_mult etc. assume single-word operations.  */
3548   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3549     return expand_mult_highpart_optab (mode, op0, op1, target,
3550                                        unsignedp, max_cost);
3551
3552   extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3553
3554   /* Check whether we try to multiply by a negative constant.  */
3555   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3556     {
3557       sign_adjust = true;
3558       extra_cost += add_cost[speed][mode];
3559     }
3560
3561   /* See whether shift/add multiplication is cheap enough.  */
3562   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3563                            max_cost - extra_cost))
3564     {
3565       /* See whether the specialized multiplication optabs are
3566          cheaper than the shift/add version.  */
3567       tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3568                                         alg.cost.cost + extra_cost);
3569       if (tem)
3570         return tem;
3571
3572       tem = convert_to_mode (wider_mode, op0, unsignedp);
3573       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3574       tem = extract_high_half (mode, tem);
3575
3576       /* Adjust result for signedness.  */
3577       if (sign_adjust)
3578         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3579
3580       return tem;
3581     }
3582   return expand_mult_highpart_optab (mode, op0, op1, target,
3583                                      unsignedp, max_cost);
3584 }
3585
3586
3587 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3588
3589 static rtx
3590 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3591 {
3592   unsigned HOST_WIDE_INT masklow, maskhigh;
3593   rtx result, temp, shift, label;
3594   int logd;
3595
3596   logd = floor_log2 (d);
3597   result = gen_reg_rtx (mode);
3598
3599   /* Avoid conditional branches when they're expensive.  */
3600   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3601       && optimize_insn_for_speed_p ())
3602     {
3603       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3604                                       mode, 0, -1);
3605       if (signmask)
3606         {
3607           signmask = force_reg (mode, signmask);
3608           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3609           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3610
3611           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3612              which instruction sequence to use.  If logical right shifts
3613              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3614              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3615
3616           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3617           if (optab_handler (lshr_optab, mode)->insn_code == CODE_FOR_nothing
3618               || rtx_cost (temp, SET, optimize_insn_for_speed_p ()) > COSTS_N_INSNS (2))
3619             {
3620               temp = expand_binop (mode, xor_optab, op0, signmask,
3621                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3622               temp = expand_binop (mode, sub_optab, temp, signmask,
3623                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3624               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3625                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3626               temp = expand_binop (mode, xor_optab, temp, signmask,
3627                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3628               temp = expand_binop (mode, sub_optab, temp, signmask,
3629                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3630             }
3631           else
3632             {
3633               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3634                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3635               signmask = force_reg (mode, signmask);
3636
3637               temp = expand_binop (mode, add_optab, op0, signmask,
3638                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3639               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3640                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3641               temp = expand_binop (mode, sub_optab, temp, signmask,
3642                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3643             }
3644           return temp;
3645         }
3646     }
3647
3648   /* Mask contains the mode's signbit and the significant bits of the
3649      modulus.  By including the signbit in the operation, many targets
3650      can avoid an explicit compare operation in the following comparison
3651      against zero.  */
3652
3653   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3654   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3655     {
3656       masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3657       maskhigh = -1;
3658     }
3659   else
3660     maskhigh = (HOST_WIDE_INT) -1
3661                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3662
3663   temp = expand_binop (mode, and_optab, op0,
3664                        immed_double_const (masklow, maskhigh, mode),
3665                        result, 1, OPTAB_LIB_WIDEN);
3666   if (temp != result)
3667     emit_move_insn (result, temp);
3668
3669   label = gen_label_rtx ();
3670   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3671
3672   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3673                        0, OPTAB_LIB_WIDEN);
3674   masklow = (HOST_WIDE_INT) -1 << logd;
3675   maskhigh = -1;
3676   temp = expand_binop (mode, ior_optab, temp,
3677                        immed_double_const (masklow, maskhigh, mode),
3678                        result, 1, OPTAB_LIB_WIDEN);
3679   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3680                        0, OPTAB_LIB_WIDEN);
3681   if (temp != result)
3682     emit_move_insn (result, temp);
3683   emit_label (label);
3684   return result;
3685 }
3686
3687 /* Expand signed division of OP0 by a power of two D in mode MODE.
3688    This routine is only called for positive values of D.  */
3689
3690 static rtx
3691 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3692 {
3693   rtx temp, label;
3694   tree shift;
3695   int logd;
3696
3697   logd = floor_log2 (d);
3698   shift = build_int_cst (NULL_TREE, logd);
3699
3700   if (d == 2
3701       && BRANCH_COST (optimize_insn_for_speed_p (),
3702                       false) >= 1)
3703     {
3704       temp = gen_reg_rtx (mode);
3705       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3706       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3707                            0, OPTAB_LIB_WIDEN);
3708       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3709     }
3710
3711 #ifdef HAVE_conditional_move
3712   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3713       >= 2)
3714     {
3715       rtx temp2;
3716
3717       /* ??? emit_conditional_move forces a stack adjustment via
3718          compare_from_rtx so, if the sequence is discarded, it will
3719          be lost.  Do it now instead.  */
3720       do_pending_stack_adjust ();
3721
3722       start_sequence ();
3723       temp2 = copy_to_mode_reg (mode, op0);
3724       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3725                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3726       temp = force_reg (mode, temp);
3727
3728       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3729       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3730                                      mode, temp, temp2, mode, 0);
3731       if (temp2)
3732         {
3733           rtx seq = get_insns ();
3734           end_sequence ();
3735           emit_insn (seq);
3736           return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3737         }
3738       end_sequence ();
3739     }
3740 #endif
3741
3742   if (BRANCH_COST (optimize_insn_for_speed_p (),
3743                    false) >= 2)
3744     {
3745       int ushift = GET_MODE_BITSIZE (mode) - logd;
3746
3747       temp = gen_reg_rtx (mode);
3748       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3749       if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3750         temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3751                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3752       else
3753         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3754                              build_int_cst (NULL_TREE, ushift),
3755                              NULL_RTX, 1);
3756       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3757                            0, OPTAB_LIB_WIDEN);
3758       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3759     }
3760
3761   label = gen_label_rtx ();
3762   temp = copy_to_mode_reg (mode, op0);
3763   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3764   expand_inc (temp, GEN_INT (d - 1));
3765   emit_label (label);
3766   return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3767 }
3768 \f
3769 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3770    if that is convenient, and returning where the result is.
3771    You may request either the quotient or the remainder as the result;
3772    specify REM_FLAG nonzero to get the remainder.
3773
3774    CODE is the expression code for which kind of division this is;
3775    it controls how rounding is done.  MODE is the machine mode to use.
3776    UNSIGNEDP nonzero means do unsigned division.  */
3777
3778 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3779    and then correct it by or'ing in missing high bits
3780    if result of ANDI is nonzero.
3781    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3782    This could optimize to a bfexts instruction.
3783    But C doesn't use these operations, so their optimizations are
3784    left for later.  */
3785 /* ??? For modulo, we don't actually need the highpart of the first product,
3786    the low part will do nicely.  And for small divisors, the second multiply
3787    can also be a low-part only multiply or even be completely left out.
3788    E.g. to calculate the remainder of a division by 3 with a 32 bit
3789    multiply, multiply with 0x55555556 and extract the upper two bits;
3790    the result is exact for inputs up to 0x1fffffff.
3791    The input range can be reduced by using cross-sum rules.
3792    For odd divisors >= 3, the following table gives right shift counts
3793    so that if a number is shifted by an integer multiple of the given
3794    amount, the remainder stays the same:
3795    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3796    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3797    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3798    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3799    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3800
3801    Cross-sum rules for even numbers can be derived by leaving as many bits
3802    to the right alone as the divisor has zeros to the right.
3803    E.g. if x is an unsigned 32 bit number:
3804    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3805    */
3806
3807 rtx
3808 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3809                rtx op0, rtx op1, rtx target, int unsignedp)
3810 {
3811   enum machine_mode compute_mode;
3812   rtx tquotient;
3813   rtx quotient = 0, remainder = 0;
3814   rtx last;
3815   int size;
3816   rtx insn, set;
3817   optab optab1, optab2;
3818   int op1_is_constant, op1_is_pow2 = 0;
3819   int max_cost, extra_cost;
3820   static HOST_WIDE_INT last_div_const = 0;
3821   static HOST_WIDE_INT ext_op1;
3822   bool speed = optimize_insn_for_speed_p ();
3823
3824   op1_is_constant = CONST_INT_P (op1);
3825   if (op1_is_constant)
3826     {
3827       ext_op1 = INTVAL (op1);
3828       if (unsignedp)
3829         ext_op1 &= GET_MODE_MASK (mode);
3830       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3831                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3832     }
3833
3834   /*
3835      This is the structure of expand_divmod:
3836
3837      First comes code to fix up the operands so we can perform the operations
3838      correctly and efficiently.
3839
3840      Second comes a switch statement with code specific for each rounding mode.
3841      For some special operands this code emits all RTL for the desired
3842      operation; for other cases, it generates only a quotient and stores it in
3843      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3844      to indicate that it has not done anything.
3845
3846      Last comes code that finishes the operation.  If QUOTIENT is set and
3847      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3848      QUOTIENT is not set, it is computed using trunc rounding.
3849
3850      We try to generate special code for division and remainder when OP1 is a
3851      constant.  If |OP1| = 2**n we can use shifts and some other fast
3852      operations.  For other values of OP1, we compute a carefully selected
3853      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3854      by m.
3855
3856      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3857      half of the product.  Different strategies for generating the product are
3858      implemented in expand_mult_highpart.
3859
3860      If what we actually want is the remainder, we generate that by another
3861      by-constant multiplication and a subtraction.  */
3862
3863   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3864      code below will malfunction if we are, so check here and handle
3865      the special case if so.  */
3866   if (op1 == const1_rtx)
3867     return rem_flag ? const0_rtx : op0;
3868
3869     /* When dividing by -1, we could get an overflow.
3870      negv_optab can handle overflows.  */
3871   if (! unsignedp && op1 == constm1_rtx)
3872     {
3873       if (rem_flag)
3874         return const0_rtx;
3875       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3876                           ? negv_optab : neg_optab, op0, target, 0);
3877     }
3878
3879   if (target
3880       /* Don't use the function value register as a target
3881          since we have to read it as well as write it,
3882          and function-inlining gets confused by this.  */
3883       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3884           /* Don't clobber an operand while doing a multi-step calculation.  */
3885           || ((rem_flag || op1_is_constant)
3886               && (reg_mentioned_p (target, op0)
3887                   || (MEM_P (op0) && MEM_P (target))))
3888           || reg_mentioned_p (target, op1)
3889           || (MEM_P (op1) && MEM_P (target))))
3890     target = 0;
3891
3892   /* Get the mode in which to perform this computation.  Normally it will
3893      be MODE, but sometimes we can't do the desired operation in MODE.
3894      If so, pick a wider mode in which we can do the operation.  Convert
3895      to that mode at the start to avoid repeated conversions.
3896
3897      First see what operations we need.  These depend on the expression
3898      we are evaluating.  (We assume that divxx3 insns exist under the
3899      same conditions as modxx3 insns and that these insns don't normally
3900      fail.  If these assumptions are not correct, we may generate less
3901      efficient code in some cases.)
3902
3903      Then see if we find a mode in which we can open-code that operation
3904      (either a division, modulus, or shift).  Finally, check for the smallest
3905      mode for which we can do the operation with a library call.  */
3906
3907   /* We might want to refine this now that we have division-by-constant
3908      optimization.  Since expand_mult_highpart tries so many variants, it is
3909      not straightforward to generalize this.  Maybe we should make an array
3910      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3911
3912   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3913             ? (unsignedp ? lshr_optab : ashr_optab)
3914             : (unsignedp ? udiv_optab : sdiv_optab));
3915   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3916             ? optab1
3917             : (unsignedp ? udivmod_optab : sdivmod_optab));
3918
3919   for (compute_mode = mode; compute_mode != VOIDmode;
3920        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3921     if (optab_handler (optab1, compute_mode)->insn_code != CODE_FOR_nothing
3922         || optab_handler (optab2, compute_mode)->insn_code != CODE_FOR_nothing)
3923       break;
3924
3925   if (compute_mode == VOIDmode)
3926     for (compute_mode = mode; compute_mode != VOIDmode;
3927          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3928       if (optab_libfunc (optab1, compute_mode)
3929           || optab_libfunc (optab2, compute_mode))
3930         break;
3931
3932   /* If we still couldn't find a mode, use MODE, but expand_binop will
3933      probably die.  */
3934   if (compute_mode == VOIDmode)
3935     compute_mode = mode;
3936
3937   if (target && GET_MODE (target) == compute_mode)
3938     tquotient = target;
3939   else
3940     tquotient = gen_reg_rtx (compute_mode);
3941
3942   size = GET_MODE_BITSIZE (compute_mode);
3943 #if 0
3944   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3945      (mode), and thereby get better code when OP1 is a constant.  Do that
3946      later.  It will require going over all usages of SIZE below.  */
3947   size = GET_MODE_BITSIZE (mode);
3948 #endif
3949
3950   /* Only deduct something for a REM if the last divide done was
3951      for a different constant.   Then set the constant of the last
3952      divide.  */
3953   max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
3954   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3955                      && INTVAL (op1) == last_div_const))
3956     max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
3957
3958   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3959
3960   /* Now convert to the best mode to use.  */
3961   if (compute_mode != mode)
3962     {
3963       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3964       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3965
3966       /* convert_modes may have placed op1 into a register, so we
3967          must recompute the following.  */
3968       op1_is_constant = CONST_INT_P (op1);
3969       op1_is_pow2 = (op1_is_constant
3970                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3971                           || (! unsignedp
3972                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3973     }
3974
3975   /* If one of the operands is a volatile MEM, copy it into a register.  */
3976
3977   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3978     op0 = force_reg (compute_mode, op0);
3979   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3980     op1 = force_reg (compute_mode, op1);
3981
3982   /* If we need the remainder or if OP1 is constant, we need to
3983      put OP0 in a register in case it has any queued subexpressions.  */
3984   if (rem_flag || op1_is_constant)
3985     op0 = force_reg (compute_mode, op0);
3986
3987   last = get_last_insn ();
3988
3989   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3990   if (unsignedp)
3991     {
3992       if (code == FLOOR_DIV_EXPR)
3993         code = TRUNC_DIV_EXPR;
3994       if (code == FLOOR_MOD_EXPR)
3995         code = TRUNC_MOD_EXPR;
3996       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3997         code = TRUNC_DIV_EXPR;
3998     }
3999
4000   if (op1 != const0_rtx)
4001     switch (code)
4002       {
4003       case TRUNC_MOD_EXPR:
4004       case TRUNC_DIV_EXPR:
4005         if (op1_is_constant)
4006           {
4007             if (unsignedp)
4008               {
4009                 unsigned HOST_WIDE_INT mh;
4010                 int pre_shift, post_shift;
4011                 int dummy;
4012                 rtx ml;
4013                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4014                                             & GET_MODE_MASK (compute_mode));
4015
4016                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4017                   {
4018                     pre_shift = floor_log2 (d);
4019                     if (rem_flag)
4020                       {
4021                         remainder
4022                           = expand_binop (compute_mode, and_optab, op0,
4023                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4024                                           remainder, 1,
4025                                           OPTAB_LIB_WIDEN);
4026                         if (remainder)
4027                           return gen_lowpart (mode, remainder);
4028                       }
4029                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4030                                              build_int_cst (NULL_TREE,
4031                                                             pre_shift),
4032                                              tquotient, 1);
4033                   }
4034                 else if (size <= HOST_BITS_PER_WIDE_INT)
4035                   {
4036                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4037                       {
4038                         /* Most significant bit of divisor is set; emit an scc
4039                            insn.  */
4040                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4041                                                           compute_mode, 1, 1);
4042                       }
4043                     else
4044                       {
4045                         /* Find a suitable multiplier and right shift count
4046                            instead of multiplying with D.  */
4047
4048                         mh = choose_multiplier (d, size, size,
4049                                                 &ml, &post_shift, &dummy);
4050
4051                         /* If the suggested multiplier is more than SIZE bits,
4052                            we can do better for even divisors, using an
4053                            initial right shift.  */
4054                         if (mh != 0 && (d & 1) == 0)
4055                           {
4056                             pre_shift = floor_log2 (d & -d);
4057                             mh = choose_multiplier (d >> pre_shift, size,
4058                                                     size - pre_shift,
4059                                                     &ml, &post_shift, &dummy);
4060                             gcc_assert (!mh);
4061                           }
4062                         else
4063                           pre_shift = 0;
4064
4065                         if (mh != 0)
4066                           {
4067                             rtx t1, t2, t3, t4;
4068
4069                             if (post_shift - 1 >= BITS_PER_WORD)
4070                               goto fail1;
4071
4072                             extra_cost
4073                               = (shift_cost[speed][compute_mode][post_shift - 1]
4074                                  + shift_cost[speed][compute_mode][1]
4075                                  + 2 * add_cost[speed][compute_mode]);
4076                             t1 = expand_mult_highpart (compute_mode, op0, ml,
4077                                                        NULL_RTX, 1,
4078                                                        max_cost - extra_cost);
4079                             if (t1 == 0)
4080                               goto fail1;
4081                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4082                                                                op0, t1),
4083                                                 NULL_RTX);
4084                             t3 = expand_shift
4085                               (RSHIFT_EXPR, compute_mode, t2,
4086                                build_int_cst (NULL_TREE, 1),
4087                                NULL_RTX,1);
4088                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4089                                                               t1, t3),
4090                                                 NULL_RTX);
4091                             quotient = expand_shift
4092                               (RSHIFT_EXPR, compute_mode, t4,
4093                                build_int_cst (NULL_TREE, post_shift - 1),
4094                                tquotient, 1);
4095                           }
4096                         else
4097                           {
4098                             rtx t1, t2;
4099
4100                             if (pre_shift >= BITS_PER_WORD
4101                                 || post_shift >= BITS_PER_WORD)
4102                               goto fail1;
4103
4104                             t1 = expand_shift
4105                               (RSHIFT_EXPR, compute_mode, op0,
4106                                build_int_cst (NULL_TREE, pre_shift),
4107                                NULL_RTX, 1);
4108                             extra_cost
4109                               = (shift_cost[speed][compute_mode][pre_shift]
4110                                  + shift_cost[speed][compute_mode][post_shift]);
4111                             t2 = expand_mult_highpart (compute_mode, t1, ml,
4112                                                        NULL_RTX, 1,
4113                                                        max_cost - extra_cost);
4114                             if (t2 == 0)
4115                               goto fail1;
4116                             quotient = expand_shift
4117                               (RSHIFT_EXPR, compute_mode, t2,
4118                                build_int_cst (NULL_TREE, post_shift),
4119                                tquotient, 1);
4120                           }
4121                       }
4122                   }
4123                 else            /* Too wide mode to use tricky code */
4124                   break;
4125
4126                 insn = get_last_insn ();
4127                 if (insn != last
4128                     && (set = single_set (insn)) != 0
4129                     && SET_DEST (set) == quotient)
4130                   set_unique_reg_note (insn,
4131                                        REG_EQUAL,
4132                                        gen_rtx_UDIV (compute_mode, op0, op1));
4133               }
4134             else                /* TRUNC_DIV, signed */
4135               {
4136                 unsigned HOST_WIDE_INT ml;
4137                 int lgup, post_shift;
4138                 rtx mlr;
4139                 HOST_WIDE_INT d = INTVAL (op1);
4140                 unsigned HOST_WIDE_INT abs_d;
4141
4142                 /* Since d might be INT_MIN, we have to cast to
4143                    unsigned HOST_WIDE_INT before negating to avoid
4144                    undefined signed overflow.  */
4145                 abs_d = (d >= 0
4146                          ? (unsigned HOST_WIDE_INT) d
4147                          : - (unsigned HOST_WIDE_INT) d);
4148
4149                 /* n rem d = n rem -d */
4150                 if (rem_flag && d < 0)
4151                   {
4152                     d = abs_d;
4153                     op1 = gen_int_mode (abs_d, compute_mode);
4154                   }
4155
4156                 if (d == 1)
4157                   quotient = op0;
4158                 else if (d == -1)
4159                   quotient = expand_unop (compute_mode, neg_optab, op0,
4160                                           tquotient, 0);
4161                 else if (HOST_BITS_PER_WIDE_INT >= size
4162                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4163                   {
4164                     /* This case is not handled correctly below.  */
4165                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4166                                                 compute_mode, 1, 1);
4167                     if (quotient == 0)
4168                       goto fail1;
4169                   }
4170                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4171                          && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4172                                       : sdiv_pow2_cheap[speed][compute_mode])
4173                          /* We assume that cheap metric is true if the
4174                             optab has an expander for this mode.  */
4175                          && ((optab_handler ((rem_flag ? smod_optab
4176                                               : sdiv_optab),
4177                                               compute_mode)->insn_code
4178                               != CODE_FOR_nothing)
4179                              || (optab_handler(sdivmod_optab,
4180                                                compute_mode)
4181                                  ->insn_code != CODE_FOR_nothing)))
4182                   ;
4183                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4184                   {
4185                     if (rem_flag)
4186                       {
4187                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4188                         if (remainder)
4189                           return gen_lowpart (mode, remainder);
4190                       }
4191
4192                     if (sdiv_pow2_cheap[speed][compute_mode]
4193                         && ((optab_handler (sdiv_optab, compute_mode)->insn_code
4194                              != CODE_FOR_nothing)
4195                             || (optab_handler (sdivmod_optab, compute_mode)->insn_code
4196                                 != CODE_FOR_nothing)))
4197                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4198                                                 compute_mode, op0,
4199                                                 gen_int_mode (abs_d,
4200                                                               compute_mode),
4201                                                 NULL_RTX, 0);
4202                     else
4203                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4204
4205                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4206                        negate the quotient.  */
4207                     if (d < 0)
4208                       {
4209                         insn = get_last_insn ();
4210                         if (insn != last
4211                             && (set = single_set (insn)) != 0
4212                             && SET_DEST (set) == quotient
4213                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4214                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4215                           set_unique_reg_note (insn,
4216                                                REG_EQUAL,
4217                                                gen_rtx_DIV (compute_mode,
4218                                                             op0,
4219                                                             GEN_INT
4220                                                             (trunc_int_for_mode
4221                                                              (abs_d,
4222                                                               compute_mode))));
4223
4224                         quotient = expand_unop (compute_mode, neg_optab,
4225                                                 quotient, quotient, 0);
4226                       }
4227                   }
4228                 else if (size <= HOST_BITS_PER_WIDE_INT)
4229                   {
4230                     choose_multiplier (abs_d, size, size - 1,
4231                                        &mlr, &post_shift, &lgup);
4232                     ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4233                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4234                       {
4235                         rtx t1, t2, t3;
4236
4237                         if (post_shift >= BITS_PER_WORD
4238                             || size - 1 >= BITS_PER_WORD)
4239                           goto fail1;
4240
4241                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4242                                       + shift_cost[speed][compute_mode][size - 1]
4243                                       + add_cost[speed][compute_mode]);
4244                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4245                                                    NULL_RTX, 0,
4246                                                    max_cost - extra_cost);
4247                         if (t1 == 0)
4248                           goto fail1;
4249                         t2 = expand_shift
4250                           (RSHIFT_EXPR, compute_mode, t1,
4251                            build_int_cst (NULL_TREE, post_shift),
4252                            NULL_RTX, 0);
4253                         t3 = expand_shift
4254                           (RSHIFT_EXPR, compute_mode, op0,
4255                            build_int_cst (NULL_TREE, size - 1),
4256                            NULL_RTX, 0);
4257                         if (d < 0)
4258                           quotient
4259                             = force_operand (gen_rtx_MINUS (compute_mode,
4260                                                             t3, t2),
4261                                              tquotient);
4262                         else
4263                           quotient
4264                             = force_operand (gen_rtx_MINUS (compute_mode,
4265                                                             t2, t3),
4266                                              tquotient);
4267                       }
4268                     else
4269                       {
4270                         rtx t1, t2, t3, t4;
4271
4272                         if (post_shift >= BITS_PER_WORD
4273                             || size - 1 >= BITS_PER_WORD)
4274                           goto fail1;
4275
4276                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4277                         mlr = gen_int_mode (ml, compute_mode);
4278                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4279                                       + shift_cost[speed][compute_mode][size - 1]
4280                                       + 2 * add_cost[speed][compute_mode]);
4281                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4282                                                    NULL_RTX, 0,
4283                                                    max_cost - extra_cost);
4284                         if (t1 == 0)
4285                           goto fail1;
4286                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4287                                                           t1, op0),
4288                                             NULL_RTX);
4289                         t3 = expand_shift
4290                           (RSHIFT_EXPR, compute_mode, t2,
4291                            build_int_cst (NULL_TREE, post_shift),
4292                            NULL_RTX, 0);
4293                         t4 = expand_shift
4294                           (RSHIFT_EXPR, compute_mode, op0,
4295                            build_int_cst (NULL_TREE, size - 1),
4296                            NULL_RTX, 0);
4297                         if (d < 0)
4298                           quotient
4299                             = force_operand (gen_rtx_MINUS (compute_mode,
4300                                                             t4, t3),
4301                                              tquotient);
4302                         else
4303                           quotient
4304                             = force_operand (gen_rtx_MINUS (compute_mode,
4305                                                             t3, t4),
4306                                              tquotient);
4307                       }
4308                   }
4309                 else            /* Too wide mode to use tricky code */
4310                   break;
4311
4312                 insn = get_last_insn ();
4313                 if (insn != last
4314                     && (set = single_set (insn)) != 0
4315                     && SET_DEST (set) == quotient)
4316                   set_unique_reg_note (insn,
4317                                        REG_EQUAL,
4318                                        gen_rtx_DIV (compute_mode, op0, op1));
4319               }
4320             break;
4321           }
4322       fail1:
4323         delete_insns_since (last);
4324         break;
4325
4326       case FLOOR_DIV_EXPR:
4327       case FLOOR_MOD_EXPR:
4328       /* We will come here only for signed operations.  */
4329         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4330           {
4331             unsigned HOST_WIDE_INT mh;
4332             int pre_shift, lgup, post_shift;
4333             HOST_WIDE_INT d = INTVAL (op1);
4334             rtx ml;
4335
4336             if (d > 0)
4337               {
4338                 /* We could just as easily deal with negative constants here,
4339                    but it does not seem worth the trouble for GCC 2.6.  */
4340                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4341                   {
4342                     pre_shift = floor_log2 (d);
4343                     if (rem_flag)
4344                       {
4345                         remainder = expand_binop (compute_mode, and_optab, op0,
4346                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4347                                                   remainder, 0, OPTAB_LIB_WIDEN);
4348                         if (remainder)
4349                           return gen_lowpart (mode, remainder);
4350                       }
4351                     quotient = expand_shift
4352                       (RSHIFT_EXPR, compute_mode, op0,
4353                        build_int_cst (NULL_TREE, pre_shift),
4354                        tquotient, 0);
4355                   }
4356                 else
4357                   {
4358                     rtx t1, t2, t3, t4;
4359
4360                     mh = choose_multiplier (d, size, size - 1,
4361                                             &ml, &post_shift, &lgup);
4362                     gcc_assert (!mh);
4363
4364                     if (post_shift < BITS_PER_WORD
4365                         && size - 1 < BITS_PER_WORD)
4366                       {
4367                         t1 = expand_shift
4368                           (RSHIFT_EXPR, compute_mode, op0,
4369                            build_int_cst (NULL_TREE, size - 1),
4370                            NULL_RTX, 0);
4371                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4372                                            NULL_RTX, 0, OPTAB_WIDEN);
4373                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4374                                       + shift_cost[speed][compute_mode][size - 1]
4375                                       + 2 * add_cost[speed][compute_mode]);
4376                         t3 = expand_mult_highpart (compute_mode, t2, ml,
4377                                                    NULL_RTX, 1,
4378                                                    max_cost - extra_cost);
4379                         if (t3 != 0)
4380                           {
4381                             t4 = expand_shift
4382                               (RSHIFT_EXPR, compute_mode, t3,
4383                                build_int_cst (NULL_TREE, post_shift),
4384                                NULL_RTX, 1);
4385                             quotient = expand_binop (compute_mode, xor_optab,
4386                                                      t4, t1, tquotient, 0,
4387                                                      OPTAB_WIDEN);
4388                           }
4389                       }
4390                   }
4391               }
4392             else
4393               {
4394                 rtx nsign, t1, t2, t3, t4;
4395                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4396                                                   op0, constm1_rtx), NULL_RTX);
4397                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4398                                    0, OPTAB_WIDEN);
4399                 nsign = expand_shift
4400                   (RSHIFT_EXPR, compute_mode, t2,
4401                    build_int_cst (NULL_TREE, size - 1),
4402                    NULL_RTX, 0);
4403                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4404                                     NULL_RTX);
4405                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4406                                     NULL_RTX, 0);
4407                 if (t4)
4408                   {
4409                     rtx t5;
4410                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4411                                       NULL_RTX, 0);
4412                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4413                                                             t4, t5),
4414                                               tquotient);
4415                   }
4416               }
4417           }
4418
4419         if (quotient != 0)
4420           break;
4421         delete_insns_since (last);
4422
4423         /* Try using an instruction that produces both the quotient and
4424            remainder, using truncation.  We can easily compensate the quotient
4425            or remainder to get floor rounding, once we have the remainder.
4426            Notice that we compute also the final remainder value here,
4427            and return the result right away.  */
4428         if (target == 0 || GET_MODE (target) != compute_mode)
4429           target = gen_reg_rtx (compute_mode);
4430
4431         if (rem_flag)
4432           {
4433             remainder
4434               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4435             quotient = gen_reg_rtx (compute_mode);
4436           }
4437         else
4438           {
4439             quotient
4440               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4441             remainder = gen_reg_rtx (compute_mode);
4442           }
4443
4444         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4445                                  quotient, remainder, 0))
4446           {
4447             /* This could be computed with a branch-less sequence.
4448                Save that for later.  */
4449             rtx tem;
4450             rtx label = gen_label_rtx ();
4451             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4452             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4453                                 NULL_RTX, 0, OPTAB_WIDEN);
4454             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4455             expand_dec (quotient, const1_rtx);
4456             expand_inc (remainder, op1);
4457             emit_label (label);
4458             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4459           }
4460
4461         /* No luck with division elimination or divmod.  Have to do it
4462            by conditionally adjusting op0 *and* the result.  */
4463         {
4464           rtx label1, label2, label3, label4, label5;
4465           rtx adjusted_op0;
4466           rtx tem;
4467
4468           quotient = gen_reg_rtx (compute_mode);
4469           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4470           label1 = gen_label_rtx ();
4471           label2 = gen_label_rtx ();
4472           label3 = gen_label_rtx ();
4473           label4 = gen_label_rtx ();
4474           label5 = gen_label_rtx ();
4475           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4476           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4477           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4478                               quotient, 0, OPTAB_LIB_WIDEN);
4479           if (tem != quotient)
4480             emit_move_insn (quotient, tem);
4481           emit_jump_insn (gen_jump (label5));
4482           emit_barrier ();
4483           emit_label (label1);
4484           expand_inc (adjusted_op0, const1_rtx);
4485           emit_jump_insn (gen_jump (label4));
4486           emit_barrier ();
4487           emit_label (label2);
4488           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4489           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4490                               quotient, 0, OPTAB_LIB_WIDEN);
4491           if (tem != quotient)
4492             emit_move_insn (quotient, tem);
4493           emit_jump_insn (gen_jump (label5));
4494           emit_barrier ();
4495           emit_label (label3);
4496           expand_dec (adjusted_op0, const1_rtx);
4497           emit_label (label4);
4498           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4499                               quotient, 0, OPTAB_LIB_WIDEN);
4500           if (tem != quotient)
4501             emit_move_insn (quotient, tem);
4502           expand_dec (quotient, const1_rtx);
4503           emit_label (label5);
4504         }
4505         break;
4506
4507       case CEIL_DIV_EXPR:
4508       case CEIL_MOD_EXPR:
4509         if (unsignedp)
4510           {
4511             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4512               {
4513                 rtx t1, t2, t3;
4514                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4515                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4516                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4517                                    tquotient, 1);
4518                 t2 = expand_binop (compute_mode, and_optab, op0,
4519                                    GEN_INT (d - 1),
4520                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4521                 t3 = gen_reg_rtx (compute_mode);
4522                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4523                                       compute_mode, 1, 1);
4524                 if (t3 == 0)
4525                   {
4526                     rtx lab;
4527                     lab = gen_label_rtx ();
4528                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4529                     expand_inc (t1, const1_rtx);
4530                     emit_label (lab);
4531                     quotient = t1;
4532                   }
4533                 else
4534                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4535                                                           t1, t3),
4536                                             tquotient);
4537                 break;
4538               }
4539
4540             /* Try using an instruction that produces both the quotient and
4541                remainder, using truncation.  We can easily compensate the
4542                quotient or remainder to get ceiling rounding, once we have the
4543                remainder.  Notice that we compute also the final remainder
4544                value here, and return the result right away.  */
4545             if (target == 0 || GET_MODE (target) != compute_mode)
4546               target = gen_reg_rtx (compute_mode);
4547
4548             if (rem_flag)
4549               {
4550                 remainder = (REG_P (target)
4551                              ? target : gen_reg_rtx (compute_mode));
4552                 quotient = gen_reg_rtx (compute_mode);
4553               }
4554             else
4555               {
4556                 quotient = (REG_P (target)
4557                             ? target : gen_reg_rtx (compute_mode));
4558                 remainder = gen_reg_rtx (compute_mode);
4559               }
4560
4561             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4562                                      remainder, 1))
4563               {
4564                 /* This could be computed with a branch-less sequence.
4565                    Save that for later.  */
4566                 rtx label = gen_label_rtx ();
4567                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4568                                  compute_mode, label);
4569                 expand_inc (quotient, const1_rtx);
4570                 expand_dec (remainder, op1);
4571                 emit_label (label);
4572                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4573               }
4574
4575             /* No luck with division elimination or divmod.  Have to do it
4576                by conditionally adjusting op0 *and* the result.  */
4577             {
4578               rtx label1, label2;
4579               rtx adjusted_op0, tem;
4580
4581               quotient = gen_reg_rtx (compute_mode);
4582               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4583               label1 = gen_label_rtx ();
4584               label2 = gen_label_rtx ();
4585               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4586                                compute_mode, label1);
4587               emit_move_insn  (quotient, const0_rtx);
4588               emit_jump_insn (gen_jump (label2));
4589               emit_barrier ();
4590               emit_label (label1);
4591               expand_dec (adjusted_op0, const1_rtx);
4592               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4593                                   quotient, 1, OPTAB_LIB_WIDEN);
4594               if (tem != quotient)
4595                 emit_move_insn (quotient, tem);
4596               expand_inc (quotient, const1_rtx);
4597               emit_label (label2);
4598             }
4599           }
4600         else /* signed */
4601           {
4602             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4603                 && INTVAL (op1) >= 0)
4604               {
4605                 /* This is extremely similar to the code for the unsigned case
4606                    above.  For 2.7 we should merge these variants, but for
4607                    2.6.1 I don't want to touch the code for unsigned since that
4608                    get used in C.  The signed case will only be used by other
4609                    languages (Ada).  */
4610
4611                 rtx t1, t2, t3;
4612                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4613                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4614                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4615                                    tquotient, 0);
4616                 t2 = expand_binop (compute_mode, and_optab, op0,
4617                                    GEN_INT (d - 1),
4618                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4619                 t3 = gen_reg_rtx (compute_mode);
4620                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4621                                       compute_mode, 1, 1);
4622                 if (t3 == 0)
4623                   {
4624                     rtx lab;
4625                     lab = gen_label_rtx ();
4626                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4627                     expand_inc (t1, const1_rtx);
4628                     emit_label (lab);
4629                     quotient = t1;
4630                   }
4631                 else
4632                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4633                                                           t1, t3),
4634                                             tquotient);
4635                 break;
4636               }
4637
4638             /* Try using an instruction that produces both the quotient and
4639                remainder, using truncation.  We can easily compensate the
4640                quotient or remainder to get ceiling rounding, once we have the
4641                remainder.  Notice that we compute also the final remainder
4642                value here, and return the result right away.  */
4643             if (target == 0 || GET_MODE (target) != compute_mode)
4644               target = gen_reg_rtx (compute_mode);
4645             if (rem_flag)
4646               {
4647                 remainder= (REG_P (target)
4648                             ? target : gen_reg_rtx (compute_mode));
4649                 quotient = gen_reg_rtx (compute_mode);
4650               }
4651             else
4652               {
4653                 quotient = (REG_P (target)
4654                             ? target : gen_reg_rtx (compute_mode));
4655                 remainder = gen_reg_rtx (compute_mode);
4656               }
4657
4658             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4659                                      remainder, 0))
4660               {
4661                 /* This could be computed with a branch-less sequence.
4662                    Save that for later.  */
4663                 rtx tem;
4664                 rtx label = gen_label_rtx ();
4665                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4666                                  compute_mode, label);
4667                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4668                                     NULL_RTX, 0, OPTAB_WIDEN);
4669                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4670                 expand_inc (quotient, const1_rtx);
4671                 expand_dec (remainder, op1);
4672                 emit_label (label);
4673                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4674               }
4675
4676             /* No luck with division elimination or divmod.  Have to do it
4677                by conditionally adjusting op0 *and* the result.  */
4678             {
4679               rtx label1, label2, label3, label4, label5;
4680               rtx adjusted_op0;
4681               rtx tem;
4682
4683               quotient = gen_reg_rtx (compute_mode);
4684               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4685               label1 = gen_label_rtx ();
4686               label2 = gen_label_rtx ();
4687               label3 = gen_label_rtx ();
4688               label4 = gen_label_rtx ();
4689               label5 = gen_label_rtx ();
4690               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4691               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4692                                compute_mode, label1);
4693               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4694                                   quotient, 0, OPTAB_LIB_WIDEN);
4695               if (tem != quotient)
4696                 emit_move_insn (quotient, tem);
4697               emit_jump_insn (gen_jump (label5));
4698               emit_barrier ();
4699               emit_label (label1);
4700               expand_dec (adjusted_op0, const1_rtx);
4701               emit_jump_insn (gen_jump (label4));
4702               emit_barrier ();
4703               emit_label (label2);
4704               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4705                                compute_mode, label3);
4706               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4707                                   quotient, 0, OPTAB_LIB_WIDEN);
4708               if (tem != quotient)
4709                 emit_move_insn (quotient, tem);
4710               emit_jump_insn (gen_jump (label5));
4711               emit_barrier ();
4712               emit_label (label3);
4713               expand_inc (adjusted_op0, const1_rtx);
4714               emit_label (label4);
4715               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4716                                   quotient, 0, OPTAB_LIB_WIDEN);
4717               if (tem != quotient)
4718                 emit_move_insn (quotient, tem);
4719               expand_inc (quotient, const1_rtx);
4720               emit_label (label5);
4721             }
4722           }
4723         break;
4724
4725       case EXACT_DIV_EXPR:
4726         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4727           {
4728             HOST_WIDE_INT d = INTVAL (op1);
4729             unsigned HOST_WIDE_INT ml;
4730             int pre_shift;
4731             rtx t1;
4732
4733             pre_shift = floor_log2 (d & -d);
4734             ml = invert_mod2n (d >> pre_shift, size);
4735             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4736                                build_int_cst (NULL_TREE, pre_shift),
4737                                NULL_RTX, unsignedp);
4738             quotient = expand_mult (compute_mode, t1,
4739                                     gen_int_mode (ml, compute_mode),
4740                                     NULL_RTX, 1);
4741
4742             insn = get_last_insn ();
4743             set_unique_reg_note (insn,
4744                                  REG_EQUAL,
4745                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4746                                                  compute_mode,
4747                                                  op0, op1));
4748           }
4749         break;
4750
4751       case ROUND_DIV_EXPR:
4752       case ROUND_MOD_EXPR:
4753         if (unsignedp)
4754           {
4755             rtx tem;
4756             rtx label;
4757             label = gen_label_rtx ();
4758             quotient = gen_reg_rtx (compute_mode);
4759             remainder = gen_reg_rtx (compute_mode);
4760             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4761               {
4762                 rtx tem;
4763                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4764                                          quotient, 1, OPTAB_LIB_WIDEN);
4765                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4766                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4767                                           remainder, 1, OPTAB_LIB_WIDEN);
4768               }
4769             tem = plus_constant (op1, -1);
4770             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4771                                 build_int_cst (NULL_TREE, 1),
4772                                 NULL_RTX, 1);
4773             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4774             expand_inc (quotient, const1_rtx);
4775             expand_dec (remainder, op1);
4776             emit_label (label);
4777           }
4778         else
4779           {
4780             rtx abs_rem, abs_op1, tem, mask;
4781             rtx label;
4782             label = gen_label_rtx ();
4783             quotient = gen_reg_rtx (compute_mode);
4784             remainder = gen_reg_rtx (compute_mode);
4785             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4786               {
4787                 rtx tem;
4788                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4789                                          quotient, 0, OPTAB_LIB_WIDEN);
4790                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4791                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4792                                           remainder, 0, OPTAB_LIB_WIDEN);
4793               }
4794             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4795             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4796             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4797                                 build_int_cst (NULL_TREE, 1),
4798                                 NULL_RTX, 1);
4799             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4800             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4801                                 NULL_RTX, 0, OPTAB_WIDEN);
4802             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4803                                  build_int_cst (NULL_TREE, size - 1),
4804                                  NULL_RTX, 0);
4805             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4806                                 NULL_RTX, 0, OPTAB_WIDEN);
4807             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4808                                 NULL_RTX, 0, OPTAB_WIDEN);
4809             expand_inc (quotient, tem);
4810             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4811                                 NULL_RTX, 0, OPTAB_WIDEN);
4812             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4813                                 NULL_RTX, 0, OPTAB_WIDEN);
4814             expand_dec (remainder, tem);
4815             emit_label (label);
4816           }
4817         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4818
4819       default:
4820         gcc_unreachable ();
4821       }
4822
4823   if (quotient == 0)
4824     {
4825       if (target && GET_MODE (target) != compute_mode)
4826         target = 0;
4827
4828       if (rem_flag)
4829         {
4830           /* Try to produce the remainder without producing the quotient.
4831              If we seem to have a divmod pattern that does not require widening,
4832              don't try widening here.  We should really have a WIDEN argument
4833              to expand_twoval_binop, since what we'd really like to do here is
4834              1) try a mod insn in compute_mode
4835              2) try a divmod insn in compute_mode
4836              3) try a div insn in compute_mode and multiply-subtract to get
4837                 remainder
4838              4) try the same things with widening allowed.  */
4839           remainder
4840             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4841                                  op0, op1, target,
4842                                  unsignedp,
4843                                  ((optab_handler (optab2, compute_mode)->insn_code
4844                                    != CODE_FOR_nothing)
4845                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4846           if (remainder == 0)
4847             {
4848               /* No luck there.  Can we do remainder and divide at once
4849                  without a library call?  */
4850               remainder = gen_reg_rtx (compute_mode);
4851               if (! expand_twoval_binop ((unsignedp
4852                                           ? udivmod_optab
4853                                           : sdivmod_optab),
4854                                          op0, op1,
4855                                          NULL_RTX, remainder, unsignedp))
4856                 remainder = 0;
4857             }
4858
4859           if (remainder)
4860             return gen_lowpart (mode, remainder);
4861         }
4862
4863       /* Produce the quotient.  Try a quotient insn, but not a library call.
4864          If we have a divmod in this mode, use it in preference to widening
4865          the div (for this test we assume it will not fail). Note that optab2
4866          is set to the one of the two optabs that the call below will use.  */
4867       quotient
4868         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4869                              op0, op1, rem_flag ? NULL_RTX : target,
4870                              unsignedp,
4871                              ((optab_handler (optab2, compute_mode)->insn_code
4872                                != CODE_FOR_nothing)
4873                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4874
4875       if (quotient == 0)
4876         {
4877           /* No luck there.  Try a quotient-and-remainder insn,
4878              keeping the quotient alone.  */
4879           quotient = gen_reg_rtx (compute_mode);
4880           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4881                                      op0, op1,
4882                                      quotient, NULL_RTX, unsignedp))
4883             {
4884               quotient = 0;
4885               if (! rem_flag)
4886                 /* Still no luck.  If we are not computing the remainder,
4887                    use a library call for the quotient.  */
4888                 quotient = sign_expand_binop (compute_mode,
4889                                               udiv_optab, sdiv_optab,
4890                                               op0, op1, target,
4891                                               unsignedp, OPTAB_LIB_WIDEN);
4892             }
4893         }
4894     }
4895
4896   if (rem_flag)
4897     {
4898       if (target && GET_MODE (target) != compute_mode)
4899         target = 0;
4900
4901       if (quotient == 0)
4902         {
4903           /* No divide instruction either.  Use library for remainder.  */
4904           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4905                                          op0, op1, target,
4906                                          unsignedp, OPTAB_LIB_WIDEN);
4907           /* No remainder function.  Try a quotient-and-remainder
4908              function, keeping the remainder.  */
4909           if (!remainder)
4910             {
4911               remainder = gen_reg_rtx (compute_mode);
4912               if (!expand_twoval_binop_libfunc
4913                   (unsignedp ? udivmod_optab : sdivmod_optab,
4914                    op0, op1,
4915                    NULL_RTX, remainder,
4916                    unsignedp ? UMOD : MOD))
4917                 remainder = NULL_RTX;
4918             }
4919         }
4920       else
4921         {
4922           /* We divided.  Now finish doing X - Y * (X / Y).  */
4923           remainder = expand_mult (compute_mode, quotient, op1,
4924                                    NULL_RTX, unsignedp);
4925           remainder = expand_binop (compute_mode, sub_optab, op0,
4926                                     remainder, target, unsignedp,
4927                                     OPTAB_LIB_WIDEN);
4928         }
4929     }
4930
4931   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4932 }
4933 \f
4934 /* Return a tree node with data type TYPE, describing the value of X.
4935    Constants are built directly, and the arithmetic, shift, divide and extend
4936    codes handled below are rebuilt from their operands (we only support the
4937    expressions generated by loop.c); any other X becomes a nameless VAR_DECL.  */
4938
4939 tree
4940 make_tree (tree type, rtx x)
4941 {
4942   tree t;  /* The result, or the scratch type some cases compute in.  */
4943
4944   switch (GET_CODE (x))
4945     {
4946     case CONST_INT:
4947       {
4948         HOST_WIDE_INT hi = 0;  /* High word for build_int_cst_wide.  */
4949         /* Negative X gets HI = -1 unless TYPE is unsigned and narrower than a HWI.  */
4950         if (INTVAL (x) < 0
4951             && !(TYPE_UNSIGNED (type)
4952                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
4953                      < HOST_BITS_PER_WIDE_INT)))
4954           hi = -1;
4955
4956         t = build_int_cst_wide (type, INTVAL (x), hi);
4957
4958         return t;
4959       }
4960
4961     case CONST_DOUBLE:
4962       if (GET_MODE (x) == VOIDmode)  /* An integer held as low and high words.  */
4963         t = build_int_cst_wide (type,
4964                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4965       else  /* Otherwise a real value.  */
4966         {
4967           REAL_VALUE_TYPE d;
4968
4969           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4970           t = build_real (type, d);
4971         }
4972
4973       return t;
4974
4975     case CONST_VECTOR:
4976       {
4977         int units = CONST_VECTOR_NUNITS (x);
4978         tree itype = TREE_TYPE (type);  /* Type used for each element.  */
4979         tree t = NULL_TREE;  /* List of element trees; shadows the outer T.  */
4980         int i;
4981
4982
4983         /* Build a tree with vector elements, visiting them last to first.  */
4984         for (i = units - 1; i >= 0; --i)
4985           {
4986             rtx elt = CONST_VECTOR_ELT (x, i);
4987             t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
4988           }
4989
4990         return build_vector (type, t);
4991       }
4992     /* The arithmetic codes below rebuild their operands in TYPE and fold.  */
4993     case PLUS:
4994       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4995                           make_tree (type, XEXP (x, 1)));
4996
4997     case MINUS:
4998       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4999                           make_tree (type, XEXP (x, 1)));
5000
5001     case NEG:
5002       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5003
5004     case MULT:
5005       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5006                           make_tree (type, XEXP (x, 1)));
5007
5008     case ASHIFT:
5009       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5010                           make_tree (type, XEXP (x, 1)));
5011     /* Shift in unsigned_type_for (TYPE), then convert the result to TYPE.  */
5012     case LSHIFTRT:
5013       t = unsigned_type_for (type);
5014       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5015                                          make_tree (t, XEXP (x, 0)),
5016                                          make_tree (type, XEXP (x, 1))));
5017     /* Likewise, but in signed_type_for (TYPE).  */
5018     case ASHIFTRT:
5019       t = signed_type_for (type);
5020       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5021                                          make_tree (t, XEXP (x, 0)),
5022                                          make_tree (type, XEXP (x, 1))));
5023     /* DIV uses signed_type_for (TYPE), or TYPE itself when it is a REAL_TYPE.  */
5024     case DIV:
5025       if (TREE_CODE (type) != REAL_TYPE)
5026         t = signed_type_for (type);
5027       else
5028         t = type;
5029
5030       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5031                                          make_tree (t, XEXP (x, 0)),
5032                                          make_tree (t, XEXP (x, 1))));
5033     case UDIV:  /* As DIV, but always in unsigned_type_for (TYPE).  */
5034       t = unsigned_type_for (type);
5035       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5036                                          make_tree (t, XEXP (x, 0)),
5037                                          make_tree (t, XEXP (x, 1))));
5038     /* Type the operand by its own mode, unsigned for ZERO_EXTEND; then convert.  */
5039     case SIGN_EXTEND:
5040     case ZERO_EXTEND:
5041       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5042                                           GET_CODE (x) == ZERO_EXTEND);
5043       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5044     /* CONST only wraps its operand.  */
5045     case CONST:
5046       return make_tree (type, XEXP (x, 0));
5047     /* A symbol with a decl becomes the address of that decl, converted to TYPE.  */
5048     case SYMBOL_REF:
5049       t = SYMBOL_REF_DECL (x);
5050       if (t)
5051         return fold_convert (type, build_fold_addr_expr (t));
5052       /* else fall through.  */
5053     /* Anything else is described by a nameless VAR_DECL of TYPE whose rtl is X.  */
5054     default:
5055       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5056
5057       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5058          address mode to pointer mode.  */
5059       if (POINTER_TYPE_P (type))
5060         x = convert_memory_address_addr_space
5061               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5062
5063       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5064          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5065       t->decl_with_rtl.rtl = x;
5066
5067       return t;
5068     }
5069 }
5070 \f
5071 /* Emit the bitwise AND of OP0 and OP1, computed in MODE.  A nonzero TARGET
5072    receives the value and is what gets returned; when TARGET is 0 the rtx
5073    holding the result (a pseudo-register or a constant) is returned.  */
5074
5075 rtx
5076 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5077 {
5078   rtx val = NULL_RTX;
5079
5080   /* Try folding first when neither operand carries a mode.  */
5081   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5082     val = simplify_binary_operation (AND, mode, op0, op1);
5083   if (!val)
5084     val = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5085
5086   if (!target)
5087     return val;
5088   if (val != target)
5089     emit_move_insn (target, val);
5090   return target;
5091 }
5092
5093 /* Helper function for emit_store_flag.  Emit cstore pattern ICODE for the
5094    comparison CODE of X and Y (prepared using MODE, COMPARE_MODE, UNSIGNEDP),
5095    normalize the flag as NORMALIZEP asks and convert it to TARGET_MODE, which
5096    when VOIDmode means the mode of the pattern's operand 0.  TARGET may be 0.
5097    Return the result, or NULL_RTX with anything emitted here deleted again.  */
5098 static rtx
5099 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5100              enum machine_mode mode, enum machine_mode compare_mode,
5101              int unsignedp, rtx x, rtx y, int normalizep,
5102              enum machine_mode target_mode)
5103 {
5104   rtx op0, last, comparison, subtarget, pattern;
5105   enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5106
5107   last = get_last_insn ();
5108   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5109   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5110   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5111   if (!x || !y
5112       || !insn_data[icode].operand[2].predicate
5113           (x, insn_data[icode].operand[2].mode)
5114       || !insn_data[icode].operand[3].predicate
5115           (y, insn_data[icode].operand[3].mode)
5116       || !insn_data[icode].operand[1].predicate (comparison, VOIDmode))
5117     {
5118       delete_insns_since (last);
5119       return NULL_RTX;
5120     }
5121
5122   if (target_mode == VOIDmode)
5123     target_mode = result_mode;
5124   if (!target)
5125     target = gen_reg_rtx (target_mode);
5126
5127   if (optimize
5128       || !(insn_data[(int) icode].operand[0].predicate (target, result_mode)))
5129     subtarget = gen_reg_rtx (result_mode);
5130   else
5131     subtarget = target;
5132
5133   pattern = GEN_FCN (icode) (subtarget, comparison, x, y);
5134   if (!pattern)
5135     {
5136       delete_insns_since (last);  /* Undo anything emitted since LAST.  */
5137       return NULL_RTX;
5138     }
5139   emit_insn (pattern);
5140
5141   /* If we are converting to a wider mode, first convert to TARGET_MODE,
5142      then normalize.  This produces better combining opportunities on
5143      machines that have a SIGN_EXTRACT when we are testing a single bit.
5144      This mostly benefits the 68k.  If STORE_FLAG_VALUE does not have the
5145      sign bit set when interpreted in MODE, we can do this conversion as
5146      unsigned, which is usually more efficient.  */
5147   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5148     {
5149       convert_move (target, subtarget,
5150                     (GET_MODE_BITSIZE (result_mode) <= HOST_BITS_PER_WIDE_INT)
5151                     && 0 == (STORE_FLAG_VALUE
5152                              & ((HOST_WIDE_INT) 1
5153                                 << (GET_MODE_BITSIZE (result_mode) -1))));
5154       op0 = target;
5155       result_mode = target_mode;
5156     }
5157   else
5158     op0 = subtarget;
5159
5160   /* If we want to keep subexpressions around, don't reuse our last target.  */
5161   if (optimize)
5162     subtarget = 0;
5163
5164   /* Now normalize to the requested value; sometimes that is a no-op.  */
5165   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5166     ;
5167   /* STORE_FLAG_VALUE might be the most negative number, so write
5168      the comparison this way to avoid a compiler-time warning.  */
5169   else if (- normalizep == STORE_FLAG_VALUE)
5170     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5171
5172   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5173      it hard to use a value of just the sign bit due to ANSI integer
5174      constant typing rules.  */
5175   else if (GET_MODE_BITSIZE (result_mode) <= HOST_BITS_PER_WIDE_INT
5176            && (STORE_FLAG_VALUE
5177                & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (result_mode) - 1))))
5178     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5179                         size_int (GET_MODE_BITSIZE (result_mode) - 1), subtarget,
5180                         normalizep == 1);
5181   else
5182     {
5183       gcc_assert (STORE_FLAG_VALUE & 1);
5184
5185       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5186       if (normalizep == -1)
5187         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5188     }
5189
5190   /* If we were converting to a smaller mode, do the conversion now.  */
5191   if (target_mode == result_mode)
5192     return op0;
5193   convert_move (target, op0, 0);
5194   return target;
5195 }
5196
5197
5198 /* A subroutine of emit_store_flag only including "tricks" that do not
5199    need a recursive call.  These are kept separate to avoid infinite
5200    loops.  */
5201
5202 static rtx
5203 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5204                    enum machine_mode mode, int unsignedp, int normalizep,
5205                    enum machine_mode target_mode)
5206 {
5207   rtx subtarget;
5208   enum insn_code icode;
5209   enum machine_mode compare_mode;
5210   enum mode_class mclass;
5211   enum rtx_code scode;
5212   rtx tem;
5213
5214   if (unsignedp)
5215     code = unsigned_condition (code);
5216   scode = swap_condition (code);
5217
5218   /* If one operand is constant, make it the second one.  Only do this
5219      if the other operand is not constant as well.  */
5220
5221   if (swap_commutative_operands_p (op0, op1))
5222     {
5223       tem = op0;
5224       op0 = op1;
5225       op1 = tem;
5226       code = swap_condition (code);
5227     }
5228
5229   if (mode == VOIDmode)
5230     mode = GET_MODE (op0);
5231
5232   /* For some comparisons with 1 and -1, we can convert this to
5233      comparisons with zero.  This will often produce more opportunities for
5234      store-flag insns.  */
5235
5236   switch (code)
5237     {
5238     case LT:
5239       if (op1 == const1_rtx)
5240         op1 = const0_rtx, code = LE;
5241       break;
5242     case LE:
5243       if (op1 == constm1_rtx)
5244         op1 = const0_rtx, code = LT;
5245       break;
5246     case GE:
5247       if (op1 == const1_rtx)
5248         op1 = const0_rtx, code = GT;
5249       break;
5250     case GT:
5251       if (op1 == constm1_rtx)
5252         op1 = const0_rtx, code = GE;
5253       break;
5254     case GEU:
5255       if (op1 == const1_rtx)
5256         op1 = const0_rtx, code = NE;
5257       break;
5258     case LTU:
5259       if (op1 == const1_rtx)
5260         op1 = const0_rtx, code = EQ;
5261       break;
5262     default:
5263       break;
5264     }
5265
5266   /* If we are comparing a double-word integer with zero or -1, we can
5267      convert the comparison into one involving a single word.  */
5268   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5269       && GET_MODE_CLASS (mode) == MODE_INT
5270       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5271     {
5272       if ((code == EQ || code == NE)
5273           && (op1 == const0_rtx || op1 == constm1_rtx))
5274         {
5275           rtx op00, op01;
5276
5277           /* Do a logical OR or AND of the two words and compare the
5278              result.  */
5279           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5280           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5281           tem = expand_binop (word_mode,
5282                               op1 == const0_rtx ? ior_optab : and_optab,
5283                               op00, op01, NULL_RTX, unsignedp,
5284                               OPTAB_DIRECT);
5285
5286           if (tem != 0)
5287             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5288                                    unsignedp, normalizep);
5289         }
5290       else if ((code == LT || code == GE) && op1 == const0_rtx)
5291         {
5292           rtx op0h;
5293
5294           /* If testing the sign bit, can just test on high word.  */
5295           op0h = simplify_gen_subreg (word_mode, op0, mode,
5296                                       subreg_highpart_offset (word_mode,
5297                                                               mode));
5298           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5299                                  unsignedp, normalizep);
5300         }
5301       else
5302         tem = NULL_RTX;
5303
5304       if (tem)
5305         {
5306           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5307             return tem;
5308           if (!target)
5309             target = gen_reg_rtx (target_mode);
5310
5311           convert_move (target, tem,
5312                         0 == ((normalizep ? normalizep : STORE_FLAG_VALUE)
5313                               & ((HOST_WIDE_INT) 1
5314                                  << (GET_MODE_BITSIZE (word_mode) -1))));
5315           return target;
5316         }
5317     }
5318
5319   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5320      complement of A (for GE) and shifting the sign bit to the low bit.  */
5321   if (op1 == const0_rtx && (code == LT || code == GE)
5322       && GET_MODE_CLASS (mode) == MODE_INT
5323       && (normalizep || STORE_FLAG_VALUE == 1
5324           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5325               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5326                   == ((unsigned HOST_WIDE_INT) 1
5327                       << (GET_MODE_BITSIZE (mode) - 1))))))
5328     {
5329       subtarget = target;
5330
5331       if (!target)
5332         target_mode = mode;
5333
5334       /* If the result is to be wider than OP0, it is best to convert it
5335          first.  If it is to be narrower, it is *incorrect* to convert it
5336          first.  */
5337       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5338         {
5339           op0 = convert_modes (target_mode, mode, op0, 0);
5340           mode = target_mode;
5341         }
5342
5343       if (target_mode != mode)
5344         subtarget = 0;
5345
5346       if (code == GE)
5347         op0 = expand_unop (mode, one_cmpl_optab, op0,
5348                            ((STORE_FLAG_VALUE == 1 || normalizep)
5349                             ? 0 : subtarget), 0);
5350
5351       if (STORE_FLAG_VALUE == 1 || normalizep)
5352         /* If we are supposed to produce a 0/1 value, we want to do
5353            a logical shift from the sign bit to the low-order bit; for
5354            a -1/0 value, we do an arithmetic shift.  */
5355         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5356                             size_int (GET_MODE_BITSIZE (mode) - 1),
5357                             subtarget, normalizep != -1);
5358
5359       if (mode != target_mode)
5360         op0 = convert_modes (target_mode, mode, op0, 0);
5361
5362       return op0;
5363     }
5364
5365   mclass = GET_MODE_CLASS (mode);
5366   for (compare_mode = mode; compare_mode != VOIDmode;
5367        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5368     {
5369      enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5370      icode = optab_handler (cstore_optab, optab_mode)->insn_code;
5371      if (icode != CODE_FOR_nothing)
5372         {
5373           do_pending_stack_adjust ();
5374           tem = emit_cstore (target, icode, code, mode, compare_mode,
5375                              unsignedp, op0, op1, normalizep, target_mode);
5376           if (tem)
5377             return tem;
5378
5379           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5380             {
5381               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5382                                  unsignedp, op1, op0, normalizep, target_mode);
5383               if (tem)
5384                 return tem;
5385             }
5386           break;
5387         }
5388     }
5389
5390   return 0;
5391 }
5392
5393 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5394    and storing in TARGET.  Normally return TARGET.
5395    Return 0 if that cannot be done.
5396
5397    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5398    it is VOIDmode, they cannot both be CONST_INT.
5399
5400    UNSIGNEDP is for the case where we have to widen the operands
5401    to perform the operation.  It says to use zero-extension.
5402
5403    NORMALIZEP is 1 if we should convert the result to be either zero
5404    or one.  Normalize is -1 if we should convert the result to be
5405    either zero or -1.  If NORMALIZEP is zero, the result will be left
5406    "raw" out of the scc insn.  */
     /* Overview of the strategy used below: emit_store_flag_1 is tried
        first.  If it fails and branches are not free, the fallbacks are,
        for floating-point modes, the reversed comparison fixed up with an
        add or an xor and then a split of the comparison; for integer
        modes, reducing EQ/NE against a nonzero operand to a comparison
        with zero, the reversed comparison, and finally sign-bit arithmetic
        for EQ, NE, LE and GT against zero.  Insns emitted by an abandoned
        attempt are removed with delete_insns_since.  */
5407
5408 rtx
5409 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5410                  enum machine_mode mode, int unsignedp, int normalizep)
5411 {
5412   enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5413   enum rtx_code rcode;
5414   rtx subtarget;
5415   rtx tem, last, trueval;
5416
5417   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5418                            target_mode);
5419   if (tem)
5420     return tem;
5421
5422   /* If we reached here, we can't do this with a scc insn, however there
5423      are some comparisons that can be done in other ways.  Don't do any
5424      of these cases if branches are very cheap.  */
5425   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5426     return 0;
5427
5428   /* See what we need to return.  We can only return a 1, -1, or the
5429      sign bit.  */
5430
5431   if (normalizep == 0)
5432     {
5433       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5434         normalizep = STORE_FLAG_VALUE;
5435
5436       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5437                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5438                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
             /* STORE_FLAG_VALUE is exactly the sign bit of MODE.  Leave
                NORMALIZEP at zero so that the final normalizing shift at
                the end of this function is skipped.  */
5439         ;
5440       else
5441         return 0;
5442     }
5443
       /* Remember the current end of the insn stream so that a failed
          attempt can be undone with delete_insns_since.  */
5444   last = get_last_insn ();
5445
5446   /* If optimizing, use different pseudo registers for each insn, instead
5447      of reusing the same pseudo.  This leads to better CSE, but slows
5448      down the compiler, since there are more pseudos */
5449   subtarget = (!optimize
5450                && (target_mode == mode)) ? target : NULL_RTX;
5451   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5452
5453   /* For floating-point comparisons, try the reverse comparison or try
5454      changing the "orderedness" of the comparison.  */
5455   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5456     {
5457       enum rtx_code first_code;
5458       bool and_them;
5459
5460       rcode = reverse_condition_maybe_unordered (code);
5461       if (can_compare_p (rcode, mode, ccp_store_flag)
5462           && (code == ORDERED || code == UNORDERED
5463               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5464               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5465         {
               /* WANT_ADD is set when STORE_FLAG_VALUE and NORMALIZEP are 1
                  and -1 in either order.  The reversed comparison is then
                  computed as STORE_FLAG_VALUE/0 and adding NORMALIZEP turns
                  that into 0/NORMALIZEP.  Otherwise the reversed result is
                  0/TRUEVAL and an XOR with TRUEVAL inverts it.  Either
                  fix-up is only used when rtx_cost reports it as free.  */
5466           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5467                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5468
5469           /* For the reverse comparison, use either an addition or a XOR.  */
5470           if (want_add
5471               && rtx_cost (GEN_INT (normalizep), PLUS,
5472                            optimize_insn_for_speed_p ()) == 0)
5473             {
5474               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5475                                        STORE_FLAG_VALUE, target_mode);
5476               if (tem)
5477                 return expand_binop (target_mode, add_optab, tem,
5478                                      GEN_INT (normalizep),
5479                                      target, 0, OPTAB_WIDEN);
5480             }
5481           else if (!want_add
5482                    && rtx_cost (trueval, XOR,
5483                                 optimize_insn_for_speed_p ()) == 0)
5484             {
5485               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5486                                        normalizep, target_mode);
5487               if (tem)
5488                 return expand_binop (target_mode, xor_optab, tem, trueval,
5489                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5490             }
5491         }
5492
5493       delete_insns_since (last);
5494
5495       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5496       if (code == ORDERED || code == UNORDERED)
5497         return 0;
5498
           /* From here on CODE holds the second comparison of the split and
              FIRST_CODE the first one.  */
5499       and_them = split_comparison (code, mode, &first_code, &code);
5500
5501       /* If there are no NaNs, the first comparison should always fall through.
5502          Effectively change the comparison to the other one.  */
5503       if (!HONOR_NANS (mode))
5504         {
5505           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5506           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5507                                     target_mode);
5508         }
5509
5510 #ifdef HAVE_conditional_move
5511       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5512          conditional move.  */
5513       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5514                                normalizep, target_mode);
5515       if (tem == 0)
5516         return 0;
5517
           /* When the two halves are ANDed the result is TEM if CODE holds
              and zero otherwise; when they are ORed it is TRUEVAL if CODE
              holds and TEM otherwise.  */
5518       if (and_them)
5519         tem = emit_conditional_move (target, code, op0, op1, mode,
5520                                      tem, const0_rtx, GET_MODE (tem), 0);
5521       else
5522         tem = emit_conditional_move (target, code, op0, op1, mode,
5523                                      trueval, tem, GET_MODE (tem), 0);
5524
5525       if (tem == 0)
5526         delete_insns_since (last);
5527       return tem;
5528 #else
5529       return 0;
5530 #endif
5531     }
5532
5533   /* The remaining tricks only apply to integer comparisons.  */
5534
5535   if (GET_MODE_CLASS (mode) != MODE_INT)
5536     return 0;
5537
5538   /* If this is an equality comparison of integers, we can try to exclusive-or
5539      (or subtract) the two operands and use a recursive call to try the
5540      comparison with zero.  Don't do any of these cases if branches are
5541      very cheap.  */
5542
5543   if ((code == EQ || code == NE) && op1 != const0_rtx)
5544     {
5545       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5546                           OPTAB_WIDEN);
5547
5548       if (tem == 0)
5549         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5550                             OPTAB_WIDEN);
5551       if (tem != 0)
5552         tem = emit_store_flag (target, code, tem, const0_rtx,
5553                                mode, unsignedp, normalizep);
5554       if (tem != 0)
5555         return tem;
5556
5557       delete_insns_since (last);
5558     }
5559
5560   /* For integer comparisons, try the reverse comparison.  However, for
5561      small X and if we'd have anyway to extend, implementing "X != 0"
5562      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5563   rcode = reverse_condition (code);
5564   if (can_compare_p (rcode, mode, ccp_store_flag)
5565       && ! (optab_handler (cstore_optab, mode)->insn_code == CODE_FOR_nothing
5566             && code == NE
5567             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5568             && op1 == const0_rtx))
5569     {
5570       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5571                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5572
5573       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5574       if (want_add
5575           && rtx_cost (GEN_INT (normalizep), PLUS,
5576                        optimize_insn_for_speed_p ()) == 0)
5577         {
5578           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5579                                    STORE_FLAG_VALUE, target_mode);
5580           if (tem != 0)
5581             tem = expand_binop (target_mode, add_optab, tem,
5582                                 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5583         }
5584       else if (!want_add
5585                && rtx_cost (trueval, XOR,
5586                             optimize_insn_for_speed_p ()) == 0)
5587         {
5588           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5589                                    normalizep, target_mode);
5590           if (tem != 0)
5591             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5592                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5593         }
5594
           /* If neither branch above ran, TEM is still zero here: every
              earlier path in this function either returned or left TEM
              zero.  */
5595       if (tem != 0)
5596         return tem;
5597       delete_insns_since (last);
5598     }
5599
5600   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5601      the constant zero.  Reject all other comparisons at this point.  Only
5602      do LE and GT if branches are expensive since they are expensive on
5603      2-operand machines.  */
5604
5605   if (op1 != const0_rtx
5606       || (code != EQ && code != NE
5607           && (BRANCH_COST (optimize_insn_for_speed_p (),
5608                            false) <= 1 || (code != LE && code != GT))))
5609     return 0;
5610
5611   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5612      do the necessary operation below.  */
5613
5614   tem = 0;
5615
5616   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5617      the sign bit set.  */
5618
5619   if (code == LE)
5620     {
5621       /* This is destructive, so SUBTARGET can't be OP0.  */
5622       if (rtx_equal_p (subtarget, op0))
5623         subtarget = 0;
5624
5625       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5626                           OPTAB_WIDEN);
5627       if (tem)
5628         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5629                             OPTAB_WIDEN);
5630     }
5631
5632   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5633      number of bits in the mode of OP0, minus one.  */
5634
5635   if (code == GT)
5636     {
5637       if (rtx_equal_p (subtarget, op0))
5638         subtarget = 0;
5639
5640       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5641                           size_int (GET_MODE_BITSIZE (mode) - 1),
5642                           subtarget, 0);
5643       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5644                           OPTAB_WIDEN);
5645     }
5646
5647   if (code == EQ || code == NE)
5648     {
5649       /* For EQ or NE, one way to do the comparison is to apply an operation
5650          that converts the operand into a positive number if it is nonzero
5651          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5652          for NE we negate.  This puts the result in the sign bit.  Then we
5653          normalize with a shift, if needed.
5654
5655          Two operations that can do the above actions are ABS and FFS, so try
5656          them.  If that doesn't work, and MODE is smaller than a full word,
5657          we can use zero-extension to the wider mode (an unsigned conversion)
5658          as the operation.  */
5659
5660       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5661          that is compensated by the subsequent overflow when subtracting
5662          one / negating.  */
5663
5664       if (optab_handler (abs_optab, mode)->insn_code != CODE_FOR_nothing)
5665         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5666       else if (optab_handler (ffs_optab, mode)->insn_code != CODE_FOR_nothing)
5667         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5668       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5669         {
               /* MODE is switched to word_mode here, so every later
                  operation in this function, including the final shift,
                  works on the widened value.  */
5670           tem = convert_modes (word_mode, mode, op0, 1);
5671           mode = word_mode;
5672         }
5673
5674       if (tem != 0)
5675         {
5676           if (code == EQ)
5677             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5678                                 0, OPTAB_WIDEN);
5679           else
5680             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5681         }
5682
5683       /* If we couldn't do it that way, for NE we can "or" the two's complement
5684          of the value with itself.  For EQ, we take the one's complement of
5685          that "or", which is an extra insn, so we only handle EQ if branches
5686          are expensive.  */
5687
5688       if (tem == 0
5689           && (code == NE
5690               || BRANCH_COST (optimize_insn_for_speed_p (),
5691                               false) > 1))
5692         {
5693           if (rtx_equal_p (subtarget, op0))
5694             subtarget = 0;
5695
5696           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5697           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5698                               OPTAB_WIDEN);
5699
5700           if (tem && code == EQ)
5701             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5702         }
5703     }
5704
       /* TEM now carries the answer in its sign bit.  For a 0/1 result the
          shift is requested as unsigned, for a 0/-1 result as signed; with
          NORMALIZEP zero the sign bit itself is the wanted value.  */
5705   if (tem && normalizep)
5706     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5707                         size_int (GET_MODE_BITSIZE (mode) - 1),
5708                         subtarget, normalizep == 1);
5709
5710   if (tem)
5711     {
5712       if (!target)
5713         ;
5714       else if (GET_MODE (tem) != target_mode)
5715         {
5716           convert_move (target, tem, 0);
5717           tem = target;
5718         }
           /* NOTE(review): when SUBTARGET is still set it equals TARGET, so
              presumably TEM was already computed into TARGET and no copy is
              needed; confirm that the expanders honor the suggestion.  */
5719       else if (!subtarget)
5720         {
5721           emit_move_insn (target, tem);
5722           tem = target;
5723         }
5724     }
5725   else
5726     delete_insns_since (last);
5727
5728   return tem;
5729 }
5730
5731 /* Like emit_store_flag, but always succeeds.  */
5732
5733 rtx
5734 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5735                        enum machine_mode mode, int unsignedp, int normalizep)
5736 {
5737   rtx tem, label;
5738   rtx trueval, falseval;
5739
5740   /* First see if emit_store_flag can do the job.  */
5741   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5742   if (tem != 0)
5743     return tem;
5744
5745   if (!target)
5746     target = gen_reg_rtx (word_mode);
5747
5748   /* If this failed, we have to do this with set/compare/jump/set code.
5749      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5750   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5751   if (code == NE
5752       && GET_MODE_CLASS (mode) == MODE_INT
5753       && REG_P (target)
5754       && op0 == target
5755       && op1 == const0_rtx)
5756     {
5757       label = gen_label_rtx ();
5758       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5759                                mode, NULL_RTX, NULL_RTX, label, -1);
5760       emit_move_insn (target, trueval);
5761       emit_label (label);
5762       return target;
5763     }
5764
5765   if (!REG_P (target)
5766       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5767     target = gen_reg_rtx (GET_MODE (target));
5768
5769   /* Jump in the right direction if the target cannot implement CODE
5770      but can jump on its reverse condition.  */
5771   falseval = const0_rtx;
5772   if (! can_compare_p (code, mode, ccp_jump)
5773       && (! FLOAT_MODE_P (mode)
5774           || code == ORDERED || code == UNORDERED
5775           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5776           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5777     {
5778       enum rtx_code rcode;
5779       if (FLOAT_MODE_P (mode))
5780         rcode = reverse_condition_maybe_unordered (code);
5781       else
5782         rcode = reverse_condition (code);
5783
5784       /* Canonicalize to UNORDERED for the libcall.  */
5785       if (can_compare_p (rcode, mode, ccp_jump)
5786           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5787         {
5788           falseval = trueval;
5789           trueval = const0_rtx;
5790           code = rcode;
5791         }
5792     }
5793
5794   emit_move_insn (target, trueval);
5795   label = gen_label_rtx ();
5796   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5797                            NULL_RTX, label, -1);
5798
5799   emit_move_insn (target, falseval);
5800   emit_label (label);
5801
5802   return target;
5803 }
5804 \f
5805 /* Perform possibly multi-word comparison and conditional jump to LABEL
5806    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5807    now a thin wrapper around do_compare_rtx_and_jump.  */
5808
5809 static void
5810 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5811                  rtx label)
5812 {
5813   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5814   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5815                            NULL_RTX, NULL_RTX, label, -1);
5816 }