gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
   5    Free Software Foundation, Inc.
   6
   7 This file is part of GCC.
   8
   9 GCC is free software; you can redistribute it and/or modify it under
  10 the terms of the GNU General Public License as published by the Free
  11 Software Foundation; either version 3, or (at your option) any later
  12 version.
  13
  14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  17 for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with GCC; see the file COPYING3.  If not see
  21 <http://www.gnu.org/licenses/>.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "tm.h"
  28 #include "toplev.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "tm_p.h"
  32 #include "flags.h"
  33 #include "insn-config.h"
  34 #include "expr.h"
  35 #include "optabs.h"
  36 #include "real.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39 #include "df.h"
  40 #include "target.h"
  41
  42 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  43                                    unsigned HOST_WIDE_INT,
  44                                    unsigned HOST_WIDE_INT, rtx);
  45 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  46                                    unsigned HOST_WIDE_INT, rtx);
  47 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  48                                     unsigned HOST_WIDE_INT,
  49                                     unsigned HOST_WIDE_INT,
  50                                     unsigned HOST_WIDE_INT, rtx, int);
  51 static rtx mask_rtx (enum machine_mode, int, int, int);
  52 static rtx lshift_value (enum machine_mode, rtx, int, int);
  53 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  54                                     unsigned HOST_WIDE_INT, int);
  55 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  56 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  57 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  58
  59 /* Test whether a value is zero of a power of two.  */
  60 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
  61
  62 /* Nonzero means divides or modulus operations are relatively cheap for
  63    powers of two, so don't use branches; emit the operation instead.
  64    Usually, this will mean that the MD file will emit non-branch
  65    sequences.  */
  66
  67 static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
  68 static bool smod_pow2_cheap[NUM_MACHINE_MODES];
  69
  70 #ifndef SLOW_UNALIGNED_ACCESS
  71 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  72 #endif
  73
  74 /* For compilers that support multiple targets with different word sizes,
  75    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  76    is the H8/300(H) compiler.  */
  77
  78 #ifndef MAX_BITS_PER_WORD
  79 #define MAX_BITS_PER_WORD BITS_PER_WORD
  80 #endif
  81
  82 /* Reduce conditional compilation elsewhere.  */
  83 #ifndef HAVE_insv
  84 #define HAVE_insv       0
  85 #define CODE_FOR_insv   CODE_FOR_nothing
  86 #define gen_insv(a,b,c,d) NULL_RTX
  87 #endif
  88 #ifndef HAVE_extv
  89 #define HAVE_extv       0
  90 #define CODE_FOR_extv   CODE_FOR_nothing
  91 #define gen_extv(a,b,c,d) NULL_RTX
  92 #endif
  93 #ifndef HAVE_extzv
  94 #define HAVE_extzv      0
  95 #define CODE_FOR_extzv  CODE_FOR_nothing
  96 #define gen_extzv(a,b,c,d) NULL_RTX
  97 #endif
  98
  99 /* Cost of various pieces of RTL.  Note that some of these are indexed by
 100    shift count and some by mode.  */
 101 static int zero_cost;
 102 static int add_cost[NUM_MACHINE_MODES];
 103 static int neg_cost[NUM_MACHINE_MODES];
 104 static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 105 static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 106 static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 107 static int mul_cost[NUM_MACHINE_MODES];
 108 static int sdiv_cost[NUM_MACHINE_MODES];
 109 static int udiv_cost[NUM_MACHINE_MODES];
 110 static int mul_widen_cost[NUM_MACHINE_MODES];
 111 static int mul_highpart_cost[NUM_MACHINE_MODES];
 112
 113 void
 114 init_expmed (void)
 115 {
 116   struct
 117   {
 118     struct rtx_def reg;         rtunion reg_fld[2];
 119     struct rtx_def plus;        rtunion plus_fld1;
 120     struct rtx_def neg;
 121     struct rtx_def mult;        rtunion mult_fld1;
 122     struct rtx_def sdiv;        rtunion sdiv_fld1;
 123     struct rtx_def udiv;        rtunion udiv_fld1;
 124     struct rtx_def zext;
 125     struct rtx_def sdiv_32;     rtunion sdiv_32_fld1;
 126     struct rtx_def smod_32;     rtunion smod_32_fld1;
 127     struct rtx_def wide_mult;   rtunion wide_mult_fld1;
 128     struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
 129     struct rtx_def wide_trunc;
 130     struct rtx_def shift;       rtunion shift_fld1;
 131     struct rtx_def shift_mult;  rtunion shift_mult_fld1;
 132     struct rtx_def shift_add;   rtunion shift_add_fld1;
 133     struct rtx_def shift_sub;   rtunion shift_sub_fld1;
 134   } all;
 135
 136   rtx pow2[MAX_BITS_PER_WORD];
 137   rtx cint[MAX_BITS_PER_WORD];
 138   int m, n;
 139   enum machine_mode mode, wider_mode;
 140
 141   zero_cost = rtx_cost (const0_rtx, 0);
 142
 143   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 144     {
 145       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 146       cint[m] = GEN_INT (m);
 147     }
 148
 149   memset (&all, 0, sizeof all);
 150
 151   PUT_CODE (&all.reg, REG);
 152   /* Avoid using hard regs in ways which may be unsupported.  */
 153   SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
 154
 155   PUT_CODE (&all.plus, PLUS);
 156   XEXP (&all.plus, 0) = &all.reg;
 157   XEXP (&all.plus, 1) = &all.reg;
 158
 159   PUT_CODE (&all.neg, NEG);
 160   XEXP (&all.neg, 0) = &all.reg;
 161
 162   PUT_CODE (&all.mult, MULT);
 163   XEXP (&all.mult, 0) = &all.reg;
 164   XEXP (&all.mult, 1) = &all.reg;
 165
 166   PUT_CODE (&all.sdiv, DIV);
 167   XEXP (&all.sdiv, 0) = &all.reg;
 168   XEXP (&all.sdiv, 1) = &all.reg;
 169
 170   PUT_CODE (&all.udiv, UDIV);
 171   XEXP (&all.udiv, 0) = &all.reg;
 172   XEXP (&all.udiv, 1) = &all.reg;
 173
 174   PUT_CODE (&all.sdiv_32, DIV);
 175   XEXP (&all.sdiv_32, 0) = &all.reg;
 176   XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
 177
 178   PUT_CODE (&all.smod_32, MOD);
 179   XEXP (&all.smod_32, 0) = &all.reg;
 180   XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
 181
 182   PUT_CODE (&all.zext, ZERO_EXTEND);
 183   XEXP (&all.zext, 0) = &all.reg;
 184
 185   PUT_CODE (&all.wide_mult, MULT);
 186   XEXP (&all.wide_mult, 0) = &all.zext;
 187   XEXP (&all.wide_mult, 1) = &all.zext;
 188
 189   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 190   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 191
 192   PUT_CODE (&all.wide_trunc, TRUNCATE);
 193   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 194
 195   PUT_CODE (&all.shift, ASHIFT);
 196   XEXP (&all.shift, 0) = &all.reg;
 197
 198   PUT_CODE (&all.shift_mult, MULT);
 199   XEXP (&all.shift_mult, 0) = &all.reg;
 200
 201   PUT_CODE (&all.shift_add, PLUS);
 202   XEXP (&all.shift_add, 0) = &all.shift_mult;
 203   XEXP (&all.shift_add, 1) = &all.reg;
 204
 205   PUT_CODE (&all.shift_sub, MINUS);
 206   XEXP (&all.shift_sub, 0) = &all.shift_mult;
 207   XEXP (&all.shift_sub, 1) = &all.reg;
 208
 209   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 210        mode != VOIDmode;
 211        mode = GET_MODE_WIDER_MODE (mode))
 212     {
 213       PUT_MODE (&all.reg, mode);
 214       PUT_MODE (&all.plus, mode);
 215       PUT_MODE (&all.neg, mode);
 216       PUT_MODE (&all.mult, mode);
 217       PUT_MODE (&all.sdiv, mode);
 218       PUT_MODE (&all.udiv, mode);
 219       PUT_MODE (&all.sdiv_32, mode);
 220       PUT_MODE (&all.smod_32, mode);
 221       PUT_MODE (&all.wide_trunc, mode);
 222       PUT_MODE (&all.shift, mode);
 223       PUT_MODE (&all.shift_mult, mode);
 224       PUT_MODE (&all.shift_add, mode);
 225       PUT_MODE (&all.shift_sub, mode);
 226
 227       add_cost[mode] = rtx_cost (&all.plus, SET);
 228       neg_cost[mode] = rtx_cost (&all.neg, SET);
 229       mul_cost[mode] = rtx_cost (&all.mult, SET);
 230       sdiv_cost[mode] = rtx_cost (&all.sdiv, SET);
 231       udiv_cost[mode] = rtx_cost (&all.udiv, SET);
 232
 233       sdiv_pow2_cheap[mode] = (rtx_cost (&all.sdiv_32, SET)
 234                                <= 2 * add_cost[mode]);
 235       smod_pow2_cheap[mode] = (rtx_cost (&all.smod_32, SET)
 236                                <= 4 * add_cost[mode]);
 237
 238       wider_mode = GET_MODE_WIDER_MODE (mode);
 239       if (wider_mode != VOIDmode)
 240         {
 241           PUT_MODE (&all.zext, wider_mode);
 242           PUT_MODE (&all.wide_mult, wider_mode);
 243           PUT_MODE (&all.wide_lshr, wider_mode);
 244           XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
 245
 246           mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
 247           mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
 248         }
 249
 250       shift_cost[mode][0] = 0;
 251       shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
 252
 253       n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
 254       for (m = 1; m < n; m++)
 255         {
 256           XEXP (&all.shift, 1) = cint[m];
 257           XEXP (&all.shift_mult, 1) = pow2[m];
 258
 259           shift_cost[mode][m] = rtx_cost (&all.shift, SET);
 260           shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
 261           shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
 262         }
 263     }
 264 }
 265
 266 /* Return an rtx representing minus the value of X.
 267    MODE is the intended mode of the result,
 268    useful if X is a CONST_INT.  */
 269
 270 rtx
 271 negate_rtx (enum machine_mode mode, rtx x)
 272 {
 273   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 274
 275   if (result == 0)
 276     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 277
 278   return result;
 279 }
 280
 281 /* Report on the availability of insv/extv/extzv and the desired mode
 282    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 283    is false; else the mode of the specified operand.  If OPNO is -1,
 284    all the caller cares about is whether the insn is available.  */
 285 enum machine_mode
 286 mode_for_extraction (enum extraction_pattern pattern, int opno)
 287 {
 288   const struct insn_data *data;
 289
 290   switch (pattern)
 291     {
 292     case EP_insv:
 293       if (HAVE_insv)
 294         {
 295           data = &insn_data[CODE_FOR_insv];
 296           break;
 297         }
 298       return MAX_MACHINE_MODE;
 299
 300     case EP_extv:
 301       if (HAVE_extv)
 302         {
 303           data = &insn_data[CODE_FOR_extv];
 304           break;
 305         }
 306       return MAX_MACHINE_MODE;
 307
 308     case EP_extzv:
 309       if (HAVE_extzv)
 310         {
 311           data = &insn_data[CODE_FOR_extzv];
 312           break;
 313         }
 314       return MAX_MACHINE_MODE;
 315
 316     default:
 317       gcc_unreachable ();
 318     }
 319
 320   if (opno == -1)
 321     return VOIDmode;
 322
 323   /* Everyone who uses this function used to follow it with
 324      if (result == VOIDmode) result = word_mode; */
 325   if (data->operand[opno].mode == VOIDmode)
 326     return word_mode;
 327   return data->operand[opno].mode;
 328 }
 329
 330 /* Return true if X, of mode MODE, matches the predicate for operand
 331    OPNO of instruction ICODE.  Allow volatile memories, regardless of
 332    the ambient volatile_ok setting.  */
 333
 334 static bool
 335 check_predicate_volatile_ok (enum insn_code icode, int opno,
 336                              rtx x, enum machine_mode mode)
 337 {
 338   bool save_volatile_ok, result;
 339
 340   save_volatile_ok = volatile_ok;
 341   result = insn_data[(int) icode].operand[opno].predicate (x, mode);
 342   volatile_ok = save_volatile_ok;
 343   return result;
 344 }
 345 \f
 346 /* A subroutine of store_bit_field, with the same arguments.  Return true
 347    if the operation could be implemented.
 348
 349    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 350    no other way of implementing the operation.  If FALLBACK_P is false,
 351    return false instead.  */
 352
 353 static bool
 354 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 355                    unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 356                    rtx value, bool fallback_p)
 357 {
 358   unsigned int unit
 359     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
 360   unsigned HOST_WIDE_INT offset, bitpos;
 361   rtx op0 = str_rtx;
 362   int byte_offset;
 363   rtx orig_value;
 364
 365   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 366
 367   while (GET_CODE (op0) == SUBREG)
 368     {
 369       /* The following line once was done only if WORDS_BIG_ENDIAN,
 370          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 371          meaningful at a much higher level; when structures are copied
 372          between memory and regs, the higher-numbered regs
 373          always get higher addresses.  */
 374       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 375       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 376
 377       byte_offset = 0;
 378
 379       /* Paradoxical subregs need special handling on big endian machines.  */
 380       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 381         {
 382           int difference = inner_mode_size - outer_mode_size;
 383
 384           if (WORDS_BIG_ENDIAN)
 385             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 386           if (BYTES_BIG_ENDIAN)
 387             byte_offset += difference % UNITS_PER_WORD;
 388         }
 389       else
 390         byte_offset = SUBREG_BYTE (op0);
 391
 392       bitnum += byte_offset * BITS_PER_UNIT;
 393       op0 = SUBREG_REG (op0);
 394     }
 395
 396   /* No action is needed if the target is a register and if the field
 397      lies completely outside that register.  This can occur if the source
 398      code contains an out-of-bounds access to a small array.  */
 399   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 400     return true;
 401
 402   /* Use vec_set patterns for inserting parts of vectors whenever
 403      available.  */
 404   if (VECTOR_MODE_P (GET_MODE (op0))
 405       && !MEM_P (op0)
 406       && (optab_handler (vec_set_optab, GET_MODE (op0))->insn_code
 407           != CODE_FOR_nothing)
 408       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 409       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 410       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 411     {
 412       enum machine_mode outermode = GET_MODE (op0);
 413       enum machine_mode innermode = GET_MODE_INNER (outermode);
 414       int icode = (int) optab_handler (vec_set_optab, outermode)->insn_code;
 415       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 416       rtx rtxpos = GEN_INT (pos);
 417       rtx src = value;
 418       rtx dest = op0;
 419       rtx pat, seq;
 420       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
 421       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
 422       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
 423
 424       start_sequence ();
 425
 426       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
 427         src = copy_to_mode_reg (mode1, src);
 428
 429       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 430         rtxpos = copy_to_mode_reg (mode1, rtxpos);
 431
 432       /* We could handle this, but we should always be called with a pseudo
 433          for our targets and all insns should take them as outputs.  */
 434       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
 435                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
 436                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
 437       pat = GEN_FCN (icode) (dest, src, rtxpos);
 438       seq = get_insns ();
 439       end_sequence ();
 440       if (pat)
 441         {
 442           emit_insn (seq);
 443           emit_insn (pat);
 444           return true;
 445         }
 446     }
 447
 448   /* If the target is a register, overwriting the entire object, or storing
 449      a full-word or multi-word field can be done with just a SUBREG.
 450
 451      If the target is memory, storing any naturally aligned field can be
 452      done with a simple store.  For targets that support fast unaligned
 453      memory, any naturally sized, unit aligned field can be done directly.  */
 454
 455   offset = bitnum / unit;
 456   bitpos = bitnum % unit;
 457   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 458                 + (offset * UNITS_PER_WORD);
 459
 460   if (bitpos == 0
 461       && bitsize == GET_MODE_BITSIZE (fieldmode)
 462       && (!MEM_P (op0)
 463           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 464              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 465              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 466           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 467              || (offset * BITS_PER_UNIT % bitsize == 0
 468                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 469     {
 470       if (MEM_P (op0))
 471         op0 = adjust_address (op0, fieldmode, offset);
 472       else if (GET_MODE (op0) != fieldmode)
 473         op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 474                                    byte_offset);
 475       emit_move_insn (op0, value);
 476       return true;
 477     }
 478
 479   /* Make sure we are playing with integral modes.  Pun with subregs
 480      if we aren't.  This must come after the entire register case above,
 481      since that case is valid for any mode.  The following cases are only
 482      valid for integral modes.  */
 483   {
 484     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 485     if (imode != GET_MODE (op0))
 486       {
 487         if (MEM_P (op0))
 488           op0 = adjust_address (op0, imode, 0);
 489         else
 490           {
 491             gcc_assert (imode != BLKmode);
 492             op0 = gen_lowpart (imode, op0);
 493           }
 494       }
 495   }
 496
 497   /* We may be accessing data outside the field, which means
 498      we can alias adjacent data.  */
 499   if (MEM_P (op0))
 500     {
 501       op0 = shallow_copy_rtx (op0);
 502       set_mem_alias_set (op0, 0);
 503       set_mem_expr (op0, 0);
 504     }
 505
 506   /* If OP0 is a register, BITPOS must count within a word.
 507      But as we have it, it counts within whatever size OP0 now has.
 508      On a bigendian machine, these are not the same, so convert.  */
 509   if (BYTES_BIG_ENDIAN
 510       && !MEM_P (op0)
 511       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 512     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 513
 514   /* Storing an lsb-aligned field in a register
 515      can be done with a movestrict instruction.  */
 516
 517   if (!MEM_P (op0)
 518       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 519       && bitsize == GET_MODE_BITSIZE (fieldmode)
 520       && (optab_handler (movstrict_optab, fieldmode)->insn_code
 521           != CODE_FOR_nothing))
 522     {
 523       int icode = optab_handler (movstrict_optab, fieldmode)->insn_code;
 524
 525       /* Get appropriate low part of the value being stored.  */
 526       if (GET_CODE (value) == CONST_INT || REG_P (value))
 527         value = gen_lowpart (fieldmode, value);
 528       else if (!(GET_CODE (value) == SYMBOL_REF
 529                  || GET_CODE (value) == LABEL_REF
 530                  || GET_CODE (value) == CONST))
 531         value = convert_to_mode (fieldmode, value, 0);
 532
 533       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 534         value = copy_to_mode_reg (fieldmode, value);
 535
 536       if (GET_CODE (op0) == SUBREG)
 537         {
 538           /* Else we've got some float mode source being extracted into
 539              a different float mode destination -- this combination of
 540              subregs results in Severe Tire Damage.  */
 541           gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
 542                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 543                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 544           op0 = SUBREG_REG (op0);
 545         }
 546
 547       emit_insn (GEN_FCN (icode)
 548                  (gen_rtx_SUBREG (fieldmode, op0,
 549                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 550                                   + (offset * UNITS_PER_WORD)),
 551                                   value));
 552
 553       return true;
 554     }
 555
 556   /* Handle fields bigger than a word.  */
 557
 558   if (bitsize > BITS_PER_WORD)
 559     {
 560       /* Here we transfer the words of the field
 561          in the order least significant first.
 562          This is because the most significant word is the one which may
 563          be less than full.
 564          However, only do that if the value is not BLKmode.  */
 565
 566       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 567       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 568       unsigned int i;
 569       rtx last;
 570
 571       /* This is the mode we must force value to, so that there will be enough
 572          subwords to extract.  Note that fieldmode will often (always?) be
 573          VOIDmode, because that is what store_field uses to indicate that this
 574          is a bit field, but passing VOIDmode to operand_subword_force
 575          is not allowed.  */
 576       fieldmode = GET_MODE (value);
 577       if (fieldmode == VOIDmode)
 578         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 579
 580       last = get_last_insn ();
 581       for (i = 0; i < nwords; i++)
 582         {
 583           /* If I is 0, use the low-order word in both field and target;
 584              if I is 1, use the next to lowest word; and so on.  */
 585           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 586           unsigned int bit_offset = (backwards
 587                                      ? MAX ((int) bitsize - ((int) i + 1)
 588                                             * BITS_PER_WORD,
 589                                             0)
 590                                      : (int) i * BITS_PER_WORD);
 591           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 592
 593           if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD,
 594                                             bitsize - i * BITS_PER_WORD),
 595                                   bitnum + bit_offset, word_mode,
 596                                   value_word, fallback_p))
 597             {
 598               delete_insns_since (last);
 599               return false;
 600             }
 601         }
 602       return true;
 603     }
 604
 605   /* From here on we can assume that the field to be stored in is
 606      a full-word (whatever type that is), since it is shorter than a word.  */
 607
 608   /* OFFSET is the number of words or bytes (UNIT says which)
 609      from STR_RTX to the first word or byte containing part of the field.  */
 610
 611   if (!MEM_P (op0))
 612     {
 613       if (offset != 0
 614           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 615         {
 616           if (!REG_P (op0))
 617             {
 618               /* Since this is a destination (lvalue), we can't copy
 619                  it to a pseudo.  We can remove a SUBREG that does not
 620                  change the size of the operand.  Such a SUBREG may
 621                  have been added above.  */
 622               gcc_assert (GET_CODE (op0) == SUBREG
 623                           && (GET_MODE_SIZE (GET_MODE (op0))
 624                               == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
 625               op0 = SUBREG_REG (op0);
 626             }
 627           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 628                                 op0, (offset * UNITS_PER_WORD));
 629         }
 630       offset = 0;
 631     }
 632
 633   /* If VALUE has a floating-point or complex mode, access it as an
 634      integer of the corresponding size.  This can occur on a machine
 635      with 64 bit registers that uses SFmode for float.  It can also
 636      occur for unaligned float or complex fields.  */
 637   orig_value = value;
 638   if (GET_MODE (value) != VOIDmode
 639       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 640       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 641     {
 642       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 643       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 644     }
 645
 646   /* Now OFFSET is nonzero only if OP0 is memory
 647      and is therefore always measured in bytes.  */
 648
 649   if (HAVE_insv
 650       && GET_MODE (value) != BLKmode
 651       && bitsize > 0
 652       && GET_MODE_BITSIZE (op_mode) >= bitsize
 653       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
 654             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
 655       && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
 656                                                         VOIDmode)
 657       && check_predicate_volatile_ok (CODE_FOR_insv, 0, op0, VOIDmode))
 658     {
 659       int xbitpos = bitpos;
 660       rtx value1;
 661       rtx xop0 = op0;
 662       rtx last = get_last_insn ();
 663       rtx pat;
 664
 665       /* Add OFFSET into OP0's address.  */
 666       if (MEM_P (xop0))
 667         xop0 = adjust_address (xop0, byte_mode, offset);
 668
 669       /* If xop0 is a register, we need it in OP_MODE
 670          to make it acceptable to the format of insv.  */
 671       if (GET_CODE (xop0) == SUBREG)
 672         /* We can't just change the mode, because this might clobber op0,
 673            and we will need the original value of op0 if insv fails.  */
 674         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 675       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 676         xop0 = gen_rtx_SUBREG (op_mode, xop0, 0);
 677
 678       /* On big-endian machines, we count bits from the most significant.
 679          If the bit field insn does not, we must invert.  */
 680
 681       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 682         xbitpos = unit - bitsize - xbitpos;
 683
 684       /* We have been counting XBITPOS within UNIT.
 685          Count instead within the size of the register.  */
 686       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
 687         xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
 688
 689       unit = GET_MODE_BITSIZE (op_mode);
 690
 691       /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 692       value1 = value;
 693       if (GET_MODE (value) != op_mode)
 694         {
 695           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 696             {
 697               /* Optimization: Don't bother really extending VALUE
 698                  if it has all the bits we will actually use.  However,
 699                  if we must narrow it, be sure we do it correctly.  */
 700
 701               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 702                 {
 703                   rtx tmp;
 704
 705                   tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 706                   if (! tmp)
 707                     tmp = simplify_gen_subreg (op_mode,
 708                                                force_reg (GET_MODE (value),
 709                                                           value1),
 710                                                GET_MODE (value), 0);
 711                   value1 = tmp;
 712                 }
 713               else
 714                 value1 = gen_lowpart (op_mode, value1);
 715             }
 716           else if (GET_CODE (value) == CONST_INT)
 717             value1 = gen_int_mode (INTVAL (value), op_mode);
 718           else
 719             /* Parse phase is supposed to make VALUE's data type
 720                match that of the component reference, which is a type
 721                at least as wide as the field; so VALUE should have
 722                a mode that corresponds to that type.  */
 723             gcc_assert (CONSTANT_P (value));
 724         }
 725
 726       /* If this machine's insv insists on a register,
 727          get VALUE1 into a register.  */
 728       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 729              (value1, op_mode)))
 730         value1 = force_reg (op_mode, value1);
 731
 732       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 733       if (pat)
 734         {
 735           emit_insn (pat);
 736           return true;
 737         }
 738       delete_insns_since (last);
 739     }
 740
 741   /* If OP0 is a memory, try copying it to a register and seeing if a
 742      cheap register alternative is available.  */
 743   if (HAVE_insv && MEM_P (op0))
 744     {
 745       enum machine_mode bestmode;
 746
 747       /* Get the mode to use for inserting into this field.  If OP0 is
 748          BLKmode, get the smallest mode consistent with the alignment. If
 749          OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
 750          mode. Otherwise, use the smallest mode containing the field.  */
 751
 752       if (GET_MODE (op0) == BLKmode
 753           || (op_mode != MAX_MACHINE_MODE
 754               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
 755         bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
 756                                   (op_mode == MAX_MACHINE_MODE
 757                                    ? VOIDmode : op_mode),
 758                                   MEM_VOLATILE_P (op0));
 759       else
 760         bestmode = GET_MODE (op0);
 761
 762       if (bestmode != VOIDmode
 763           && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
 764           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 765                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 766         {
 767           rtx last, tempreg, xop0;
 768           unsigned HOST_WIDE_INT xoffset, xbitpos;
 769
 770           last = get_last_insn ();
 771
 772           /* Adjust address to point to the containing unit of
 773              that mode.  Compute the offset as a multiple of this unit,
 774              counting in bytes.  */
 775           unit = GET_MODE_BITSIZE (bestmode);
 776           xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 777           xbitpos = bitnum % unit;
 778           xop0 = adjust_address (op0, bestmode, xoffset);
 779
 780           /* Fetch that unit, store the bitfield in it, then store
 781              the unit.  */
 782           tempreg = copy_to_reg (xop0);
 783           if (store_bit_field_1 (tempreg, bitsize, xbitpos,
 784                                  fieldmode, orig_value, false))
 785             {
 786               emit_move_insn (xop0, tempreg);
 787               return true;
 788             }
 789           delete_insns_since (last);
 790         }
 791     }
 792
 793   if (!fallback_p)
 794     return false;
 795
 796   store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 797   return true;
 798 }
 799
 800 /* Generate code to store value from rtx VALUE
 801    into a bit-field within structure STR_RTX
 802    containing BITSIZE bits starting at bit BITNUM.
 803    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */
 804
 805 void
 806 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 807                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 808                  rtx value)
 809 {
 810   if (!store_bit_field_1 (str_rtx, bitsize, bitnum, fieldmode, value, true))
 811     gcc_unreachable ();
 812 }
 813 \f
 814 /* Use shifts and boolean operations to store VALUE
 815    into a bit field of width BITSIZE
 816    in a memory location specified by OP0 except offset by OFFSET bytes.
 817      (OFFSET must be 0 if OP0 is a register.)
 818    The field starts at position BITPOS within the byte.
 819     (If OP0 is a register, it may be a full word or a narrower mode,
 820      but BITPOS still counts within a full word,
 821      which is significant on bigendian machines.)  */
 822
 823 static void
 824 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 825                        unsigned HOST_WIDE_INT bitsize,
 826                        unsigned HOST_WIDE_INT bitpos, rtx value)
 827 {
 828   enum machine_mode mode;
 829   unsigned int total_bits = BITS_PER_WORD;
 830   rtx temp;
 831   int all_zero = 0;
 832   int all_one = 0;
 833
 834   /* There is a case not handled here:
 835      a structure with a known alignment of just a halfword
 836      and a field split across two aligned halfwords within the structure.
 837      Or likewise a structure with a known alignment of just a byte
 838      and a field split across two bytes.
 839      Such cases are not supposed to be able to occur.  */
 840
 841   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 842     {
 843       gcc_assert (!offset);
 844       /* Special treatment for a bit field split across two registers.  */
 845       if (bitsize + bitpos > BITS_PER_WORD)
 846         {
 847           store_split_bit_field (op0, bitsize, bitpos, value);
 848           return;
 849         }
 850     }
 851   else
 852     {
 853       /* Get the proper mode to use for this field.  We want a mode that
 854          includes the entire field.  If such a mode would be larger than
 855          a word, we won't be doing the extraction the normal way.
 856          We don't want a mode bigger than the destination.  */
 857
 858       mode = GET_MODE (op0);
 859       if (GET_MODE_BITSIZE (mode) == 0
 860           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 861         mode = word_mode;
 862       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 863                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 864
 865       if (mode == VOIDmode)
 866         {
 867           /* The only way this should occur is if the field spans word
 868              boundaries.  */
 869           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 870                                  value);
 871           return;
 872         }
 873
 874       total_bits = GET_MODE_BITSIZE (mode);
 875
 876       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 877          be in the range 0 to total_bits-1, and put any excess bytes in
 878          OFFSET.  */
 879       if (bitpos >= total_bits)
 880         {
 881           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 882           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 883                      * BITS_PER_UNIT);
 884         }
 885
 886       /* Get ref to an aligned byte, halfword, or word containing the field.
 887          Adjust BITPOS to be position within a word,
 888          and OFFSET to be the offset of that word.
 889          Then alter OP0 to refer to that word.  */
 890       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 891       offset -= (offset % (total_bits / BITS_PER_UNIT));
 892       op0 = adjust_address (op0, mode, offset);
 893     }
 894
 895   mode = GET_MODE (op0);
 896
 897   /* Now MODE is either some integral mode for a MEM as OP0,
 898      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 899      The bit field is contained entirely within OP0.
 900      BITPOS is the starting bit number within OP0.
 901      (OP0's mode may actually be narrower than MODE.)  */
 902
 903   if (BYTES_BIG_ENDIAN)
 904       /* BITPOS is the distance between our msb
 905          and that of the containing datum.
 906          Convert it to the distance from the lsb.  */
 907       bitpos = total_bits - bitsize - bitpos;
 908
 909   /* Now BITPOS is always the distance between our lsb
 910      and that of OP0.  */
 911
 912   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 913      we must first convert its mode to MODE.  */
 914
 915   if (GET_CODE (value) == CONST_INT)
 916     {
 917       HOST_WIDE_INT v = INTVAL (value);
 918
 919       if (bitsize < HOST_BITS_PER_WIDE_INT)
 920         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 921
 922       if (v == 0)
 923         all_zero = 1;
 924       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 925                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 926                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 927         all_one = 1;
 928
 929       value = lshift_value (mode, value, bitpos, bitsize);
 930     }
 931   else
 932     {
 933       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 934                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 935
 936       if (GET_MODE (value) != mode)
 937         {
 938           if ((REG_P (value) || GET_CODE (value) == SUBREG)
 939               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 940             value = gen_lowpart (mode, value);
 941           else
 942             value = convert_to_mode (mode, value, 1);
 943         }
 944
 945       if (must_and)
 946         value = expand_binop (mode, and_optab, value,
 947                               mask_rtx (mode, 0, bitsize, 0),
 948                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 949       if (bitpos > 0)
 950         value = expand_shift (LSHIFT_EXPR, mode, value,
 951                               build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
 952     }
 953
 954   /* Now clear the chosen bits in OP0,
 955      except that if VALUE is -1 we need not bother.  */
 956   /* We keep the intermediates in registers to allow CSE to combine
 957      consecutive bitfield assignments.  */
 958
 959   temp = force_reg (mode, op0);
 960
 961   if (! all_one)
 962     {
 963       temp = expand_binop (mode, and_optab, temp,
 964                            mask_rtx (mode, bitpos, bitsize, 1),
 965                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
 966       temp = force_reg (mode, temp);
 967     }
 968
 969   /* Now logical-or VALUE into OP0, unless it is zero.  */
 970
 971   if (! all_zero)
 972     {
 973       temp = expand_binop (mode, ior_optab, temp, value,
 974                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
 975       temp = force_reg (mode, temp);
 976     }
 977
 978   if (op0 != temp)
 979     {
 980       op0 = copy_rtx (op0);
 981       emit_move_insn (op0, temp);
 982     }
 983 }
 984 \f
 985 /* Store a bit field that is split across multiple accessible memory objects.
 986
 987    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 988    BITSIZE is the field width; BITPOS the position of its first bit
 989    (within the word).
 990    VALUE is the value to store.
 991
 992    This does not yet handle fields wider than BITS_PER_WORD.  */
 993
 994 static void
 995 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 996                        unsigned HOST_WIDE_INT bitpos, rtx value)
 997 {
 998   unsigned int unit;
 999   unsigned int bitsdone = 0;
1000
1001   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1002      much at a time.  */
1003   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1004     unit = BITS_PER_WORD;
1005   else
1006     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1007
1008   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1009      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1010      that VALUE might be a floating-point constant.  */
1011   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
1012     {
1013       rtx word = gen_lowpart_common (word_mode, value);
1014
1015       if (word && (value != word))
1016         value = word;
1017       else
1018         value = gen_lowpart_common (word_mode,
1019                                     force_reg (GET_MODE (value) != VOIDmode
1020                                                ? GET_MODE (value)
1021                                                : word_mode, value));
1022     }
1023
1024   while (bitsdone < bitsize)
1025     {
1026       unsigned HOST_WIDE_INT thissize;
1027       rtx part, word;
1028       unsigned HOST_WIDE_INT thispos;
1029       unsigned HOST_WIDE_INT offset;
1030
1031       offset = (bitpos + bitsdone) / unit;
1032       thispos = (bitpos + bitsdone) % unit;
1033
1034       /* THISSIZE must not overrun a word boundary.  Otherwise,
1035          store_fixed_bit_field will call us again, and we will mutually
1036          recurse forever.  */
1037       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1038       thissize = MIN (thissize, unit - thispos);
1039
1040       if (BYTES_BIG_ENDIAN)
1041         {
1042           int total_bits;
1043
1044           /* We must do an endian conversion exactly the same way as it is
1045              done in extract_bit_field, so that the two calls to
1046              extract_fixed_bit_field will have comparable arguments.  */
1047           if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1048             total_bits = BITS_PER_WORD;
1049           else
1050             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1051
1052           /* Fetch successively less significant portions.  */
1053           if (GET_CODE (value) == CONST_INT)
1054             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1055                              >> (bitsize - bitsdone - thissize))
1056                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1057           else
1058             /* The args are chosen so that the last part includes the
1059                lsb.  Give extract_bit_field the value it needs (with
1060                endianness compensation) to fetch the piece we want.  */
1061             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1062                                             total_bits - bitsize + bitsdone,
1063                                             NULL_RTX, 1);
1064         }
1065       else
1066         {
1067           /* Fetch successively more significant portions.  */
1068           if (GET_CODE (value) == CONST_INT)
1069             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1070                              >> bitsdone)
1071                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1072           else
1073             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1074                                             bitsdone, NULL_RTX, 1);
1075         }
1076
1077       /* If OP0 is a register, then handle OFFSET here.
1078
1079          When handling multiword bitfields, extract_bit_field may pass
1080          down a word_mode SUBREG of a larger REG for a bitfield that actually
1081          crosses a word boundary.  Thus, for a SUBREG, we must find
1082          the current word starting from the base register.  */
1083       if (GET_CODE (op0) == SUBREG)
1084         {
1085           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1086           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1087                                         GET_MODE (SUBREG_REG (op0)));
1088           offset = 0;
1089         }
1090       else if (REG_P (op0))
1091         {
1092           word = operand_subword_force (op0, offset, GET_MODE (op0));
1093           offset = 0;
1094         }
1095       else
1096         word = op0;
1097
1098       /* OFFSET is in UNITs, and UNIT is in bits.
1099          store_fixed_bit_field wants offset in bytes.  */
1100       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1101                              thispos, part);
1102       bitsdone += thissize;
1103     }
1104 }
1105 \f
1106 /* A subroutine of extract_bit_field_1 that converts return value X
1107    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1108    to extract_bit_field.  */
1109
1110 static rtx
1111 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1112                              enum machine_mode tmode, bool unsignedp)
1113 {
1114   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1115     return x;
1116
1117   /* If the x mode is not a scalar integral, first convert to the
1118      integer mode of that size and then access it as a floating-point
1119      value via a SUBREG.  */
1120   if (!SCALAR_INT_MODE_P (tmode))
1121     {
1122       enum machine_mode smode;
1123
1124       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1125       x = convert_to_mode (smode, x, unsignedp);
1126       x = force_reg (smode, x);
1127       return gen_lowpart (tmode, x);
1128     }
1129
1130   return convert_to_mode (tmode, x, unsignedp);
1131 }
1132
1133 /* A subroutine of extract_bit_field, with the same arguments.
1134    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1135    if we can find no other means of implementing the operation.
1136    if FALLBACK_P is false, return NULL instead.  */
1137
1138 static rtx
1139 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1140                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1141                      enum machine_mode mode, enum machine_mode tmode,
1142                      bool fallback_p)
1143 {
1144   unsigned int unit
1145     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1146   unsigned HOST_WIDE_INT offset, bitpos;
1147   rtx op0 = str_rtx;
1148   enum machine_mode int_mode;
1149   enum machine_mode ext_mode;
1150   enum machine_mode mode1;
1151   enum insn_code icode;
1152   int byte_offset;
1153
1154   if (tmode == VOIDmode)
1155     tmode = mode;
1156
1157   while (GET_CODE (op0) == SUBREG)
1158     {
1159       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1160       op0 = SUBREG_REG (op0);
1161     }
1162
1163   /* If we have an out-of-bounds access to a register, just return an
1164      uninitialized register of the required mode.  This can occur if the
1165      source code contains an out-of-bounds access to a small array.  */
1166   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1167     return gen_reg_rtx (tmode);
1168
1169   if (REG_P (op0)
1170       && mode == GET_MODE (op0)
1171       && bitnum == 0
1172       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1173     {
1174       /* We're trying to extract a full register from itself.  */
1175       return op0;
1176     }
1177
1178   /* See if we can get a better vector mode before extracting.  */
1179   if (VECTOR_MODE_P (GET_MODE (op0))
1180       && !MEM_P (op0)
1181       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1182     {
1183       enum machine_mode new_mode;
1184       int nunits = GET_MODE_NUNITS (GET_MODE (op0));
1185
1186       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1187         new_mode = MIN_MODE_VECTOR_FLOAT;
1188       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1189         new_mode = MIN_MODE_VECTOR_FRACT;
1190       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1191         new_mode = MIN_MODE_VECTOR_UFRACT;
1192       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1193         new_mode = MIN_MODE_VECTOR_ACCUM;
1194       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1195         new_mode = MIN_MODE_VECTOR_UACCUM;
1196       else
1197         new_mode = MIN_MODE_VECTOR_INT;
1198
1199       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1200         if (GET_MODE_NUNITS (new_mode) == nunits
1201             && GET_MODE_INNER (new_mode) == tmode
1202             && targetm.vector_mode_supported_p (new_mode))
1203           break;
1204       if (new_mode != VOIDmode)
1205         op0 = gen_lowpart (new_mode, op0);
1206     }
1207
1208   /* Use vec_extract patterns for extracting parts of vectors whenever
1209      available.  */
1210   if (VECTOR_MODE_P (GET_MODE (op0))
1211       && !MEM_P (op0)
1212       && (optab_handler (vec_extract_optab, GET_MODE (op0))->insn_code
1213           != CODE_FOR_nothing)
1214       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1215           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1216     {
1217       enum machine_mode outermode = GET_MODE (op0);
1218       enum machine_mode innermode = GET_MODE_INNER (outermode);
1219       int icode = (int) optab_handler (vec_extract_optab, outermode)->insn_code;
1220       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1221       rtx rtxpos = GEN_INT (pos);
1222       rtx src = op0;
1223       rtx dest = NULL, pat, seq;
1224       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1225       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1226       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1227
1228       if (innermode == tmode || innermode == mode)
1229         dest = target;
1230
1231       if (!dest)
1232         dest = gen_reg_rtx (innermode);
1233
1234       start_sequence ();
1235
1236       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1237         dest = copy_to_mode_reg (mode0, dest);
1238
1239       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1240         src = copy_to_mode_reg (mode1, src);
1241
1242       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1243         rtxpos = copy_to_mode_reg (mode1, rtxpos);
1244
1245       /* We could handle this, but we should always be called with a pseudo
1246          for our targets and all insns should take them as outputs.  */
1247       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1248                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
1249                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1250
1251       pat = GEN_FCN (icode) (dest, src, rtxpos);
1252       seq = get_insns ();
1253       end_sequence ();
1254       if (pat)
1255         {
1256           emit_insn (seq);
1257           emit_insn (pat);
1258           if (mode0 != mode)
1259             return gen_lowpart (tmode, dest);
1260           return dest;
1261         }
1262     }
1263
1264   /* Make sure we are playing with integral modes.  Pun with subregs
1265      if we aren't.  */
1266   {
1267     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1268     if (imode != GET_MODE (op0))
1269       {
1270         if (MEM_P (op0))
1271           op0 = adjust_address (op0, imode, 0);
1272         else
1273           {
1274             gcc_assert (imode != BLKmode);
1275             op0 = gen_lowpart (imode, op0);
1276
1277             /* If we got a SUBREG, force it into a register since we
1278                aren't going to be able to do another SUBREG on it.  */
1279             if (GET_CODE (op0) == SUBREG)
1280               op0 = force_reg (imode, op0);
1281           }
1282       }
1283   }
1284
1285   /* We may be accessing data outside the field, which means
1286      we can alias adjacent data.  */
1287   if (MEM_P (op0))
1288     {
1289       op0 = shallow_copy_rtx (op0);
1290       set_mem_alias_set (op0, 0);
1291       set_mem_expr (op0, 0);
1292     }
1293
1294   /* Extraction of a full-word or multi-word value from a structure
1295      in a register or aligned memory can be done with just a SUBREG.
1296      A subword value in the least significant part of a register
1297      can also be extracted with a SUBREG.  For this, we need the
1298      byte offset of the value in op0.  */
1299
1300   bitpos = bitnum % unit;
1301   offset = bitnum / unit;
1302   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1303
1304   /* If OP0 is a register, BITPOS must count within a word.
1305      But as we have it, it counts within whatever size OP0 now has.
1306      On a bigendian machine, these are not the same, so convert.  */
1307   if (BYTES_BIG_ENDIAN
1308       && !MEM_P (op0)
1309       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1310     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1311
1312   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1313      If that's wrong, the solution is to test for it and set TARGET to 0
1314      if needed.  */
1315
1316   /* Only scalar integer modes can be converted via subregs.  There is an
1317      additional problem for FP modes here in that they can have a precision
1318      which is different from the size.  mode_for_size uses precision, but
1319      we want a mode based on the size, so we must avoid calling it for FP
1320      modes.  */
1321   mode1  = (SCALAR_INT_MODE_P (tmode)
1322             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1323             : mode);
1324
1325   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1326         && bitpos % BITS_PER_WORD == 0)
1327        || (mode1 != BLKmode
1328            /* ??? The big endian test here is wrong.  This is correct
1329               if the value is in a register, and if mode_for_size is not
1330               the same mode as op0.  This causes us to get unnecessarily
1331               inefficient code from the Thumb port when -mbig-endian.  */
1332            && (BYTES_BIG_ENDIAN
1333                ? bitpos + bitsize == BITS_PER_WORD
1334                : bitpos == 0)))
1335       && ((!MEM_P (op0)
1336            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1337                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1338            && GET_MODE_SIZE (mode1) != 0
1339            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1340           || (MEM_P (op0)
1341               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1342                   || (offset * BITS_PER_UNIT % bitsize == 0
1343                       && MEM_ALIGN (op0) % bitsize == 0)))))
1344     {
1345       if (MEM_P (op0))
1346         op0 = adjust_address (op0, mode1, offset);
1347       else if (mode1 != GET_MODE (op0))
1348         {
1349           rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1350                                          byte_offset);
1351           if (sub == NULL)
1352             goto no_subreg_mode_swap;
1353           op0 = sub;
1354         }
1355       if (mode1 != mode)
1356         return convert_to_mode (tmode, op0, unsignedp);
1357       return op0;
1358     }
1359  no_subreg_mode_swap:
1360
1361   /* Handle fields bigger than a word.  */
1362
1363   if (bitsize > BITS_PER_WORD)
1364     {
1365       /* Here we transfer the words of the field
1366          in the order least significant first.
1367          This is because the most significant word is the one which may
1368          be less than full.  */
1369
1370       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1371       unsigned int i;
1372
1373       if (target == 0 || !REG_P (target))
1374         target = gen_reg_rtx (mode);
1375
1376       /* Indicate for flow that the entire target reg is being set.  */
1377       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1378
1379       for (i = 0; i < nwords; i++)
1380         {
1381           /* If I is 0, use the low-order word in both field and target;
1382              if I is 1, use the next to lowest word; and so on.  */
1383           /* Word number in TARGET to use.  */
1384           unsigned int wordnum
1385             = (WORDS_BIG_ENDIAN
1386                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1387                : i);
1388           /* Offset from start of field in OP0.  */
1389           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1390                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1391                                                 * (int) BITS_PER_WORD))
1392                                      : (int) i * BITS_PER_WORD);
1393           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1394           rtx result_part
1395             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1396                                            bitsize - i * BITS_PER_WORD),
1397                                  bitnum + bit_offset, 1, target_part, mode,
1398                                  word_mode);
1399
1400           gcc_assert (target_part);
1401
1402           if (result_part != target_part)
1403             emit_move_insn (target_part, result_part);
1404         }
1405
1406       if (unsignedp)
1407         {
1408           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1409              need to be zero'd out.  */
1410           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1411             {
1412               unsigned int i, total_words;
1413
1414               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1415               for (i = nwords; i < total_words; i++)
1416                 emit_move_insn
1417                   (operand_subword (target,
1418                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1419                                     1, VOIDmode),
1420                    const0_rtx);
1421             }
1422           return target;
1423         }
1424
1425       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1426       target = expand_shift (LSHIFT_EXPR, mode, target,
1427                              build_int_cst (NULL_TREE,
1428                                             GET_MODE_BITSIZE (mode) - bitsize),
1429                              NULL_RTX, 0);
1430       return expand_shift (RSHIFT_EXPR, mode, target,
1431                            build_int_cst (NULL_TREE,
1432                                           GET_MODE_BITSIZE (mode) - bitsize),
1433                            NULL_RTX, 0);
1434     }
1435
1436   /* From here on we know the desired field is smaller than a word.  */
1437
1438   /* Check if there is a correspondingly-sized integer field, so we can
1439      safely extract it as one size of integer, if necessary; then
1440      truncate or extend to the size that is wanted; then use SUBREGs or
1441      convert_to_mode to get one of the modes we really wanted.  */
1442
1443   int_mode = int_mode_for_mode (tmode);
1444   if (int_mode == BLKmode)
1445     int_mode = int_mode_for_mode (mode);
1446   /* Should probably push op0 out to memory and then do a load.  */
1447   gcc_assert (int_mode != BLKmode);
1448
1449   /* OFFSET is the number of words or bytes (UNIT says which)
1450      from STR_RTX to the first word or byte containing part of the field.  */
1451   if (!MEM_P (op0))
1452     {
1453       if (offset != 0
1454           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1455         {
1456           if (!REG_P (op0))
1457             op0 = copy_to_reg (op0);
1458           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1459                                 op0, (offset * UNITS_PER_WORD));
1460         }
1461       offset = 0;
1462     }
1463
1464   /* Now OFFSET is nonzero only for memory operands.  */
1465   ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1466   icode = unsignedp ? CODE_FOR_extzv : CODE_FOR_extv;
1467   if (ext_mode != MAX_MACHINE_MODE
1468       && bitsize > 0
1469       && GET_MODE_BITSIZE (ext_mode) >= bitsize
1470       /* If op0 is a register, we need it in EXT_MODE to make it
1471          acceptable to the format of ext(z)v.  */
1472       && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1473       && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1474            && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode)))
1475       && check_predicate_volatile_ok (icode, 1, op0, GET_MODE (op0)))
1476     {
1477       unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1478       rtx bitsize_rtx, bitpos_rtx;
1479       rtx last = get_last_insn ();
1480       rtx xop0 = op0;
1481       rtx xtarget = target;
1482       rtx xspec_target = target;
1483       rtx xspec_target_subreg = 0;
1484       rtx pat;
1485
1486       /* If op0 is a register, we need it in EXT_MODE to make it
1487          acceptable to the format of ext(z)v.  */
1488       if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1489         xop0 = gen_rtx_SUBREG (ext_mode, xop0, 0);
1490       if (MEM_P (xop0))
1491         /* Get ref to first byte containing part of the field.  */
1492         xop0 = adjust_address (xop0, byte_mode, xoffset);
1493
1494       /* On big-endian machines, we count bits from the most significant.
1495          If the bit field insn does not, we must invert.  */
1496       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1497         xbitpos = unit - bitsize - xbitpos;
1498
1499       /* Now convert from counting within UNIT to counting in EXT_MODE.  */
1500       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1501         xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1502
1503       unit = GET_MODE_BITSIZE (ext_mode);
1504
1505       if (xtarget == 0)
1506         xtarget = xspec_target = gen_reg_rtx (tmode);
1507
1508       if (GET_MODE (xtarget) != ext_mode)
1509         {
1510           if (REG_P (xtarget))
1511             {
1512               xtarget = gen_lowpart (ext_mode, xtarget);
1513               if (GET_MODE_SIZE (ext_mode)
1514                   > GET_MODE_SIZE (GET_MODE (xspec_target)))
1515                 xspec_target_subreg = xtarget;
1516             }
1517           else
1518             xtarget = gen_reg_rtx (ext_mode);
1519         }
1520
1521       /* If this machine's ext(z)v insists on a register target,
1522          make sure we have one.  */
1523       if (!insn_data[(int) icode].operand[0].predicate (xtarget, ext_mode))
1524         xtarget = gen_reg_rtx (ext_mode);
1525
1526       bitsize_rtx = GEN_INT (bitsize);
1527       bitpos_rtx = GEN_INT (xbitpos);
1528
1529       pat = (unsignedp
1530              ? gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx)
1531              : gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx));
1532       if (pat)
1533         {
1534           emit_insn (pat);
1535           if (xtarget == xspec_target)
1536             return xtarget;
1537           if (xtarget == xspec_target_subreg)
1538             return xspec_target;
1539           return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1540         }
1541       delete_insns_since (last);
1542     }
1543
1544   /* If OP0 is a memory, try copying it to a register and seeing if a
1545      cheap register alternative is available.  */
1546   if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1547     {
1548       enum machine_mode bestmode;
1549
1550       /* Get the mode to use for inserting into this field.  If
1551          OP0 is BLKmode, get the smallest mode consistent with the
1552          alignment. If OP0 is a non-BLKmode object that is no
1553          wider than EXT_MODE, use its mode. Otherwise, use the
1554          smallest mode containing the field.  */
1555
1556       if (GET_MODE (op0) == BLKmode
1557           || (ext_mode != MAX_MACHINE_MODE
1558               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1559         bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
1560                                   (ext_mode == MAX_MACHINE_MODE
1561                                    ? VOIDmode : ext_mode),
1562                                   MEM_VOLATILE_P (op0));
1563       else
1564         bestmode = GET_MODE (op0);
1565
1566       if (bestmode != VOIDmode
1567           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1568                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1569         {
1570           unsigned HOST_WIDE_INT xoffset, xbitpos;
1571
1572           /* Compute the offset as a multiple of this unit,
1573              counting in bytes.  */
1574           unit = GET_MODE_BITSIZE (bestmode);
1575           xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1576           xbitpos = bitnum % unit;
1577
1578           /* Make sure the register is big enough for the whole field.  */
1579           if (xoffset * BITS_PER_UNIT + unit
1580               >= offset * BITS_PER_UNIT + bitsize)
1581             {
1582               rtx last, result, xop0;
1583
1584               last = get_last_insn ();
1585
1586               /* Fetch it to a register in that size.  */
1587               xop0 = adjust_address (op0, bestmode, xoffset);
1588               xop0 = force_reg (bestmode, xop0);
1589               result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1590                                             unsignedp, target,
1591                                             mode, tmode, false);
1592               if (result)
1593                 return result;
1594
1595               delete_insns_since (last);
1596             }
1597         }
1598     }
1599
1600   if (!fallback_p)
1601     return NULL;
1602
1603   target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1604                                     bitpos, target, unsignedp);
1605   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1606 }
1607
1608 /* Generate code to extract a byte-field from STR_RTX
1609    containing BITSIZE bits, starting at BITNUM,
1610    and put it in TARGET if possible (if TARGET is nonzero).
1611    Regardless of TARGET, we return the rtx for where the value is placed.
1612
1613    STR_RTX is the structure containing the byte (a REG or MEM).
1614    UNSIGNEDP is nonzero if this is an unsigned bit field.
1615    MODE is the natural mode of the field value once extracted.
1616    TMODE is the mode the caller would like the value to have;
1617    but the value may be returned with type MODE instead.
1618
1619    If a TARGET is specified and we can store in it at no extra cost,
1620    we do so, and return TARGET.
1621    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1622    if they are equally easy.  */
1623
1624 rtx
1625 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1626                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1627                    enum machine_mode mode, enum machine_mode tmode)
1628 {
1629   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1630                               target, mode, tmode, true);
1631 }
1632 \f
/* Extract a bit field using shifts and boolean operations.
   Returns an rtx to represent the value.
   TMODE is the mode the caller would like the value to have.
   OP0 addresses a register (word) or memory (byte).
   BITPOS says which bit within the word or byte the bit field starts in.
   OFFSET says how many bytes farther the bit field starts;
    it is 0 if OP0 is a register.
   BITSIZE says how many bits long the bit field is.
    (If OP0 is a register, it may be narrower than a full word,
     but BITPOS still counts within a full word,
     which is significant on bigendian machines.)

   UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
   If TARGET is nonzero, attempts to store the value there
   and return TARGET, but this is not guaranteed.
   If TARGET is not used, create a pseudo-reg of mode TMODE for the value.

   A field that does not fit in one word (register case) or for which
   get_best_mode finds no suitable mode (memory case) is handed over to
   extract_split_bit_field.

   Note that on the signed path the value is computed in the narrowest
   integer mode that holds the field and is returned in that mode; no
   conversion to TMODE is done here.  */

static rtx
extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
                         unsigned HOST_WIDE_INT offset,
                         unsigned HOST_WIDE_INT bitsize,
                         unsigned HOST_WIDE_INT bitpos, rtx target,
                         int unsignedp)
{
  /* Width of the unit BITPOS is counted in: a word for registers,
     replaced below by the width of the chosen mode for memory.  */
  unsigned int total_bits = BITS_PER_WORD;
  enum machine_mode mode;

  if (GET_CODE (op0) == SUBREG || REG_P (op0))
    {
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
        return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
    }
  else
    {
      /* Get the proper mode to use for this field.  We want a mode that
         includes the entire field.  If such a mode would be larger than
         a word, we won't be doing the extraction the normal way.  */

      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
                            MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
        /* The only way this should occur is if the field spans word
           boundaries.  */
        return extract_split_bit_field (op0, bitsize,
                                        bitpos + offset * BITS_PER_UNIT,
                                        unsignedp);

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
         be in the range 0 to total_bits-1, and put any excess bytes in
         OFFSET.  */
      if (bitpos >= total_bits)
        {
          offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
          bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
                     * BITS_PER_UNIT);
        }

      /* Get ref to an aligned byte, halfword, or word containing the field.
         Adjust BITPOS to be position within a word,
         and OFFSET to be the offset of that word.
         Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = adjust_address (op0, mode, offset);
    }

  /* From here on MODE is the mode OP0 is accessed in, for both the
     register and the memory case.  */
  mode = GET_MODE (op0);

  if (BYTES_BIG_ENDIAN)
    /* BITPOS is the distance between our msb and that of OP0.
       Convert it to the distance from the lsb.  */
    bitpos = total_bits - bitsize - bitpos;

  /* Now BITPOS is always the distance between the field's lsb and that of OP0.
     We have reduced the big-endian case to the little-endian case.  */

  if (unsignedp)
    {
      if (bitpos)
        {
          /* If the field does not already start at the lsb,
             shift it so it does.  */
          tree amount = build_int_cst (NULL_TREE, bitpos);
          /* Maybe propagate the target for the shift.  */
          /* But not if we will return it--could confuse integrate.c.  */
          rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
          /* TARGET cannot receive a MODE value if it is wanted in TMODE.  */
          if (tmode != mode) subtarget = 0;
          op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
        }
      /* Convert the value to the desired mode.  */
      if (mode != tmode)
        op0 = convert_to_mode (tmode, op0, 1);

      /* Unless the msb of the field used to be the msb when we shifted,
         mask out the upper bits.  */

      if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
        return expand_binop (GET_MODE (op0), and_optab, op0,
                             mask_rtx (GET_MODE (op0), 0, bitsize, 0),
                             target, 1, OPTAB_LIB_WIDEN);
      return op0;
    }

  /* To extract a signed bit-field, first shift its msb to the msb of the word,
     then arithmetic-shift its lsb to the lsb of the word.  */
  op0 = force_reg (mode, op0);
  /* TARGET is only usable when the value is produced in TMODE.  */
  if (mode != tmode)
    target = 0;

  /* Find the narrowest integer mode that contains the field.  */

  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
       mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
      {
        op0 = convert_to_mode (mode, op0, 0);
        break;
      }

  /* Left-shift (skipped when the field's msb is already the msb of MODE)
     so that the field's sign bit becomes the sign bit of MODE.  */
  if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
    {
      tree amount
        = build_int_cst (NULL_TREE,
                         GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
      /* Maybe propagate the target for the shift.  */
      rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
      op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
    }

  /* Arithmetic right shift (UNSIGNEDP argument 0) brings the field down
     to the lsb while replicating its sign bit.  */
  return expand_shift (RSHIFT_EXPR, mode, op0,
                       build_int_cst (NULL_TREE,
                                      GET_MODE_BITSIZE (mode) - bitsize),
                       target, 0);
}
1770 \f
1771 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1772    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1773    complement of that if COMPLEMENT.  The mask is truncated if
1774    necessary to the width of mode MODE.  The mask is zero-extended if
1775    BITSIZE+BITPOS is too small for MODE.  */
1776
1777 static rtx
1778 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1779 {
1780   HOST_WIDE_INT masklow, maskhigh;
1781
1782   if (bitsize == 0)
1783     masklow = 0;
1784   else if (bitpos < HOST_BITS_PER_WIDE_INT)
1785     masklow = (HOST_WIDE_INT) -1 << bitpos;
1786   else
1787     masklow = 0;
1788
1789   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1790     masklow &= ((unsigned HOST_WIDE_INT) -1
1791                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1792
1793   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1794     maskhigh = -1;
1795   else
1796     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1797
1798   if (bitsize == 0)
1799     maskhigh = 0;
1800   else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1801     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1802                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1803   else
1804     maskhigh = 0;
1805
1806   if (complement)
1807     {
1808       maskhigh = ~maskhigh;
1809       masklow = ~masklow;
1810     }
1811
1812   return immed_double_const (masklow, maskhigh, mode);
1813 }
1814
1815 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1816    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1817
1818 static rtx
1819 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1820 {
1821   unsigned HOST_WIDE_INT v = INTVAL (value);
1822   HOST_WIDE_INT low, high;
1823
1824   if (bitsize < HOST_BITS_PER_WIDE_INT)
1825     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1826
1827   if (bitpos < HOST_BITS_PER_WIDE_INT)
1828     {
1829       low = v << bitpos;
1830       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1831     }
1832   else
1833     {
1834       low = 0;
1835       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1836     }
1837
1838   return immed_double_const (low, high, mode);
1839 }
1840 \f
1841 /* Extract a bit field that is split across two words
1842    and return an RTX for the result.
1843
1844    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1845    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1846    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1847
1848 static rtx
1849 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1850                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1851 {
1852   unsigned int unit;
1853   unsigned int bitsdone = 0;
1854   rtx result = NULL_RTX;
1855   int first = 1;
1856
1857   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1858      much at a time.  */
1859   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1860     unit = BITS_PER_WORD;
1861   else
1862     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1863
1864   while (bitsdone < bitsize)
1865     {
1866       unsigned HOST_WIDE_INT thissize;
1867       rtx part, word;
1868       unsigned HOST_WIDE_INT thispos;
1869       unsigned HOST_WIDE_INT offset;
1870
1871       offset = (bitpos + bitsdone) / unit;
1872       thispos = (bitpos + bitsdone) % unit;
1873
1874       /* THISSIZE must not overrun a word boundary.  Otherwise,
1875          extract_fixed_bit_field will call us again, and we will mutually
1876          recurse forever.  */
1877       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1878       thissize = MIN (thissize, unit - thispos);
1879
1880       /* If OP0 is a register, then handle OFFSET here.
1881
1882          When handling multiword bitfields, extract_bit_field may pass
1883          down a word_mode SUBREG of a larger REG for a bitfield that actually
1884          crosses a word boundary.  Thus, for a SUBREG, we must find
1885          the current word starting from the base register.  */
1886       if (GET_CODE (op0) == SUBREG)
1887         {
1888           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1889           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1890                                         GET_MODE (SUBREG_REG (op0)));
1891           offset = 0;
1892         }
1893       else if (REG_P (op0))
1894         {
1895           word = operand_subword_force (op0, offset, GET_MODE (op0));
1896           offset = 0;
1897         }
1898       else
1899         word = op0;
1900
1901       /* Extract the parts in bit-counting order,
1902          whose meaning is determined by BYTES_PER_UNIT.
1903          OFFSET is in UNITs, and UNIT is in bits.
1904          extract_fixed_bit_field wants offset in bytes.  */
1905       part = extract_fixed_bit_field (word_mode, word,
1906                                       offset * unit / BITS_PER_UNIT,
1907                                       thissize, thispos, 0, 1);
1908       bitsdone += thissize;
1909
1910       /* Shift this part into place for the result.  */
1911       if (BYTES_BIG_ENDIAN)
1912         {
1913           if (bitsize != bitsdone)
1914             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1915                                  build_int_cst (NULL_TREE, bitsize - bitsdone),
1916                                  0, 1);
1917         }
1918       else
1919         {
1920           if (bitsdone != thissize)
1921             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1922                                  build_int_cst (NULL_TREE,
1923                                                 bitsdone - thissize), 0, 1);
1924         }
1925
1926       if (first)
1927         result = part;
1928       else
1929         /* Combine the parts with bitwise or.  This works
1930            because we extracted each part as an unsigned bit field.  */
1931         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1932                                OPTAB_LIB_WIDEN);
1933
1934       first = 0;
1935     }
1936
1937   /* Unsigned bit field: we are done.  */
1938   if (unsignedp)
1939     return result;
1940   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1941   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1942                          build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1943                          NULL_RTX, 0);
1944   return expand_shift (RSHIFT_EXPR, word_mode, result,
1945                        build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1946                        NULL_RTX, 0);
1947 }
1948 \f
1949 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1950    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
1951    MODE, fill the upper bits with zeros.  Fail if the layout of either
1952    mode is unknown (as for CC modes) or if the extraction would involve
1953    unprofitable mode punning.  Return the value on success, otherwise
1954    return null.
1955
1956    This is different from gen_lowpart* in these respects:
1957
1958      - the returned value must always be considered an rvalue
1959
1960      - when MODE is wider than SRC_MODE, the extraction involves
1961        a zero extension
1962
1963      - when MODE is smaller than SRC_MODE, the extraction involves
1964        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
1965
1966    In other words, this routine performs a computation, whereas the
1967    gen_lowpart* routines are conceptually lvalue or rvalue subreg
1968    operations.  */
1969
1970 rtx
1971 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
1972 {
1973   enum machine_mode int_mode, src_int_mode;
1974
1975   if (mode == src_mode)
1976     return src;
1977
1978   if (CONSTANT_P (src))
1979     return simplify_gen_subreg (mode, src, src_mode,
1980                                 subreg_lowpart_offset (mode, src_mode));
1981
1982   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
1983     return NULL_RTX;
1984
1985   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
1986       && MODES_TIEABLE_P (mode, src_mode))
1987     {
1988       rtx x = gen_lowpart_common (mode, src);
1989       if (x)
1990         return x;
1991     }
1992
1993   src_int_mode = int_mode_for_mode (src_mode);
1994   int_mode = int_mode_for_mode (mode);
1995   if (src_int_mode == BLKmode || int_mode == BLKmode)
1996     return NULL_RTX;
1997
1998   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
1999     return NULL_RTX;
2000   if (!MODES_TIEABLE_P (int_mode, mode))
2001     return NULL_RTX;
2002
2003   src = gen_lowpart (src_int_mode, src);
2004   src = convert_modes (int_mode, src_int_mode, src, true);
2005   src = gen_lowpart (mode, src);
2006   return src;
2007 }
2008 \f
2009 /* Add INC into TARGET.  */
2010
2011 void
2012 expand_inc (rtx target, rtx inc)
2013 {
2014   rtx value = expand_binop (GET_MODE (target), add_optab,
2015                             target, inc,
2016                             target, 0, OPTAB_LIB_WIDEN);
2017   if (value != target)
2018     emit_move_insn (target, value);
2019 }
2020
2021 /* Subtract DEC from TARGET.  */
2022
2023 void
2024 expand_dec (rtx target, rtx dec)
2025 {
2026   rtx value = expand_binop (GET_MODE (target), sub_optab,
2027                             target, dec,
2028                             target, 0, OPTAB_LIB_WIDEN);
2029   if (value != target)
2030     emit_move_insn (target, value);
2031 }
2032 \f
/* Output a shift instruction for expression code CODE,
   with SHIFTED being the rtx for the value to shift,
   and AMOUNT the tree for the amount to shift by.
   MODE is the mode in which the shift is performed.
   Store the result in the rtx TARGET, if that is convenient.
   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
   Return the rtx for where the value is.

   CODE is one of LSHIFT_EXPR, RSHIFT_EXPR, LROTATE_EXPR or RROTATE_EXPR
   as far as the code below distinguishes: the two rotate codes select a
   rotation, and LSHIFT_EXPR/LROTATE_EXPR select the left direction.
   A shift count of zero returns SHIFTED unchanged.  The function
   asserts that some expansion strategy succeeded.  */

rtx
expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
              tree amount, rtx target, int unsignedp)
{
  rtx op1, temp = 0;
  int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
  int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
  int try;

  /* Previously detected shift-counts computed by NEGATE_EXPR
     and shifted in the other direction; but that does not work
     on all machines.  */

  op1 = expand_normal (amount);

  /* When SHIFT_COUNT_TRUNCATED is set, an out-of-range constant count
     is reduced modulo the bit size of MODE, and a lowpart SUBREG
     wrapped around the count is stripped.  */
  if (SHIFT_COUNT_TRUNCATED)
    {
      if (GET_CODE (op1) == CONST_INT
          && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
              (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
        op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
                       % GET_MODE_BITSIZE (mode));
      else if (GET_CODE (op1) == SUBREG
               && subreg_lowpart_p (op1))
        op1 = SUBREG_REG (op1);
    }

  /* Shifting or rotating by zero is a no-op.  */
  if (op1 == const0_rtx)
    return shifted;

  /* Check whether it's cheaper to implement a left shift by a constant
     bit count by a sequence of additions.  */
  if (code == LSHIFT_EXPR
      && GET_CODE (op1) == CONST_INT
      && INTVAL (op1) > 0
      && INTVAL (op1) < GET_MODE_BITSIZE (mode)
      && INTVAL (op1) < MAX_BITS_PER_WORD
      && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode]
      && shift_cost[mode][INTVAL (op1)] != MAX_COST)
    {
      int i;
      /* Each iteration doubles the value, i.e. shifts it left by one.  */
      for (i = 0; i < INTVAL (op1); i++)
        {
          temp = force_reg (mode, shifted);
          shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
                                  unsignedp, OPTAB_LIB_WIDEN);
        }
      return shifted;
    }

  /* Try progressively more permissive expansion methods until one of
     them produces a result: direct, then widening, then widening with
     library calls.  */
  for (try = 0; temp == 0 && try < 3; try++)
    {
      enum optab_methods methods;

      if (try == 0)
        methods = OPTAB_DIRECT;
      else if (try == 1)
        methods = OPTAB_WIDEN;
      else
        methods = OPTAB_LIB_WIDEN;

      if (rotate)
        {
          /* Widening does not work for rotation.  */
          if (methods == OPTAB_WIDEN)
            continue;
          else if (methods == OPTAB_LIB_WIDEN)
            {
              /* If we have been unable to open-code this by a rotation,
                 do it as the IOR of two shifts.  I.e., to rotate A
                 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
                 where C is the bitsize of A.

                 It is theoretically possible that the target machine might
                 not be able to perform either shift and hence we would
                 be making two libcalls rather than just the one for the
                 shift (similarly if IOR could not be done).  We will allow
                 this extremely unlikely lossage to avoid complicating the
                 code below.  */

              rtx subtarget = target == shifted ? 0 : target;
              tree new_amount, other_amount;
              rtx temp1;
              tree type = TREE_TYPE (amount);
              /* Rebuild the count as a tree of AMOUNT's type so that the
                 complementary count C - N can be folded.  */
              if (GET_MODE (op1) != TYPE_MODE (type)
                  && GET_MODE (op1) != VOIDmode)
                op1 = convert_to_mode (TYPE_MODE (type), op1, 1);
              new_amount = make_tree (type, op1);
              other_amount
                = fold_build2 (MINUS_EXPR, type,
                               build_int_cst (type, GET_MODE_BITSIZE (mode)),
                               new_amount);

              shifted = force_reg (mode, shifted);

              /* Both halves are logical shifts, done by recursive calls.  */
              temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
                                   mode, shifted, new_amount, 0, 1);
              temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
                                    mode, shifted, other_amount, subtarget, 1);
              return expand_binop (mode, ior_optab, temp, temp1, target,
                                   unsignedp, methods);
            }

          temp = expand_binop (mode,
                               left ? rotl_optab : rotr_optab,
                               shifted, op1, target, unsignedp, methods);
        }
      else if (unsignedp)
        temp = expand_binop (mode,
                             left ? ashl_optab : lshr_optab,
                             shifted, op1, target, unsignedp, methods);

      /* Do arithmetic shifts.
         Also, if we are going to widen the operand, we can just as well
         use an arithmetic right-shift instead of a logical one.  */
      if (temp == 0 && ! rotate
          && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
        {
          enum optab_methods methods1 = methods;

          /* If trying to widen a log shift to an arithmetic shift,
             don't accept an arithmetic shift of the same size.  */
          if (unsignedp)
            methods1 = OPTAB_MUST_WIDEN;

          /* Arithmetic shift */

          temp = expand_binop (mode,
                               left ? ashl_optab : ashr_optab,
                               shifted, op1, target, unsignedp, methods1);
        }

      /* We used to try extzv here for logical right shifts, but that was
         only useful for one machine, the VAX, and caused poor code
         generation there for lshrdi3, so the code was deleted and a
         define_expand for lshrsi3 was added to vax.md.  */
    }

  /* One of the strategies above must have produced a result.  */
  gcc_assert (temp);
  return temp;
}
2181 \f
/* Codes for the individual steps of a synthesized multiplication
   sequence.  The effect of alg_zero through alg_sub_t2_m on the running
   total is listed in the comment before struct algorithm.
   alg_impossible is the value the multiplication cache (struct
   alg_hash_entry) uses to record that no algorithm exists within a
   given cost.
   NOTE(review): alg_unknown presumably marks a cache entry that has not
   been computed yet -- confirm against the users of alg_hash.  */
enum alg_code {
  alg_unknown,
  alg_zero,
  alg_m, alg_shift,
  alg_add_t_m2,
  alg_sub_t_m2,
  alg_add_factor,
  alg_sub_factor,
  alg_add_t2_m,
  alg_sub_t2_m,
  alg_impossible
};
2194
/* This structure holds the "cost" of a multiply sequence.  The
   "cost" field holds the total rtx_cost of every operator in the
   synthetic multiplication sequence, hence cost(a op b) is defined
   as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
   The "latency" field holds the minimum possible latency of the
   synthetic multiply, on a hypothetical infinitely parallel CPU.
   This is the critical path, or the maximum height, of the expression
   tree which is the sum of rtx_costs on the most expensive path from
   any leaf to the root.  Hence latency(a op b) is defined as zero for
   leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise.

   Both fields are of type short.  Two such costs are ordered with
   CHEAPER_MULT_COST; a cost is compared against a single integer with
   MULT_COST_LESS.  */

struct mult_cost {
  short cost;     /* Total rtx_cost of the multiplication sequence.  */
  short latency;  /* The latency of the multiplication sequence.  */
};
2210
/* This macro is used to compare a pointer to a mult_cost against a
   single integer "rtx_cost" value.  This is equivalent to the macro
   CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}.  It is true if X's cost is
   below Y, or equal to Y with X's latency below Y.  Both arguments may
   be evaluated more than once, so they must be free of side effects.  */
#define MULT_COST_LESS(X,Y) ((X)->cost < (Y)    \
                             || ((X)->cost == (Y) && (X)->latency < (Y)))
2216
/* This macro is used to compare two pointers to mult_costs against
   each other.  The macro returns true if X is cheaper than Y.
   Currently, the cheaper of two mult_costs is the one with the
   lower "cost".  If "cost"s are tied, the lower latency is cheaper.
   Both arguments may be evaluated more than once, so they must be free
   of side effects.  */
#define CHEAPER_MULT_COST(X,Y)  ((X)->cost < (Y)->cost          \
                                 || ((X)->cost == (Y)->cost     \
                                     && (X)->latency < (Y)->latency))
2224
/* This structure records a sequence of operations.
   `ops' is the number of operations recorded.
   `cost' is their total cost (a struct mult_cost, i.e. both the summed
   rtx_cost and the latency).
   The operations are stored in `op' and the corresponding
   logarithms of the integer coefficients in `log'.

   These are the operations:
   alg_zero             total := 0;
   alg_m                total := multiplicand;
   alg_shift            total := total * coeff
   alg_add_t_m2         total := total + multiplicand * coeff;
   alg_sub_t_m2         total := total - multiplicand * coeff;
   alg_add_factor       total := total * coeff + total;
   alg_sub_factor       total := total * coeff - total;
   alg_add_t2_m         total := total * coeff + multiplicand;
   alg_sub_t2_m         total := total * coeff - multiplicand;

   The first operation must be either alg_zero or alg_m.  */

struct algorithm
{
  /* Total cost and latency of the recorded sequence.  */
  struct mult_cost cost;
  /* Number of valid entries in OP and LOG.  */
  short ops;
  /* The size of the OP and LOG fields are not directly related to the
     word size, but the worst-case algorithms will be if we have few
     consecutive ones or zeros, i.e., a multiplicand like 10101010101...
     In that case we will generate shift-by-2, add, shift-by-2, add,...,
     in total wordsize operations.  */
  enum alg_code op[MAX_BITS_PER_WORD];
  char log[MAX_BITS_PER_WORD];
};
2256
/* The entry for our multiplication cache/hash table.  An entry is
   identified by the multiplier T together with the mode MODE.  */
struct alg_hash_entry {
  /* The number we are multiplying by.  */
  unsigned HOST_WIDE_INT t;

  /* The mode in which we are multiplying something by T.  */
  enum machine_mode mode;

  /* The best multiplication algorithm for T.  */
  enum alg_code alg;

  /* The cost of multiplication if ALG is not alg_impossible.
     Otherwise, the cost within which multiplication by T is
     impossible.  */
  struct mult_cost cost;
};
2273
/* The number of cache/hash entries.  A larger table is used when
   HOST_WIDE_INT is 64 bits wide.  */
#if HOST_BITS_PER_WIDE_INT == 64
#define NUM_ALG_HASH_ENTRIES 1031
#else
#define NUM_ALG_HASH_ENTRIES 307
#endif

/* Each entry of ALG_HASH caches the alg_code for some integer.  This is
   actually a hash table.  If we have a collision, the older entry is
   kicked out.  */
static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2285
/* Indicates the type of fixup needed after a constant multiplication.
   basic_variant means no fixup is needed, negate_variant means that
   the result should be negated, and add_variant means that the
   multiplicand should be added to the result.  */
enum mult_variant {basic_variant, negate_variant, add_variant};
2291
/* Forward declarations of the file-local helpers used for synthesizing
   multiplications by constants and for the multiplier, modular-inverse
   and high-part computations declared below.  */
static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
                        const struct mult_cost *, enum machine_mode mode);
static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
                                 struct algorithm *, enum mult_variant *, int);
static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
                              const struct algorithm *, enum mult_variant);
static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
                                                 int, rtx *, int *, int *);
static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
static rtx extract_high_half (enum machine_mode, rtx);
static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
                                       int, int);
2305 /* Compute the best algorithm for multiplying by T and store it in *ALG_OUT.
2306    The algorithm must cost less than *COST_LIMIT.
2307    If no algorithm was found, ALG_OUT->cost is left at COST_LIMIT plus one
2308    in both fields and all other fields of *ALG_OUT are undefined.
2309    MODE is the machine mode of the multiplication.  */
2310
2311 static void
2312 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2313             const struct mult_cost *cost_limit, enum machine_mode mode)
2314 {
2315   int m;
2316   struct algorithm *alg_in, *best_alg;
2317   struct mult_cost best_cost;
2318   struct mult_cost new_limit;
2319   int op_cost, op_latency;
2320   unsigned HOST_WIDE_INT q;
2321   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));  /* Shift counts tried stay below this.  */
2322   int hash_index;
2323   bool cache_hit = false;  /* True while replaying a cached algorithm.  */
2324   enum alg_code cache_alg = alg_zero;
2325
2326   /* Indicate that no algorithm is yet found.  If no algorithm
2327      is found, this value will be returned and indicate failure.  */
2328   alg_out->cost.cost = cost_limit->cost + 1;
2329   alg_out->cost.latency = cost_limit->latency + 1;
2330
2331   if (cost_limit->cost < 0
2332       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2333     return;
2334
2335   /* Restrict the bits of "t" to the multiplication's mode.  */
2336   t &= GET_MODE_MASK (mode);
2337
2338   /* t == 1 can be done in zero cost.  */
2339   if (t == 1)
2340     {
2341       alg_out->ops = 1;
2342       alg_out->cost.cost = 0;
2343       alg_out->cost.latency = 0;
2344       alg_out->op[0] = alg_m;
2345       return;
2346     }
2347
2348   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2349      fail now.  */
2350   if (t == 0)
2351     {
2352       if (MULT_COST_LESS (cost_limit, zero_cost))
2353         return;
2354       else
2355         {
2356           alg_out->ops = 1;
2357           alg_out->cost.cost = zero_cost;
2358           alg_out->cost.latency = zero_cost;
2359           alg_out->op[0] = alg_zero;
2360           return;
2361         }
2362     }
2363
2364   /* We'll be needing a couple extra algorithm structures now: ALG_IN
2365      receives each recursive result and BEST_ALG keeps the best so far.  */
2366   alg_in = alloca (sizeof (struct algorithm));
2367   best_alg = alloca (sizeof (struct algorithm));
2368   best_cost = *cost_limit;
2369
2370   /* Compute the hash index.  */
2371   hash_index = (t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES;
2372
2373   /* See if we already know what to do for T.  */
2374   if (alg_hash[hash_index].t == t
2375       && alg_hash[hash_index].mode == mode
2376       && alg_hash[hash_index].alg != alg_unknown)
2377     {
2378       cache_alg = alg_hash[hash_index].alg;
2379
2380       if (cache_alg == alg_impossible)
2381         {
2382           /* The cache tells us that it's impossible to synthesize
2383              multiplication by T within alg_hash[hash_index].cost.  */
2384           if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2385             /* COST_LIMIT is at least as restrictive as the one
2386                recorded in the hash table, in which case we have no
2387                hope of synthesizing a multiplication.  Just
2388                return.  */
2389             return;
2390
2391           /* If we get here, COST_LIMIT is less restrictive than the
2392              one recorded in the hash table, so we may be able to
2393              synthesize a multiplication.  Proceed as if we didn't
2394              have the cache entry.  */
2395         }
2396       else
2397         {
2398           if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2399             /* The cached algorithm shows that this multiplication
2400                requires more cost than COST_LIMIT.  Just return.  This
2401                way, we don't clobber this cache entry with
2402                alg_impossible but retain useful information.  */
2403             return;
2404
2405           cache_hit = true;  /* Only the cached alternative is tried below.  */
2406
2407           switch (cache_alg)
2408             {
2409             case alg_shift:
2410               goto do_alg_shift;
2411
2412             case alg_add_t_m2:
2413             case alg_sub_t_m2:
2414               goto do_alg_addsub_t_m2;
2415
2416             case alg_add_factor:
2417             case alg_sub_factor:
2418               goto do_alg_addsub_factor;
2419
2420             case alg_add_t2_m:
2421               goto do_alg_add_t2_m;
2422
2423             case alg_sub_t2_m:
2424               goto do_alg_sub_t2_m;
2425
2426             default:
2427               gcc_unreachable ();
2428             }
2429         }
2430     }
2431
2432   /* If we have a group of zero bits at the low-order part of T, try
2433      multiplying by the remaining bits and then doing a shift.  */
2434
2435   if ((t & 1) == 0)
2436     {
2437     do_alg_shift:
2438       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2439       if (m < maxm)
2440         {
2441           q = t >> m;
2442           /* The function expand_shift will choose between a shift and
2443              a sequence of additions, so the observed cost is given as
2444              MIN (m * add_cost[mode], shift_cost[mode][m]).  */
2445           op_cost = m * add_cost[mode];
2446           if (shift_cost[mode][m] < op_cost)
2447             op_cost = shift_cost[mode][m];
2448           new_limit.cost = best_cost.cost - op_cost;
2449           new_limit.latency = best_cost.latency - op_cost;
2450           synth_mult (alg_in, q, &new_limit, mode);
2451
2452           alg_in->cost.cost += op_cost;
2453           alg_in->cost.latency += op_cost;
2454           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2455             {
2456               struct algorithm *x;
2457               best_cost = alg_in->cost;
2458               x = alg_in, alg_in = best_alg, best_alg = x;  /* Swap the two buffers.  */
2459               best_alg->log[best_alg->ops] = m;  /* Append this step after the recursive ops.  */
2460               best_alg->op[best_alg->ops] = alg_shift;
2461             }
2462         }
2463       if (cache_hit)
2464         goto done;
2465     }
2466
2467   /* If we have an odd number, add or subtract one.  */
2468   if ((t & 1) != 0)
2469     {
2470       unsigned HOST_WIDE_INT w;
2471
2472     do_alg_addsub_t_m2:
2473       for (w = 1; (w & t) != 0; w <<= 1)
2474         ;  /* W ends as the lowest clear bit of T, or 0 if there is none.  */
2475       /* If T was -1, then W will be zero after the loop.  This is another
2476          case where T ends with ...111.  Handling this with (T + 1) and
2477          subtract 1 produces slightly better code and results in algorithm
2478          selection much faster than treating it like the ...0111 case
2479          below.  */
2480       if (w == 0
2481           || (w > 2
2482               /* Reject the case where t is 3.
2483                  Thus we prefer addition in that case.  */
2484               && t != 3))
2485         {
2486           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2487
2488           op_cost = add_cost[mode];
2489           new_limit.cost = best_cost.cost - op_cost;
2490           new_limit.latency = best_cost.latency - op_cost;
2491           synth_mult (alg_in, t + 1, &new_limit, mode);
2492
2493           alg_in->cost.cost += op_cost;
2494           alg_in->cost.latency += op_cost;
2495           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2496             {
2497               struct algorithm *x;
2498               best_cost = alg_in->cost;
2499               x = alg_in, alg_in = best_alg, best_alg = x;
2500               best_alg->log[best_alg->ops] = 0;
2501               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2502             }
2503         }
2504       else
2505         {
2506           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2507
2508           op_cost = add_cost[mode];
2509           new_limit.cost = best_cost.cost - op_cost;
2510           new_limit.latency = best_cost.latency - op_cost;
2511           synth_mult (alg_in, t - 1, &new_limit, mode);
2512
2513           alg_in->cost.cost += op_cost;
2514           alg_in->cost.latency += op_cost;
2515           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2516             {
2517               struct algorithm *x;
2518               best_cost = alg_in->cost;
2519               x = alg_in, alg_in = best_alg, best_alg = x;
2520               best_alg->log[best_alg->ops] = 0;
2521               best_alg->op[best_alg->ops] = alg_add_t_m2;
2522             }
2523         }
2524       if (cache_hit)
2525         goto done;
2526     }
2527
2528   /* Look for factors of t of the form
2529      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2530      If we find such a factor, we can multiply by t using an algorithm that
2531      multiplies by q, shift the result by m and add/subtract it to itself.
2532
2533      We search for large factors first and loop down, even if large factors
2534      are less probable than small; if we find a large factor we will find a
2535      good sequence quickly, and therefore be able to prune (by decreasing
2536      COST_LIMIT) the search.  */
2537
2538  do_alg_addsub_factor:
2539   for (m = floor_log2 (t - 1); m >= 2; m--)
2540     {
2541       unsigned HOST_WIDE_INT d;
2542
2543       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2544       if (t % d == 0 && t > d && m < maxm
2545           && (!cache_hit || cache_alg == alg_add_factor))
2546         {
2547           /* If the target has a cheap shift-and-add instruction use
2548              that in preference to a shift insn followed by an add insn.
2549              Assume that the shift-and-add is "atomic" with a latency
2550              equal to its cost, otherwise assume that on superscalar
2551              hardware the shift may be executed concurrently with the
2552              earlier steps in the algorithm.  */
2553           op_cost = add_cost[mode] + shift_cost[mode][m];
2554           if (shiftadd_cost[mode][m] < op_cost)
2555             {
2556               op_cost = shiftadd_cost[mode][m];
2557               op_latency = op_cost;
2558             }
2559           else
2560             op_latency = add_cost[mode];
2561
2562           new_limit.cost = best_cost.cost - op_cost;
2563           new_limit.latency = best_cost.latency - op_latency;
2564           synth_mult (alg_in, t / d, &new_limit, mode);
2565
2566           alg_in->cost.cost += op_cost;
2567           alg_in->cost.latency += op_latency;
2568           if (alg_in->cost.latency < op_cost)  /* Latency is never below this step's cost.  */
2569             alg_in->cost.latency = op_cost;
2570           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2571             {
2572               struct algorithm *x;
2573               best_cost = alg_in->cost;
2574               x = alg_in, alg_in = best_alg, best_alg = x;
2575               best_alg->log[best_alg->ops] = m;
2576               best_alg->op[best_alg->ops] = alg_add_factor;
2577             }
2578           /* Other factors will have been taken care of in the recursion.  */
2579           break;
2580         }
2581
2582       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2583       if (t % d == 0 && t > d && m < maxm
2584           && (!cache_hit || cache_alg == alg_sub_factor))
2585         {
2586           /* If the target has a cheap shift-and-subtract insn use
2587              that in preference to a shift insn followed by a sub insn.
2588              Assume that the shift-and-sub is "atomic" with a latency
2589              equal to its cost, otherwise assume that on superscalar
2590              hardware the shift may be executed concurrently with the
2591              earlier steps in the algorithm.  */
2592           op_cost = add_cost[mode] + shift_cost[mode][m];
2593           if (shiftsub_cost[mode][m] < op_cost)
2594             {
2595               op_cost = shiftsub_cost[mode][m];
2596               op_latency = op_cost;
2597             }
2598           else
2599             op_latency = add_cost[mode];
2600
2601           new_limit.cost = best_cost.cost - op_cost;
2602           new_limit.latency = best_cost.latency - op_latency;
2603           synth_mult (alg_in, t / d, &new_limit, mode);
2604
2605           alg_in->cost.cost += op_cost;
2606           alg_in->cost.latency += op_latency;
2607           if (alg_in->cost.latency < op_cost)
2608             alg_in->cost.latency = op_cost;
2609           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2610             {
2611               struct algorithm *x;
2612               best_cost = alg_in->cost;
2613               x = alg_in, alg_in = best_alg, best_alg = x;
2614               best_alg->log[best_alg->ops] = m;
2615               best_alg->op[best_alg->ops] = alg_sub_factor;
2616             }
2617           break;
2618         }
2619     }
2620   if (cache_hit)
2621     goto done;
2622
2623   /* Try shift-and-add (load effective address) instructions,
2624      i.e. do a*3, a*5, a*9.  */
2625   if ((t & 1) != 0)
2626     {
2627     do_alg_add_t2_m:
2628       q = t - 1;
2629       q = q & -q;  /* Isolate the lowest set bit of T - 1.  */
2630       m = exact_log2 (q);
2631       if (m >= 0 && m < maxm)
2632         {
2633           op_cost = shiftadd_cost[mode][m];
2634           new_limit.cost = best_cost.cost - op_cost;
2635           new_limit.latency = best_cost.latency - op_cost;
2636           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2637
2638           alg_in->cost.cost += op_cost;
2639           alg_in->cost.latency += op_cost;
2640           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2641             {
2642               struct algorithm *x;
2643               best_cost = alg_in->cost;
2644               x = alg_in, alg_in = best_alg, best_alg = x;
2645               best_alg->log[best_alg->ops] = m;
2646               best_alg->op[best_alg->ops] = alg_add_t2_m;
2647             }
2648         }
2649       if (cache_hit)
2650         goto done;
2651
2652     do_alg_sub_t2_m:
2653       q = t + 1;
2654       q = q & -q;  /* Isolate the lowest set bit of T + 1.  */
2655       m = exact_log2 (q);
2656       if (m >= 0 && m < maxm)
2657         {
2658           op_cost = shiftsub_cost[mode][m];
2659           new_limit.cost = best_cost.cost - op_cost;
2660           new_limit.latency = best_cost.latency - op_cost;
2661           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2662
2663           alg_in->cost.cost += op_cost;
2664           alg_in->cost.latency += op_cost;
2665           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2666             {
2667               struct algorithm *x;
2668               best_cost = alg_in->cost;
2669               x = alg_in, alg_in = best_alg, best_alg = x;
2670               best_alg->log[best_alg->ops] = m;
2671               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2672             }
2673         }
2674       if (cache_hit)
2675         goto done;
2676     }
2677
2678  done:
2679   /* If best_cost has not decreased, we have not found any algorithm.  */
2680   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2681     {
2682       /* We failed to find an algorithm.  Record alg_impossible for
2683          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2684          we are asked to find an algorithm for T within the same or
2685          lower COST_LIMIT, we can immediately return to the
2686          caller.  */
2687       alg_hash[hash_index].t = t;
2688       alg_hash[hash_index].mode = mode;
2689       alg_hash[hash_index].alg = alg_impossible;
2690       alg_hash[hash_index].cost = *cost_limit;
2691       return;
2692     }
2693
2694   /* Cache the result.  */
2695   if (!cache_hit)
2696     {
2697       alg_hash[hash_index].t = t;
2698       alg_hash[hash_index].mode = mode;
2699       alg_hash[hash_index].alg = best_alg->op[best_alg->ops];  /* Only the final step is cached.  */
2700       alg_hash[hash_index].cost.cost = best_cost.cost;
2701       alg_hash[hash_index].cost.latency = best_cost.latency;
2702     }
2703
2704   /* If we are getting a too long sequence for `struct algorithm'
2705      to record, make this search fail.  */
2706   if (best_alg->ops == MAX_BITS_PER_WORD)
2707     return;
2708
2709   /* Copy the algorithm from temporary space to the space at alg_out.
2710      We avoid using structure assignment because the majority of
2711      best_alg is normally undefined, and this is a critical function.  */
2712   alg_out->ops = best_alg->ops + 1;
2713   alg_out->cost = best_cost;
2714   memcpy (alg_out->op, best_alg->op,
2715           alg_out->ops * sizeof *alg_out->op);
2716   memcpy (alg_out->log, best_alg->log,
2717           alg_out->ops * sizeof *alg_out->log);
2718 }
2719 \f
2720 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2721    Try three variations:
2722
2723        - a shift/add sequence based on VAL itself
2724        - a shift/add sequence based on -VAL, followed by a negation
2725        - a shift/add sequence based on VAL - 1, followed by an addition.
2726
2727    Return true if the cheapest of these costs less than MULT_COST,
2728    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2729
2730 static bool
2731 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2732                      struct algorithm *alg, enum mult_variant *variant,
2733                      int mult_cost)
2734 {
2735   struct algorithm alg2;  /* Scratch result for the -VAL and VAL - 1 attempts.  */
2736   struct mult_cost limit;
2737   int op_cost;
2738
2739   /* Fail quickly for impossible bounds.  */
2740   if (mult_cost < 0)
2741     return false;
2742
2743   /* Ensure that mult_cost provides a reasonable upper bound.
2744      Any constant multiplication can be performed with less
2745      than 2 * bits additions.  */
2746   op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[mode];
2747   if (mult_cost > op_cost)
2748     mult_cost = op_cost;
2749
2750   *variant = basic_variant;
2751   limit.cost = mult_cost;
2752   limit.latency = mult_cost;
2753   synth_mult (alg, val, &limit, mode);
2754
2755   /* This works only if the inverted value actually fits in an
2756      `unsigned int' */
2757   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2758     {
2759       op_cost = neg_cost[mode];
2760       if (MULT_COST_LESS (&alg->cost, mult_cost))  /* Bound by the algorithm already found, if any.  */
2761         {
2762           limit.cost = alg->cost.cost - op_cost;
2763           limit.latency = alg->cost.latency - op_cost;
2764         }
2765       else
2766         {
2767           limit.cost = mult_cost - op_cost;
2768           limit.latency = mult_cost - op_cost;
2769         }
2770
2771       synth_mult (&alg2, -val, &limit, mode);
2772       alg2.cost.cost += op_cost;  /* Account for the final negation.  */
2773       alg2.cost.latency += op_cost;
2774       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2775         *alg = alg2, *variant = negate_variant;
2776     }
2777
2778   /* This proves very useful for division-by-constant.  */
2779   op_cost = add_cost[mode];
2780   if (MULT_COST_LESS (&alg->cost, mult_cost))
2781     {
2782       limit.cost = alg->cost.cost - op_cost;
2783       limit.latency = alg->cost.latency - op_cost;
2784     }
2785   else
2786     {
2787       limit.cost = mult_cost - op_cost;
2788       limit.latency = mult_cost - op_cost;
2789     }
2790
2791   synth_mult (&alg2, val - 1, &limit, mode);
2792   alg2.cost.cost += op_cost;  /* Account for the final addition.  */
2793   alg2.cost.latency += op_cost;
2794   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2795     *alg = alg2, *variant = add_variant;
2796
2797   return MULT_COST_LESS (&alg->cost, mult_cost);
2798 }
2799
2800 /* A subroutine of expand_mult, used for constant multiplications.
2801    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2802    convenient.  Use the shift/add sequence described by ALG and apply
2803    the final fixup specified by VARIANT.  Return the rtx of the result.  */
2804
2805 static rtx
2806 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2807                    rtx target, const struct algorithm *alg,
2808                    enum mult_variant variant)
2809 {
2810   HOST_WIDE_INT val_so_far;  /* Multiplier that ACCUM currently represents.  */
2811   rtx insn, accum, tem;
2812   int opno;
2813   enum machine_mode nmode;
2814
2815   /* Avoid referencing memory over and over and invalid sharing
2816      on SUBREGs.  */
2817   op0 = force_reg (mode, op0);
2818
2819   /* ACCUM starts out either as OP0 or as a zero, depending on
2820      the first operation.  */
2821
2822   if (alg->op[0] == alg_zero)
2823     {
2824       accum = copy_to_mode_reg (mode, const0_rtx);
2825       val_so_far = 0;
2826     }
2827   else if (alg->op[0] == alg_m)
2828     {
2829       accum = copy_to_mode_reg (mode, op0);
2830       val_so_far = 1;
2831     }
2832   else
2833     gcc_unreachable ();
2834
2835   for (opno = 1; opno < alg->ops; opno++)
2836     {
2837       int log = alg->log[opno];
2838       rtx shift_subtarget = optimize ? 0 : accum;
2839       rtx add_target  /* TARGET only for the last step, when no add fixup follows and not optimizing.  */
2840         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2841            && !optimize)
2842           ? target : 0;
2843       rtx accum_target = optimize ? 0 : accum;
2844
2845       switch (alg->op[opno])
2846         {
2847         case alg_shift:
2848           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2849                                 build_int_cst (NULL_TREE, log),
2850                                 NULL_RTX, 0);
2851           val_so_far <<= log;
2852           break;
2853
2854         case alg_add_t_m2:
2855           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2856                               build_int_cst (NULL_TREE, log),
2857                               NULL_RTX, 0);
2858           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2859                                  add_target ? add_target : accum_target);
2860           val_so_far += (HOST_WIDE_INT) 1 << log;
2861           break;
2862
2863         case alg_sub_t_m2:
2864           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2865                               build_int_cst (NULL_TREE, log),
2866                               NULL_RTX, 0);
2867           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2868                                  add_target ? add_target : accum_target);
2869           val_so_far -= (HOST_WIDE_INT) 1 << log;
2870           break;
2871
2872         case alg_add_t2_m:
2873           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2874                                 build_int_cst (NULL_TREE, log),
2875                                 shift_subtarget,
2876                                 0);
2877           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2878                                  add_target ? add_target : accum_target);
2879           val_so_far = (val_so_far << log) + 1;
2880           break;
2881
2882         case alg_sub_t2_m:
2883           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2884                                 build_int_cst (NULL_TREE, log),
2885                                 shift_subtarget, 0);
2886           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2887                                  add_target ? add_target : accum_target);
2888           val_so_far = (val_so_far << log) - 1;
2889           break;
2890
2891         case alg_add_factor:
2892           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2893                               build_int_cst (NULL_TREE, log),
2894                               NULL_RTX, 0);
2895           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2896                                  add_target ? add_target : accum_target);
2897           val_so_far += val_so_far << log;
2898           break;
2899
2900         case alg_sub_factor:
2901           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2902                               build_int_cst (NULL_TREE, log),
2903                               NULL_RTX, 0);
2904           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2905                                  (add_target
2906                                   ? add_target : (optimize ? 0 : tem)));
2907           val_so_far = (val_so_far << log) - val_so_far;
2908           break;
2909
2910         default:
2911           gcc_unreachable ();
2912         }
2913
2914       /* Write a REG_EQUAL note on the last insn so that we can cse
2915          multiplication sequences.  Note that if ACCUM is a SUBREG,
2916          we've set the inner register and must properly indicate
2917          that.  */
2918
2919       tem = op0, nmode = mode;
2920       if (GET_CODE (accum) == SUBREG)
2921         {
2922           nmode = GET_MODE (SUBREG_REG (accum));
2923           tem = gen_lowpart (nmode, op0);
2924         }
2925
2926       insn = get_last_insn ();
2927       set_unique_reg_note (insn, REG_EQUAL,
2928                            gen_rtx_MULT (nmode, tem,
2929                                          GEN_INT (val_so_far)));
2930     }
2931   /* Apply the final fixup requested by VARIANT, tracking it in VAL_SO_FAR.  */
2932   if (variant == negate_variant)
2933     {
2934       val_so_far = -val_so_far;
2935       accum = expand_unop (mode, neg_optab, accum, target, 0);
2936     }
2937   else if (variant == add_variant)
2938     {
2939       val_so_far = val_so_far + 1;
2940       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2941     }
2942
2943   /* Compare only the bits of val and val_so_far that are significant
2944      in the result mode, to avoid sign-/zero-extension confusion.  */
2945   val &= GET_MODE_MASK (mode);
2946   val_so_far &= GET_MODE_MASK (mode);
2947   gcc_assert (val == val_so_far);  /* The emitted sequence must multiply by exactly VAL.  */
2948
2949   return accum;
2950 }
2951
2952 /* Perform a multiplication and return an rtx for the result.
2953    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2954    TARGET is a suggestion for where to store the result (an rtx).
2955    With UNSIGNEDP zero and flag_trapv set, MODE_INT modes use negv/smulv optabs.
2956    We check specially for a constant integer as OP1.
2957    If you want this check for OP0 as well, then before calling
2958    you should swap the two operands if OP0 would be constant.  */
2959
2960 rtx
2961 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
2962              int unsignedp)
2963 {
2964   enum mult_variant variant;
2965   struct algorithm algorithm;
2966   int max_cost;
2967
2968   /* Handling const0_rtx here allows us to use zero as a rogue value for
2969      coeff below.  */
2970   if (op1 == const0_rtx)
2971     return const0_rtx;
2972   if (op1 == const1_rtx)
2973     return op0;
2974   if (op1 == constm1_rtx)
2975     return expand_unop (mode,
2976                         GET_MODE_CLASS (mode) == MODE_INT
2977                         && !unsignedp && flag_trapv
2978                         ? negv_optab : neg_optab,
2979                         op0, target, 0);
2980
2981   /* These are the operations that are potentially turned into a sequence
2982      of shifts and additions.  */
2983   if (SCALAR_INT_MODE_P (mode)
2984       && (unsignedp || !flag_trapv))
2985     {
2986       HOST_WIDE_INT coeff = 0;  /* Zero means "no usable constant coefficient".  */
2987       rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);  /* Stands in for OP0 in cost queries.  */
2988
2989       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
2990          less than or equal in size to `unsigned int' this doesn't matter.
2991          If the mode is larger than `unsigned int', then synth_mult works
2992          only if the constant value exactly fits in an `unsigned int' without
2993          any truncation.  This means that multiplying by negative values does
2994          not work; results are off by 2^32 on a 32 bit machine.  */
2995
2996       if (GET_CODE (op1) == CONST_INT)
2997         {
2998           /* Attempt to handle multiplication of DImode values by negative
2999              coefficients, by performing the multiplication by a positive
3000              multiplier and then inverting the result.  */
3001           if (INTVAL (op1) < 0
3002               && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3003             {
3004               /* It's safe to use -INTVAL (op1) even for INT_MIN, as the
3005                  result is interpreted as an unsigned coefficient.
3006                  Exclude cost of op0 from max_cost to match the cost
3007                  calculation of the synth_mult.  */
3008               max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET)
3009                          - neg_cost[mode];
3010               if (max_cost > 0
3011                   && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3012                                           &variant, max_cost))
3013                 {
3014                   rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3015                                                 NULL_RTX, &algorithm,
3016                                                 variant);
3017                   return expand_unop (mode, neg_optab, temp, target, 0);
3018                 }
3019             }
3020           else coeff = INTVAL (op1);
3021         }
3022       else if (GET_CODE (op1) == CONST_DOUBLE)
3023         {
3024           /* If we are multiplying in DImode, it may still be a win
3025              to try to work with shifts and adds.  */
3026           if (CONST_DOUBLE_HIGH (op1) == 0)
3027             coeff = CONST_DOUBLE_LOW (op1);
3028           else if (CONST_DOUBLE_LOW (op1) == 0
3029                    && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3030             {
3031               int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3032                           + HOST_BITS_PER_WIDE_INT;
3033               return expand_shift (LSHIFT_EXPR, mode, op0,
3034                                    build_int_cst (NULL_TREE, shift),
3035                                    target, unsignedp);
3036             }
3037         }
3038
3039       /* We used to test optimize here, on the grounds that it's better to
3040          produce a smaller program when -O is not used.  But this causes
3041          such a terrible slowdown sometimes that it seems better to always
3042          use synth_mult.  */
3043       if (coeff != 0)
3044         {
3045           /* Special case powers of two.  */
3046           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3047             return expand_shift (LSHIFT_EXPR, mode, op0,
3048                                  build_int_cst (NULL_TREE, floor_log2 (coeff)),
3049                                  target, unsignedp);
3050
3051           /* Exclude cost of op0 from max_cost to match the cost
3052              calculation of the synth_mult.  */
3053           max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET);
3054           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3055                                    max_cost))
3056             return expand_mult_const (mode, op0, coeff, target,
3057                                       &algorithm, variant);
3058         }
3059     }
3060   /* Put a CONST_DOUBLE operand second so the check below sees it in OP1.  */
3061   if (GET_CODE (op0) == CONST_DOUBLE)
3062     {
3063       rtx temp = op0;
3064       op0 = op1;
3065       op1 = temp;
3066     }
3067
3068   /* Expand x*2.0 as x+x.  */
3069   if (GET_CODE (op1) == CONST_DOUBLE
3070       && SCALAR_FLOAT_MODE_P (mode))
3071     {
3072       REAL_VALUE_TYPE d;
3073       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3074
3075       if (REAL_VALUES_EQUAL (d, dconst2))
3076         {
3077           op0 = force_reg (GET_MODE (op0), op0);
3078           return expand_binop (mode, add_optab, op0, op0,
3079                                target, unsignedp, OPTAB_LIB_WIDEN);
3080         }
3081     }
3082
3083   /* This used to use umul_optab if unsigned, but for non-widening multiply
3084      there is no difference between signed and unsigned.  */
3085   op0 = expand_binop (mode,
3086                       ! unsignedp
3087                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3088                       ? smulv_optab : smul_optab,
3089                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3090   gcc_assert (op0);
3091   return op0;
3092 }
3093 \f
3094 /* Return ceil (log2 (X)), i.e. the least N for which 2**N >= X.  */
3095
3096 int
3097 ceil_log2 (unsigned HOST_WIDE_INT x)
3098 {
3099   return 1 + floor_log2 (x - 1);
3100 }
3101
3102 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3103    replace division by D, and put the least significant N bits of the result
3104    in *MULTIPLIER_PTR and return the most significant bit.
3105
3106    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3107    needed precision is in PRECISION (should be <= N).
3108
3109    PRECISION should be as small as possible so this function can choose
3110    multiplier more freely.
3111
3112    The rounded-up logarithm of D is placed in *LGUP_PTR.  A shift count that
3113    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3114
3115    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3116    where m is the full N + 1 bit multiplier.
        NOTE(review): the shift above presumably applies to the high N bits
        of the product rather than to the full product -- confirm against
        the callers.  */
3117
3118 static
3119 unsigned HOST_WIDE_INT
3120 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3121                    rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3122 {
3123   HOST_WIDE_INT mhigh_hi, mlow_hi;
3124   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3125   int lgup, post_shift;
3126   int pow, pow2;
3127   unsigned HOST_WIDE_INT nl, dummy1;
3128   HOST_WIDE_INT nh, dummy2;
3129
3130   /* lgup = ceil(log2(divisor)); */
3131   lgup = ceil_log2 (d);
3132
3133   gcc_assert (lgup <= n);
3134
       /* POW is the exponent of the numerator used for mlow; POW2 is the
          exponent of the extra term that is added to form mhigh.  */
3135   pow = n + lgup;
3136   pow2 = n + lgup - precision;
3137
3138   /* We could handle this with some effort, but this case is much
3139      better handled directly with a scc insn, so rely on caller using
3140      that.  */
3141   gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3142
3143   /* mlow = 2^(N + lgup)/d
          The numerator is held as the double-word value NH:NL, so the single
          set bit lands in NH when POW does not fit in one HOST_WIDE_INT.  */
3144  if (pow >= HOST_BITS_PER_WIDE_INT)
3145     {
3146       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3147       nl = 0;
3148     }
3149   else
3150     {
3151       nh = 0;
3152       nl = (unsigned HOST_WIDE_INT) 1 << pow;
3153     }
3154   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3155                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3156
3157   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d
          The second term is OR'ed into the numerator already in NH:NL.  */
3158   if (pow2 >= HOST_BITS_PER_WIDE_INT)
3159     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3160   else
3161     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3162   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3163                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3164
3165   gcc_assert (!mhigh_hi || nh - d < d);
3166   gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3167   /* Assert that mlow < mhigh.  */
3168   gcc_assert (mlow_hi < mhigh_hi
3169               || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3170
3171   /* If precision == N, then mlow, mhigh exceed 2^N
3172      (but they do not exceed 2^(N+1)).  */
3173
3174   /* Reduce to lowest terms.
          Each iteration shifts both HI:LO pairs right by one bit.  The loop
          stops, keeping the previous values, as soon as the halved mlow is
          no longer below the halved mhigh; otherwise the halved values are
          adopted (their high words become zero) and POST_SHIFT drops by
          one.  */
3175   for (post_shift = lgup; post_shift > 0; post_shift--)
3176     {
3177       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3178       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3179       if (ml_lo >= mh_lo)
3180         break;
3181
3182       mlow_hi = 0;
3183       mlow_lo = ml_lo;
3184       mhigh_hi = 0;
3185       mhigh_lo = mh_lo;
3186     }
3187
3188   *post_shift_ptr = post_shift;
3189   *lgup_ptr = lgup;
3190   if (n < HOST_BITS_PER_WIDE_INT)
3191     {
           /* Only the low N bits of mhigh are stored as the multiplier; the
              return value is nonzero when mhigh_lo >= 2^N - 1.
              NOTE(review): the test is ">= mask", not "> mask"; whether the
              two can differ here is not evident from this function --
              confirm.  */
3192       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3193       *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3194       return mhigh_lo >= mask;
3195     }
3196   else
3197     {
           /* N fills a whole HOST_WIDE_INT: the low word is the multiplier
              and the high word carries the most significant bit.  */
3198       *multiplier_ptr = GEN_INT (mhigh_lo);
3199       return mhigh_hi;
3200     }
3201 }
3202
3203 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3204    congruent to 1 (mod 2**N).
        X is assumed to be odd (see the comments in the body); N is the
        number of low-order bits that matter.  */
3205
3206 static unsigned HOST_WIDE_INT
3207 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3208 {
3209   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3210
3211   /* The algorithm notes that the choice y = x satisfies
3212      x*y == 1 mod 2^3, since x is assumed odd.
3213      Each iteration doubles the number of bits of significance in y.  */
3214
3215   unsigned HOST_WIDE_INT mask;
3216   unsigned HOST_WIDE_INT y = x;
3217   int nbit = 3;
3218
       /* MASK selects the low N bits.  The full-width case is handled
          separately so that the shift count never equals the width of
          HOST_WIDE_INT.  */
3219   mask = (n == HOST_BITS_PER_WIDE_INT
3220           ? ~(unsigned HOST_WIDE_INT) 0
3221           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3222
       /* NBIT counts the bits of Y known to be correct; it starts at 3 and
          doubles per iteration until it reaches N.  */
3223   while (nbit < n)
3224     {
           /* `*' binds tighter than `&', so the whole product is masked.  */
3225       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3226       nbit *= 2;
3227     }
3228   return y;
3229 }
3230
3231 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3232    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3233    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3234    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3235    become signed.
3236
3237    The result is put in TARGET if that is convenient.
3238
        Two correction terms are applied, each with PLUS when UNSIGNEDP is
        nonzero and MINUS otherwise:
          (OP0 >> (bitsize - 1)) & OP1   and   (OP1 >> (bitsize - 1)) & OP0.
        NOTE(review): the final 0 passed to expand_shift presumably requests
        an arithmetic shift, so each shifted value is an all-ones mask
        exactly when that operand is negative -- confirm against
        expand_shift.

3239    MODE is the mode of operation.  */
3240
3241 rtx
3242 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3243                              rtx op1, rtx target, int unsignedp)
3244 {
3245   rtx tem;
3246   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3247
       /* First correction: uses the top bit of OP0 to select OP1.  The
          intermediate result is accumulated back into ADJ_OPERAND.  */
3248   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3249                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3250                       NULL_RTX, 0);
3251   tem = expand_and (mode, tem, op1, NULL_RTX);
3252   adj_operand
3253     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3254                      adj_operand);
3255
       /* Second correction: the same with the roles of OP0 and OP1 swapped;
          this time TARGET is offered as the destination.  */
3256   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3257                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3258                       NULL_RTX, 0);
3259   tem = expand_and (mode, tem, op0, NULL_RTX);
3260   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3261                           target);
3262
3263   return target;
3264 }
3265
3266 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.
        When MODE is word_mode the high part is taken with gen_highpart.
        Otherwise OP is shifted right by the bit size of MODE in the next
        wider mode and the result is converted back to MODE.
        NOTE(review): OP is presumably a value in the mode returned by
        GET_MODE_WIDER_MODE (MODE) -- confirm against the callers.  */
3267
3268 static rtx
3269 extract_high_half (enum machine_mode mode, rtx op)
3270 {
3271   enum machine_mode wider_mode;
3272
3273   if (mode == word_mode)
3274     return gen_highpart (mode, op);
3275
       /* Note that this check is only reached for modes other than
          word_mode.  */
3276   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3277
3278   wider_mode = GET_MODE_WIDER_MODE (mode);
3279   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3280                      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3281   return convert_modes (mode, wider_mode, op, 0);
3282 }
3283
3284 /* Like expand_mult_highpart, but only consider using a multiplication
3285    optab.  OP1 is an rtx for the constant operand.

        MODE is the mode of the operands and of the result, TARGET is a
        suggested place for the result, UNSIGNEDP nonzero requests the
        unsigned high part, and MAX_COST is a strict upper bound: a strategy
        is attempted only if its estimated cost is below MAX_COST.

        The strategies are tried in this order, and the first one that
        produces a result wins:
          1. a high-part multiply of the requested signedness;
          2. a high-part multiply of the opposite signedness, followed by
             expand_mult_highpart_adjust;
          3. a widening multiply of the requested signedness;
          4. an ordinary multiply in the next wider mode;
          5. a widening multiply of the opposite signedness, followed by
             expand_mult_highpart_adjust.
        Returns 0 if none of them succeeded.  */
3286
3287 static rtx
3288 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3289                             rtx target, int unsignedp, int max_cost)
3290 {
       /* NARROW_OP1 is the value of OP1 regenerated as a constant for
          MODE.  */
3291   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3292   enum machine_mode wider_mode;
3293   optab moptab;
3294   rtx tem;
3295   int size;
3296
3297   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3298
3299   wider_mode = GET_MODE_WIDER_MODE (mode);
3300   size = GET_MODE_BITSIZE (mode);
3301
3302   /* Firstly, try using a multiplication insn that only generates the needed
3303      high part of the product, and in the sign flavor of unsignedp.  */
3304   if (mul_highpart_cost[mode] < max_cost)
3305     {
3306       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3307       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3308                           unsignedp, OPTAB_DIRECT);
3309       if (tem)
3310         return tem;
3311     }
3312
3313   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3314      Need to adjust the result after the multiplication.
          The cost estimate covers the multiply plus two shifts and four
          additions for the adjustment.
          NOTE(review): expand_binop is passed UNSIGNEDP here although the
          optab is the opposite flavor, whereas the last strategy below
          passes ! UNSIGNEDP -- confirm whether the difference matters.  */
3315   if (size - 1 < BITS_PER_WORD
3316       && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
3317           + 4 * add_cost[mode] < max_cost))
3318     {
3319       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3320       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3321                           unsignedp, OPTAB_DIRECT);
3322       if (tem)
3323         /* We used the wrong signedness.  Adjust the result.  */
3324         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3325                                             tem, unsignedp);
3326     }
3327
3328   /* Try widening multiplication.  */
3329   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3330   if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
3331       && mul_widen_cost[wider_mode] < max_cost)
3332     {
3333       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3334                           unsignedp, OPTAB_WIDEN);
3335       if (tem)
3336         return extract_high_half (mode, tem);
3337     }
3338
3339   /* Try widening the mode and perform a non-widening multiplication.  */
3340   if (optab_handler (smul_optab, wider_mode)->insn_code != CODE_FOR_nothing
3341       && size - 1 < BITS_PER_WORD
3342       && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
3343     {
3344       rtx insns, wop0, wop1;
3345
3346       /* We need to widen the operands, for example to ensure the
3347          constant multiplier is correctly sign or zero extended.
3348          Use a sequence to clean-up any instructions emitted by
3349          the conversions if things don't work out.  */
3350       start_sequence ();
3351       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3352       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3353       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3354                           unsignedp, OPTAB_WIDEN);
3355       insns = get_insns ();
3356       end_sequence ();
3357
           /* The collected insns are emitted only when the multiply
              succeeded; otherwise they are simply dropped.  */
3358       if (tem)
3359         {
3360           emit_insn (insns);
3361           return extract_high_half (mode, tem);
3362         }
3363     }
3364
3365   /* Try widening multiplication of opposite signedness, and adjust.  */
3366   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3367   if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
3368       && size - 1 < BITS_PER_WORD
3369       && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
3370           + 4 * add_cost[mode] < max_cost))
3371     {
3372       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3373                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3374       if (tem != 0)
3375         {
3376           tem = extract_high_half (mode, tem);
3377           /* We used the wrong signedness.  Adjust the result.  */
3378           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3379                                               target, unsignedp);
3380         }
3381     }
3382
3383   return 0;
3384 }
3385
3386 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3387    putting the high half of the result in TARGET if that is convenient,
3388    and return where the result is.  If the operation can not be performed,
3389    0 is returned.
3390
3391    MODE is the mode of operation and result.
3392
3393    UNSIGNEDP nonzero means unsigned multiply.
3394
3395    MAX_COST is the total allowed cost for the expanded RTL.  */
3396
3397 static rtx
3398 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3399                       rtx target, int unsignedp, int max_cost)
3400 {
3401   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3402   unsigned HOST_WIDE_INT cnst1;
3403   int extra_cost;
3404   bool sign_adjust = false;
3405   enum mult_variant variant;
3406   struct algorithm alg;
3407   rtx tem;
3408
3409   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3410   /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
3411   gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3412
       /* CNST1 is the constant reduced to the bits of MODE, i.e. always
          taken as a non-negative value.  */
3413   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3414
3415   /* We can't optimize modes wider than BITS_PER_WORD.
3416      ??? We might be able to perform double-word arithmetic if
3417      mode == word_mode, however all the cost calculations in
3418      synth_mult etc. assume single-word operations.  */
3419   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3420     return expand_mult_highpart_optab (mode, op0, op1, target,
3421                                        unsignedp, max_cost);
3422
       /* The shift/add variant always pays for one shift to bring the high
          half down.  */
3423   extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3424
3425   /* Check whether we try to multiply by a negative constant.  */
3426   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3427     {
3428       sign_adjust = true;
3429       extra_cost += add_cost[mode];
3430     }
3431
3432   /* See whether shift/add multiplication is cheap enough.  */
3433   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3434                            max_cost - extra_cost))
3435     {
3436       /* See whether the specialized multiplication optabs are
3437          cheaper than the shift/add version.  */
3438       tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3439                                         alg.cost.cost + extra_cost);
3440       if (tem)
3441         return tem;
3442
           /* Otherwise do the multiplication by CNST1 in the wider mode with
              shifts and adds, then keep the high half.  */
3443       tem = convert_to_mode (wider_mode, op0, unsignedp);
3444       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3445       tem = extract_high_half (mode, tem);
3446
3447       /* Adjust result for signedness.
              CNST1 was taken as a non-negative value, which for a negative
              constant is 2**bitsize too large, so the high half comes out
              too large by OP0.  */
3448       if (sign_adjust)
3449         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3450
3451       return tem;
3452     }
3453   return expand_mult_highpart_optab (mode, op0, op1, target,
3454                                      unsignedp, max_cost);
3455 }
3456
3457
3458 /* Expand signed modulus of OP0 by a power of two D in mode MODE.
        Returns an rtx holding the result.  A branch-free sequence is used
        when BRANCH_COST >= 2, we are not optimizing for size, and a
        store-flag for OP0 < 0 can be emitted; otherwise a sequence with one
        conditional jump is emitted.  */
3459
3460 static rtx
3461 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3462 {
3463   unsigned HOST_WIDE_INT masklow, maskhigh;
3464   rtx result, temp, shift, label;
3465   int logd;
3466
3467   logd = floor_log2 (d);
3468   result = gen_reg_rtx (mode);
3469
3470   /* Avoid conditional branches when they're expensive.  */
3471   if (BRANCH_COST >= 2
3472       && !optimize_size)
3473     {
           /* NOTE(review): the final -1 presumably asks emit_store_flag for
              a 0 / -1 (all ones) value, making SIGNMASK a sign mask of
              OP0 -- confirm against emit_store_flag.  */
3474       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3475                                       mode, 0, -1);
3476       if (signmask)
3477         {
3478           signmask = force_reg (mode, signmask);
3479           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3480           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3481
3482           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3483              which instruction sequence to use.  If logical right shifts
3484              are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3485              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3486
               /* This LSHIFTRT rtx is built only to be costed; TEMP is
                  overwritten in both arms below.  */
3487           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3488           if (optab_handler (lshr_optab, mode)->insn_code == CODE_FOR_nothing
3489               || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3490             {
                   /* ((((OP0 ^ SIGNMASK) - SIGNMASK) & MASKLOW) ^ SIGNMASK)
                      - SIGNMASK.  */
3491               temp = expand_binop (mode, xor_optab, op0, signmask,
3492                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3493               temp = expand_binop (mode, sub_optab, temp, signmask,
3494                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3495               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3496                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3497               temp = expand_binop (mode, xor_optab, temp, signmask,
3498                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3499               temp = expand_binop (mode, sub_optab, temp, signmask,
3500                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3501             }
3502           else
3503             {
                   /* SIGNMASK is first shifted right logically by
                      (bitsize - LOGD); the result is then
                      ((OP0 + SIGNMASK) & MASKLOW) - SIGNMASK.  */
3504               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3505                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3506               signmask = force_reg (mode, signmask);
3507
3508               temp = expand_binop (mode, add_optab, op0, signmask,
3509                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3510               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3511                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3512               temp = expand_binop (mode, sub_optab, temp, signmask,
3513                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3514             }
3515           return temp;
3516         }
3517     }
3518
3519   /* Mask contains the mode's signbit and the significant bits of the
3520      modulus.  By including the signbit in the operation, many targets
3521      can avoid an explicit compare operation in the following comparison
3522      against zero.  */
3523
3524   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3525   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3526     {
3527       masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3528       maskhigh = -1;
3529     }
3530   else
3531     maskhigh = (HOST_WIDE_INT) -1
3532                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3533
3534   temp = expand_binop (mode, and_optab, op0,
3535                        immed_double_const (masklow, maskhigh, mode),
3536                        result, 1, OPTAB_LIB_WIDEN);
3537   if (temp != result)
3538     emit_move_insn (result, temp);
3539
       /* If the masked value is non-negative it is already the answer, so
          jump over the fix-up code.  */
3540   label = gen_label_rtx ();
3541   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3542
       /* Fix-up for the other case:
          RESULT = ((RESULT - 1) | (-1 << LOGD)) + 1.  */
3543   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3544                        0, OPTAB_LIB_WIDEN);
3545   masklow = (HOST_WIDE_INT) -1 << logd;
3546   maskhigh = -1;
3547   temp = expand_binop (mode, ior_optab, temp,
3548                        immed_double_const (masklow, maskhigh, mode),
3549                        result, 1, OPTAB_LIB_WIDEN);
3550   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3551                        0, OPTAB_LIB_WIDEN);
3552   if (temp != result)
3553     emit_move_insn (result, temp);
3554   emit_label (label);
3555   return result;
3556 }
3557
3558 /* Expand signed division of OP0 by a power of two D in mode MODE.
3559    This routine is only called for positive values of D.
        Every strategy below adds a bias to OP0 when it is negative and then
        shifts right by log2 (D); they differ only in how the bias is
        produced.  Returns the rtx produced by the final shift.  */
3560
3561 static rtx
3562 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3563 {
3564   rtx temp, label;
3565   tree shift;
3566   int logd;
3567
3568   logd = floor_log2 (d);
3569   shift = build_int_cst (NULL_TREE, logd);
3570
       /* Division by 2: the bias D - 1 is 1, so the store-flag result for
          OP0 < 0 is added directly.
          NOTE(review): the final 1 presumably asks emit_store_flag for a
          0 / 1 value -- confirm against emit_store_flag.  */
3571   if (d == 2 && BRANCH_COST >= 1)
3572     {
3573       temp = gen_reg_rtx (mode);
3574       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3575       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3576                            0, OPTAB_LIB_WIDEN);
3577       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3578     }
3579
3580 #ifdef HAVE_conditional_move
3581   if (BRANCH_COST >= 2)
3582     {
3583       rtx temp2;
3584
3585       /* ??? emit_conditional_move forces a stack adjustment via
3586          compare_from_rtx so, if the sequence is discarded, it will
3587          be lost.  Do it now instead.  */
3588       do_pending_stack_adjust ();
3589
           /* Build the conditional-move version in a sequence, so that it
              can be discarded if the conditional move cannot be emitted.  */
3590       start_sequence ();
3591       temp2 = copy_to_mode_reg (mode, op0);
3592       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3593                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3594       temp = force_reg (mode, temp);
3595
3596       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3597       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3598                                      mode, temp, temp2, mode, 0);
3599       if (temp2)
3600         {
3601           rtx seq = get_insns ();
3602           end_sequence ();
3603           emit_insn (seq);
3604           return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3605         }
3606       end_sequence ();
3607     }
3608 #endif
3609
3610   if (BRANCH_COST >= 2)
3611     {
3612       int ushift = GET_MODE_BITSIZE (mode) - logd;
3613
           /* Turn the store-flag result for OP0 < 0 into the bias, either by
              masking it with D - 1 or by shifting it right by USHIFT bits,
              whichever the shift cost favors.
              NOTE(review): the final -1 presumably asks emit_store_flag for
              a 0 / -1 value -- confirm against emit_store_flag.  */
3614       temp = gen_reg_rtx (mode);
3615       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3616       if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
3617         temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3618                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3619       else
3620         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3621                              build_int_cst (NULL_TREE, ushift),
3622                              NULL_RTX, 1);
3623       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3624                            0, OPTAB_LIB_WIDEN);
3625       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3626     }
3627
       /* Fallback with a branch: copy OP0, skip the increment when the copy
          is >= 0, otherwise add D - 1, then shift.  */
3628   label = gen_label_rtx ();
3629   temp = copy_to_mode_reg (mode, op0);
3630   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3631   expand_inc (temp, GEN_INT (d - 1));
3632   emit_label (label);
3633   return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3634 }
3635 \f
3636 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3637    if that is convenient, and returning where the result is.
3638    You may request either the quotient or the remainder as the result;
3639    specify REM_FLAG nonzero to get the remainder.
3640
3641    CODE is the expression code for which kind of division this is;
3642    it controls how rounding is done.  MODE is the machine mode to use.
3643    UNSIGNEDP nonzero means do unsigned division.  */
3644
3645 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3646    and then correct it by or'ing in missing high bits
3647    if result of ANDI is nonzero.
3648    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3649    This could optimize to a bfexts instruction.
3650    But C doesn't use these operations, so their optimizations are
3651    left for later.  */
3652 /* ??? For modulo, we don't actually need the highpart of the first product,
3653    the low part will do nicely.  And for small divisors, the second multiply
3654    can also be a low-part only multiply or even be completely left out.
3655    E.g. to calculate the remainder of a division by 3 with a 32 bit
3656    multiply, multiply with 0x55555556 and extract the upper two bits;
3657    the result is exact for inputs up to 0x1fffffff.
3658    The input range can be reduced by using cross-sum rules.
3659    For odd divisors >= 3, the following table gives right shift counts
3660    so that if a number is shifted by an integer multiple of the given
3661    amount, the remainder stays the same:
3662    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3663    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3664    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3665    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3666    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3667
3668    Cross-sum rules for even numbers can be derived by leaving as many bits
3669    to the right alone as the divisor has zeros to the right.
3670    E.g. if x is an unsigned 32 bit number:
3671    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3672    */
3673
3674 rtx
3675 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3676                rtx op0, rtx op1, rtx target, int unsignedp)
3677 {
3678   enum machine_mode compute_mode;
3679   rtx tquotient;
3680   rtx quotient = 0, remainder = 0;
3681   rtx last;
3682   int size;
3683   rtx insn, set;
3684   optab optab1, optab2;
3685   int op1_is_constant, op1_is_pow2 = 0;
3686   int max_cost, extra_cost;
3687   static HOST_WIDE_INT last_div_const = 0;
3688   static HOST_WIDE_INT ext_op1;
3689
3690   op1_is_constant = GET_CODE (op1) == CONST_INT;
3691   if (op1_is_constant)
3692     {
3693       ext_op1 = INTVAL (op1);
3694       if (unsignedp)
3695         ext_op1 &= GET_MODE_MASK (mode);
3696       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3697                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3698     }
3699
3700   /*
3701      This is the structure of expand_divmod:
3702
3703      First comes code to fix up the operands so we can perform the operations
3704      correctly and efficiently.
3705
3706      Second comes a switch statement with code specific for each rounding mode.
3707      For some special operands this code emits all RTL for the desired
3708      operation, for other cases, it generates only a quotient and stores it in
3709      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3710      to indicate that it has not done anything.
3711
3712      Last comes code that finishes the operation.  If QUOTIENT is set and
3713      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3714      QUOTIENT is not set, it is computed using trunc rounding.
3715
3716      We try to generate special code for division and remainder when OP1 is a
3717      constant.  If |OP1| = 2**n we can use shifts and some other fast
3718      operations.  For other values of OP1, we compute a carefully selected
3719      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3720      by m.
3721
3722      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3723      half of the product.  Different strategies for generating the product are
3724      implemented in expand_mult_highpart.
3725
3726      If what we actually want is the remainder, we generate that by another
3727      by-constant multiplication and a subtraction.  */
3728
3729   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3730      code below will malfunction if we are, so check here and handle
3731      the special case if so.  */
3732   if (op1 == const1_rtx)
3733     return rem_flag ? const0_rtx : op0;
3734
3735     /* When dividing by -1, we could get an overflow.
3736      negv_optab can handle overflows.  */
3737   if (! unsignedp && op1 == constm1_rtx)
3738     {
3739       if (rem_flag)
3740         return const0_rtx;
3741       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3742                           ? negv_optab : neg_optab, op0, target, 0);
3743     }
3744
3745   if (target
3746       /* Don't use the function value register as a target
3747          since we have to read it as well as write it,
3748          and function-inlining gets confused by this.  */
3749       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3750           /* Don't clobber an operand while doing a multi-step calculation.  */
3751           || ((rem_flag || op1_is_constant)
3752               && (reg_mentioned_p (target, op0)
3753                   || (MEM_P (op0) && MEM_P (target))))
3754           || reg_mentioned_p (target, op1)
3755           || (MEM_P (op1) && MEM_P (target))))
3756     target = 0;
3757
3758   /* Get the mode in which to perform this computation.  Normally it will
3759      be MODE, but sometimes we can't do the desired operation in MODE.
3760      If so, pick a wider mode in which we can do the operation.  Convert
3761      to that mode at the start to avoid repeated conversions.
3762
3763      First see what operations we need.  These depend on the expression
3764      we are evaluating.  (We assume that divxx3 insns exist under the
3765      same conditions that modxx3 insns and that these insns don't normally
3766      fail.  If these assumptions are not correct, we may generate less
3767      efficient code in some cases.)
3768
3769      Then see if we find a mode in which we can open-code that operation
3770      (either a division, modulus, or shift).  Finally, check for the smallest
3771      mode for which we can do the operation with a library call.  */
3772
3773   /* We might want to refine this now that we have division-by-constant
3774      optimization.  Since expand_mult_highpart tries so many variants, it is
3775      not straightforward to generalize this.  Maybe we should make an array
3776      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3777
3778   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3779             ? (unsignedp ? lshr_optab : ashr_optab)
3780             : (unsignedp ? udiv_optab : sdiv_optab));
3781   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3782             ? optab1
3783             : (unsignedp ? udivmod_optab : sdivmod_optab));
3784
3785   for (compute_mode = mode; compute_mode != VOIDmode;
3786        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3787     if (optab_handler (optab1, compute_mode)->insn_code != CODE_FOR_nothing
3788         || optab_handler (optab2, compute_mode)->insn_code != CODE_FOR_nothing)
3789       break;
3790
3791   if (compute_mode == VOIDmode)
3792     for (compute_mode = mode; compute_mode != VOIDmode;
3793          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3794       if (optab_libfunc (optab1, compute_mode)
3795           || optab_libfunc (optab2, compute_mode))
3796         break;
3797
3798   /* If we still couldn't find a mode, use MODE, but expand_binop will
3799      probably die.  */
3800   if (compute_mode == VOIDmode)
3801     compute_mode = mode;
3802
3803   if (target && GET_MODE (target) == compute_mode)
3804     tquotient = target;
3805   else
3806     tquotient = gen_reg_rtx (compute_mode);
3807
3808   size = GET_MODE_BITSIZE (compute_mode);
3809 #if 0
3810   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3811      (mode), and thereby get better code when OP1 is a constant.  Do that
3812      later.  It will require going over all usages of SIZE below.  */
3813   size = GET_MODE_BITSIZE (mode);
3814 #endif
3815
3816   /* Only deduct something for a REM if the last divide done was
3817      for a different constant.   Then set the constant of the last
3818      divide.  */
3819   max_cost = unsignedp ? udiv_cost[compute_mode] : sdiv_cost[compute_mode];
3820   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3821                      && INTVAL (op1) == last_div_const))
3822     max_cost -= mul_cost[compute_mode] + add_cost[compute_mode];
3823
3824   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3825
3826   /* Now convert to the best mode to use.  */
3827   if (compute_mode != mode)
3828     {
3829       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3830       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3831
3832       /* convert_modes may have placed op1 into a register, so we
3833          must recompute the following.  */
3834       op1_is_constant = GET_CODE (op1) == CONST_INT;
3835       op1_is_pow2 = (op1_is_constant
3836                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3837                           || (! unsignedp
3838                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3839     }
3840
3841   /* If one of the operands is a volatile MEM, copy it into a register.  */
3842
3843   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3844     op0 = force_reg (compute_mode, op0);
3845   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3846     op1 = force_reg (compute_mode, op1);
3847
3848   /* If we need the remainder or if OP1 is constant, we need to
3849      put OP0 in a register in case it has any queued subexpressions.  */
3850   if (rem_flag || op1_is_constant)
3851     op0 = force_reg (compute_mode, op0);
3852
3853   last = get_last_insn ();
3854
3855   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3856   if (unsignedp)
3857     {
3858       if (code == FLOOR_DIV_EXPR)
3859         code = TRUNC_DIV_EXPR;
3860       if (code == FLOOR_MOD_EXPR)
3861         code = TRUNC_MOD_EXPR;
3862       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3863         code = TRUNC_DIV_EXPR;
3864     }
3865
3866   if (op1 != const0_rtx)
3867     switch (code)
3868       {
3869       case TRUNC_MOD_EXPR:
3870       case TRUNC_DIV_EXPR:
3871         if (op1_is_constant)
3872           {
3873             if (unsignedp)
3874               {
3875                 unsigned HOST_WIDE_INT mh;
3876                 int pre_shift, post_shift;
3877                 int dummy;
3878                 rtx ml;
3879                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3880                                             & GET_MODE_MASK (compute_mode));
3881
3882                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3883                   {
3884                     pre_shift = floor_log2 (d);
3885                     if (rem_flag)
3886                       {
3887                         remainder
3888                           = expand_binop (compute_mode, and_optab, op0,
3889                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3890                                           remainder, 1,
3891                                           OPTAB_LIB_WIDEN);
3892                         if (remainder)
3893                           return gen_lowpart (mode, remainder);
3894                       }
3895                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3896                                              build_int_cst (NULL_TREE,
3897                                                             pre_shift),
3898                                              tquotient, 1);
3899                   }
3900                 else if (size <= HOST_BITS_PER_WIDE_INT)
3901                   {
3902                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3903                       {
3904                         /* Most significant bit of divisor is set; emit an scc
3905                            insn.  */
3906                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3907                                                     compute_mode, 1, 1);
3908                         if (quotient == 0)
3909                           goto fail1;
3910                       }
3911                     else
3912                       {
3913                         /* Find a suitable multiplier and right shift count
3914                            instead of multiplying with D.  */
3915
3916                         mh = choose_multiplier (d, size, size,
3917                                                 &ml, &post_shift, &dummy);
3918
3919                         /* If the suggested multiplier is more than SIZE bits,
3920                            we can do better for even divisors, using an
3921                            initial right shift.  */
3922                         if (mh != 0 && (d & 1) == 0)
3923                           {
3924                             pre_shift = floor_log2 (d & -d);
3925                             mh = choose_multiplier (d >> pre_shift, size,
3926                                                     size - pre_shift,
3927                                                     &ml, &post_shift, &dummy);
3928                             gcc_assert (!mh);
3929                           }
3930                         else
3931                           pre_shift = 0;
3932
3933                         if (mh != 0)
3934                           {
3935                             rtx t1, t2, t3, t4;
3936
3937                             if (post_shift - 1 >= BITS_PER_WORD)
3938                               goto fail1;
3939
3940                             extra_cost
3941                               = (shift_cost[compute_mode][post_shift - 1]
3942                                  + shift_cost[compute_mode][1]
3943                                  + 2 * add_cost[compute_mode]);
3944                             t1 = expand_mult_highpart (compute_mode, op0, ml,
3945                                                        NULL_RTX, 1,
3946                                                        max_cost - extra_cost);
3947                             if (t1 == 0)
3948                               goto fail1;
3949                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
3950                                                                op0, t1),
3951                                                 NULL_RTX);
3952                             t3 = expand_shift
3953                               (RSHIFT_EXPR, compute_mode, t2,
3954                                build_int_cst (NULL_TREE, 1),
3955                                NULL_RTX,1);
3956                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
3957                                                               t1, t3),
3958                                                 NULL_RTX);
3959                             quotient = expand_shift
3960                               (RSHIFT_EXPR, compute_mode, t4,
3961                                build_int_cst (NULL_TREE, post_shift - 1),
3962                                tquotient, 1);
3963                           }
3964                         else
3965                           {
3966                             rtx t1, t2;
3967
3968                             if (pre_shift >= BITS_PER_WORD
3969                                 || post_shift >= BITS_PER_WORD)
3970                               goto fail1;
3971
3972                             t1 = expand_shift
3973                               (RSHIFT_EXPR, compute_mode, op0,
3974                                build_int_cst (NULL_TREE, pre_shift),
3975                                NULL_RTX, 1);
3976                             extra_cost
3977                               = (shift_cost[compute_mode][pre_shift]
3978                                  + shift_cost[compute_mode][post_shift]);
3979                             t2 = expand_mult_highpart (compute_mode, t1, ml,
3980                                                        NULL_RTX, 1,
3981                                                        max_cost - extra_cost);
3982                             if (t2 == 0)
3983                               goto fail1;
3984                             quotient = expand_shift
3985                               (RSHIFT_EXPR, compute_mode, t2,
3986                                build_int_cst (NULL_TREE, post_shift),
3987                                tquotient, 1);
3988                           }
3989                       }
3990                   }
3991                 else            /* Too wide mode to use tricky code */
3992                   break;
3993
3994                 insn = get_last_insn ();
3995                 if (insn != last
3996                     && (set = single_set (insn)) != 0
3997                     && SET_DEST (set) == quotient)
3998                   set_unique_reg_note (insn,
3999                                        REG_EQUAL,
4000                                        gen_rtx_UDIV (compute_mode, op0, op1));
4001               }
4002             else                /* TRUNC_DIV, signed */
4003               {
4004                 unsigned HOST_WIDE_INT ml;
4005                 int lgup, post_shift;
4006                 rtx mlr;
4007                 HOST_WIDE_INT d = INTVAL (op1);
4008                 unsigned HOST_WIDE_INT abs_d;
4009
4010                 /* Since d might be INT_MIN, we have to cast to
4011                    unsigned HOST_WIDE_INT before negating to avoid
4012                    undefined signed overflow.  */
4013                 abs_d = (d >= 0
4014                          ? (unsigned HOST_WIDE_INT) d
4015                          : - (unsigned HOST_WIDE_INT) d);
4016
4017                 /* n rem d = n rem -d */
4018                 if (rem_flag && d < 0)
4019                   {
4020                     d = abs_d;
4021                     op1 = gen_int_mode (abs_d, compute_mode);
4022                   }
4023
4024                 if (d == 1)
4025                   quotient = op0;
4026                 else if (d == -1)
4027                   quotient = expand_unop (compute_mode, neg_optab, op0,
4028                                           tquotient, 0);
4029                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4030                   {
4031                     /* This case is not handled correctly below.  */
4032                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4033                                                 compute_mode, 1, 1);
4034                     if (quotient == 0)
4035                       goto fail1;
4036                   }
4037                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4038                          && (rem_flag ? smod_pow2_cheap[compute_mode]
4039                                       : sdiv_pow2_cheap[compute_mode])
4040                          /* We assume that cheap metric is true if the
4041                             optab has an expander for this mode.  */
4042                          && ((optab_handler ((rem_flag ? smod_optab
4043                                               : sdiv_optab),
4044                                               compute_mode)->insn_code
4045                               != CODE_FOR_nothing)
4046                              || (optab_handler(sdivmod_optab,
4047                                                compute_mode)
4048                                  ->insn_code != CODE_FOR_nothing)))
4049                   ;
4050                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4051                   {
4052                     if (rem_flag)
4053                       {
4054                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4055                         if (remainder)
4056                           return gen_lowpart (mode, remainder);
4057                       }
4058
4059                     if (sdiv_pow2_cheap[compute_mode]
4060                         && ((optab_handler (sdiv_optab, compute_mode)->insn_code
4061                              != CODE_FOR_nothing)
4062                             || (optab_handler (sdivmod_optab, compute_mode)->insn_code
4063                                 != CODE_FOR_nothing)))
4064                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4065                                                 compute_mode, op0,
4066                                                 gen_int_mode (abs_d,
4067                                                               compute_mode),
4068                                                 NULL_RTX, 0);
4069                     else
4070                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4071
4072                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4073                        negate the quotient.  */
4074                     if (d < 0)
4075                       {
4076                         insn = get_last_insn ();
4077                         if (insn != last
4078                             && (set = single_set (insn)) != 0
4079                             && SET_DEST (set) == quotient
4080                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4081                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4082                           set_unique_reg_note (insn,
4083                                                REG_EQUAL,
4084                                                gen_rtx_DIV (compute_mode,
4085                                                             op0,
4086                                                             GEN_INT
4087                                                             (trunc_int_for_mode
4088                                                              (abs_d,
4089                                                               compute_mode))));
4090
4091                         quotient = expand_unop (compute_mode, neg_optab,
4092                                                 quotient, quotient, 0);
4093                       }
4094                   }
4095                 else if (size <= HOST_BITS_PER_WIDE_INT)
4096                   {
4097                     choose_multiplier (abs_d, size, size - 1,
4098                                        &mlr, &post_shift, &lgup);
4099                     ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4100                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4101                       {
4102                         rtx t1, t2, t3;
4103
4104                         if (post_shift >= BITS_PER_WORD
4105                             || size - 1 >= BITS_PER_WORD)
4106                           goto fail1;
4107
4108                         extra_cost = (shift_cost[compute_mode][post_shift]
4109                                       + shift_cost[compute_mode][size - 1]
4110                                       + add_cost[compute_mode]);
4111                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4112                                                    NULL_RTX, 0,
4113                                                    max_cost - extra_cost);
4114                         if (t1 == 0)
4115                           goto fail1;
4116                         t2 = expand_shift
4117                           (RSHIFT_EXPR, compute_mode, t1,
4118                            build_int_cst (NULL_TREE, post_shift),
4119                            NULL_RTX, 0);
4120                         t3 = expand_shift
4121                           (RSHIFT_EXPR, compute_mode, op0,
4122                            build_int_cst (NULL_TREE, size - 1),
4123                            NULL_RTX, 0);
4124                         if (d < 0)
4125                           quotient
4126                             = force_operand (gen_rtx_MINUS (compute_mode,
4127                                                             t3, t2),
4128                                              tquotient);
4129                         else
4130                           quotient
4131                             = force_operand (gen_rtx_MINUS (compute_mode,
4132                                                             t2, t3),
4133                                              tquotient);
4134                       }
4135                     else
4136                       {
4137                         rtx t1, t2, t3, t4;
4138
4139                         if (post_shift >= BITS_PER_WORD
4140                             || size - 1 >= BITS_PER_WORD)
4141                           goto fail1;
4142
4143                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4144                         mlr = gen_int_mode (ml, compute_mode);
4145                         extra_cost = (shift_cost[compute_mode][post_shift]
4146                                       + shift_cost[compute_mode][size - 1]
4147                                       + 2 * add_cost[compute_mode]);
4148                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4149                                                    NULL_RTX, 0,
4150                                                    max_cost - extra_cost);
4151                         if (t1 == 0)
4152                           goto fail1;
4153                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4154                                                           t1, op0),
4155                                             NULL_RTX);
4156                         t3 = expand_shift
4157                           (RSHIFT_EXPR, compute_mode, t2,
4158                            build_int_cst (NULL_TREE, post_shift),
4159                            NULL_RTX, 0);
4160                         t4 = expand_shift
4161                           (RSHIFT_EXPR, compute_mode, op0,
4162                            build_int_cst (NULL_TREE, size - 1),
4163                            NULL_RTX, 0);
4164                         if (d < 0)
4165                           quotient
4166                             = force_operand (gen_rtx_MINUS (compute_mode,
4167                                                             t4, t3),
4168                                              tquotient);
4169                         else
4170                           quotient
4171                             = force_operand (gen_rtx_MINUS (compute_mode,
4172                                                             t3, t4),
4173                                              tquotient);
4174                       }
4175                   }
4176                 else            /* Too wide mode to use tricky code */
4177                   break;
4178
4179                 insn = get_last_insn ();
4180                 if (insn != last
4181                     && (set = single_set (insn)) != 0
4182                     && SET_DEST (set) == quotient)
4183                   set_unique_reg_note (insn,
4184                                        REG_EQUAL,
4185                                        gen_rtx_DIV (compute_mode, op0, op1));
4186               }
4187             break;
4188           }
4189       fail1:
4190         delete_insns_since (last);
4191         break;
4192
4193       case FLOOR_DIV_EXPR:
4194       case FLOOR_MOD_EXPR:
4195       /* We will come here only for signed operations.  */
4196         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4197           {
4198             unsigned HOST_WIDE_INT mh;
4199             int pre_shift, lgup, post_shift;
4200             HOST_WIDE_INT d = INTVAL (op1);
4201             rtx ml;
4202
4203             if (d > 0)
4204               {
4205                 /* We could just as easily deal with negative constants here,
4206                    but it does not seem worth the trouble for GCC 2.6.  */
4207                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4208                   {
4209                     pre_shift = floor_log2 (d);
4210                     if (rem_flag)
4211                       {
4212                         remainder = expand_binop (compute_mode, and_optab, op0,
4213                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4214                                                   remainder, 0, OPTAB_LIB_WIDEN);
4215                         if (remainder)
4216                           return gen_lowpart (mode, remainder);
4217                       }
4218                     quotient = expand_shift
4219                       (RSHIFT_EXPR, compute_mode, op0,
4220                        build_int_cst (NULL_TREE, pre_shift),
4221                        tquotient, 0);
4222                   }
4223                 else
4224                   {
4225                     rtx t1, t2, t3, t4;
4226
4227                     mh = choose_multiplier (d, size, size - 1,
4228                                             &ml, &post_shift, &lgup);
4229                     gcc_assert (!mh);
4230
4231                     if (post_shift < BITS_PER_WORD
4232                         && size - 1 < BITS_PER_WORD)
4233                       {
4234                         t1 = expand_shift
4235                           (RSHIFT_EXPR, compute_mode, op0,
4236                            build_int_cst (NULL_TREE, size - 1),
4237                            NULL_RTX, 0);
4238                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4239                                            NULL_RTX, 0, OPTAB_WIDEN);
4240                         extra_cost = (shift_cost[compute_mode][post_shift]
4241                                       + shift_cost[compute_mode][size - 1]
4242                                       + 2 * add_cost[compute_mode]);
4243                         t3 = expand_mult_highpart (compute_mode, t2, ml,
4244                                                    NULL_RTX, 1,
4245                                                    max_cost - extra_cost);
4246                         if (t3 != 0)
4247                           {
4248                             t4 = expand_shift
4249                               (RSHIFT_EXPR, compute_mode, t3,
4250                                build_int_cst (NULL_TREE, post_shift),
4251                                NULL_RTX, 1);
4252                             quotient = expand_binop (compute_mode, xor_optab,
4253                                                      t4, t1, tquotient, 0,
4254                                                      OPTAB_WIDEN);
4255                           }
4256                       }
4257                   }
4258               }
4259             else
4260               {
4261                 rtx nsign, t1, t2, t3, t4;
4262                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4263                                                   op0, constm1_rtx), NULL_RTX);
4264                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4265                                    0, OPTAB_WIDEN);
4266                 nsign = expand_shift
4267                   (RSHIFT_EXPR, compute_mode, t2,
4268                    build_int_cst (NULL_TREE, size - 1),
4269                    NULL_RTX, 0);
4270                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4271                                     NULL_RTX);
4272                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4273                                     NULL_RTX, 0);
4274                 if (t4)
4275                   {
4276                     rtx t5;
4277                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4278                                       NULL_RTX, 0);
4279                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4280                                                             t4, t5),
4281                                               tquotient);
4282                   }
4283               }
4284           }
4285
4286         if (quotient != 0)
4287           break;
4288         delete_insns_since (last);
4289
4290         /* Try using an instruction that produces both the quotient and
4291            remainder, using truncation.  We can easily compensate the quotient
4292            or remainder to get floor rounding, once we have the remainder.
4293            Notice that we compute also the final remainder value here,
4294            and return the result right away.  */
4295         if (target == 0 || GET_MODE (target) != compute_mode)
4296           target = gen_reg_rtx (compute_mode);
4297
4298         if (rem_flag)
4299           {
4300             remainder
4301               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4302             quotient = gen_reg_rtx (compute_mode);
4303           }
4304         else
4305           {
4306             quotient
4307               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4308             remainder = gen_reg_rtx (compute_mode);
4309           }
4310
4311         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4312                                  quotient, remainder, 0))
4313           {
4314             /* This could be computed with a branch-less sequence.
4315                Save that for later.  */
4316             rtx tem;
4317             rtx label = gen_label_rtx ();
4318             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4319             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4320                                 NULL_RTX, 0, OPTAB_WIDEN);
4321             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4322             expand_dec (quotient, const1_rtx);
4323             expand_inc (remainder, op1);
4324             emit_label (label);
4325             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4326           }
4327
4328         /* No luck with division elimination or divmod.  Have to do it
4329            by conditionally adjusting op0 *and* the result.  */
4330         {
4331           rtx label1, label2, label3, label4, label5;
4332           rtx adjusted_op0;
4333           rtx tem;
4334
4335           quotient = gen_reg_rtx (compute_mode);
4336           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4337           label1 = gen_label_rtx ();
4338           label2 = gen_label_rtx ();
4339           label3 = gen_label_rtx ();
4340           label4 = gen_label_rtx ();
4341           label5 = gen_label_rtx ();
4342           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4343           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4344           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4345                               quotient, 0, OPTAB_LIB_WIDEN);
4346           if (tem != quotient)
4347             emit_move_insn (quotient, tem);
4348           emit_jump_insn (gen_jump (label5));
4349           emit_barrier ();
4350           emit_label (label1);
4351           expand_inc (adjusted_op0, const1_rtx);
4352           emit_jump_insn (gen_jump (label4));
4353           emit_barrier ();
4354           emit_label (label2);
4355           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4356           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4357                               quotient, 0, OPTAB_LIB_WIDEN);
4358           if (tem != quotient)
4359             emit_move_insn (quotient, tem);
4360           emit_jump_insn (gen_jump (label5));
4361           emit_barrier ();
4362           emit_label (label3);
4363           expand_dec (adjusted_op0, const1_rtx);
4364           emit_label (label4);
4365           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4366                               quotient, 0, OPTAB_LIB_WIDEN);
4367           if (tem != quotient)
4368             emit_move_insn (quotient, tem);
4369           expand_dec (quotient, const1_rtx);
4370           emit_label (label5);
4371         }
4372         break;
4373
4374       case CEIL_DIV_EXPR:
4375       case CEIL_MOD_EXPR:
4376         if (unsignedp)
4377           {
4378             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4379               {
4380                 rtx t1, t2, t3;
4381                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4382                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4383                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4384                                    tquotient, 1);
4385                 t2 = expand_binop (compute_mode, and_optab, op0,
4386                                    GEN_INT (d - 1),
4387                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4388                 t3 = gen_reg_rtx (compute_mode);
4389                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4390                                       compute_mode, 1, 1);
4391                 if (t3 == 0)
4392                   {
4393                     rtx lab;
4394                     lab = gen_label_rtx ();
4395                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4396                     expand_inc (t1, const1_rtx);
4397                     emit_label (lab);
4398                     quotient = t1;
4399                   }
4400                 else
4401                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4402                                                           t1, t3),
4403                                             tquotient);
4404                 break;
4405               }
4406
4407             /* Try using an instruction that produces both the quotient and
4408                remainder, using truncation.  We can easily compensate the
4409                quotient or remainder to get ceiling rounding, once we have the
4410                remainder.  Notice that we compute also the final remainder
4411                value here, and return the result right away.  */
4412             if (target == 0 || GET_MODE (target) != compute_mode)
4413               target = gen_reg_rtx (compute_mode);
4414
4415             if (rem_flag)
4416               {
4417                 remainder = (REG_P (target)
4418                              ? target : gen_reg_rtx (compute_mode));
4419                 quotient = gen_reg_rtx (compute_mode);
4420               }
4421             else
4422               {
4423                 quotient = (REG_P (target)
4424                             ? target : gen_reg_rtx (compute_mode));
4425                 remainder = gen_reg_rtx (compute_mode);
4426               }
4427
4428             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4429                                      remainder, 1))
4430               {
4431                 /* This could be computed with a branch-less sequence.
4432                    Save that for later.  */
4433                 rtx label = gen_label_rtx ();
4434                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4435                                  compute_mode, label);
4436                 expand_inc (quotient, const1_rtx);
4437                 expand_dec (remainder, op1);
4438                 emit_label (label);
4439                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4440               }
4441
4442             /* No luck with division elimination or divmod.  Have to do it
4443                by conditionally adjusting op0 *and* the result.  */
4444             {
4445               rtx label1, label2;
4446               rtx adjusted_op0, tem;
4447
4448               quotient = gen_reg_rtx (compute_mode);
4449               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4450               label1 = gen_label_rtx ();
4451               label2 = gen_label_rtx ();
4452               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4453                                compute_mode, label1);
4454               emit_move_insn  (quotient, const0_rtx);
4455               emit_jump_insn (gen_jump (label2));
4456               emit_barrier ();
4457               emit_label (label1);
4458               expand_dec (adjusted_op0, const1_rtx);
4459               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4460                                   quotient, 1, OPTAB_LIB_WIDEN);
4461               if (tem != quotient)
4462                 emit_move_insn (quotient, tem);
4463               expand_inc (quotient, const1_rtx);
4464               emit_label (label2);
4465             }
4466           }
4467         else /* signed */
4468           {
4469             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4470                 && INTVAL (op1) >= 0)
4471               {
4472                 /* This is extremely similar to the code for the unsigned case
4473                    above.  For 2.7 we should merge these variants, but for
4474                    2.6.1 I don't want to touch the code for unsigned since that
4475                    gets used in C.  The signed case will only be used by other
4476                    languages (Ada).  */
4477
4478                 rtx t1, t2, t3;
4479                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4480                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4481                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4482                                    tquotient, 0);
4483                 t2 = expand_binop (compute_mode, and_optab, op0,
4484                                    GEN_INT (d - 1),
4485                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4486                 t3 = gen_reg_rtx (compute_mode);
4487                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4488                                       compute_mode, 1, 1);
4489                 if (t3 == 0)
4490                   {
4491                     rtx lab;
4492                     lab = gen_label_rtx ();
4493                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4494                     expand_inc (t1, const1_rtx);
4495                     emit_label (lab);
4496                     quotient = t1;
4497                   }
4498                 else
4499                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4500                                                           t1, t3),
4501                                             tquotient);
4502                 break;
4503               }
4504
4505             /* Try using an instruction that produces both the quotient and
4506                remainder, using truncation.  We can easily compensate the
4507                quotient or remainder to get ceiling rounding, once we have the
4508                remainder.  Notice that we compute also the final remainder
4509                value here, and return the result right away.  */
4510             if (target == 0 || GET_MODE (target) != compute_mode)
4511               target = gen_reg_rtx (compute_mode);
4512             if (rem_flag)
4513               {
4514                 remainder= (REG_P (target)
4515                             ? target : gen_reg_rtx (compute_mode));
4516                 quotient = gen_reg_rtx (compute_mode);
4517               }
4518             else
4519               {
4520                 quotient = (REG_P (target)
4521                             ? target : gen_reg_rtx (compute_mode));
4522                 remainder = gen_reg_rtx (compute_mode);
4523               }
4524
4525             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4526                                      remainder, 0))
4527               {
4528                 /* This could be computed with a branch-less sequence.
4529                    Save that for later.  */
4530                 rtx tem;
4531                 rtx label = gen_label_rtx ();
4532                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4533                                  compute_mode, label);
4534                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4535                                     NULL_RTX, 0, OPTAB_WIDEN);
4536                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4537                 expand_inc (quotient, const1_rtx);
4538                 expand_dec (remainder, op1);
4539                 emit_label (label);
4540                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4541               }
4542
4543             /* No luck with division elimination or divmod.  Have to do it
4544                by conditionally adjusting op0 *and* the result.  */
4545             {
4546               rtx label1, label2, label3, label4, label5;
4547               rtx adjusted_op0;
4548               rtx tem;
4549
4550               quotient = gen_reg_rtx (compute_mode);
4551               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4552               label1 = gen_label_rtx ();
4553               label2 = gen_label_rtx ();
4554               label3 = gen_label_rtx ();
4555               label4 = gen_label_rtx ();
4556               label5 = gen_label_rtx ();
4557               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4558               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4559                                compute_mode, label1);
4560               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4561                                   quotient, 0, OPTAB_LIB_WIDEN);
4562               if (tem != quotient)
4563                 emit_move_insn (quotient, tem);
4564               emit_jump_insn (gen_jump (label5));
4565               emit_barrier ();
4566               emit_label (label1);
4567               expand_dec (adjusted_op0, const1_rtx);
4568               emit_jump_insn (gen_jump (label4));
4569               emit_barrier ();
4570               emit_label (label2);
4571               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4572                                compute_mode, label3);
4573               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4574                                   quotient, 0, OPTAB_LIB_WIDEN);
4575               if (tem != quotient)
4576                 emit_move_insn (quotient, tem);
4577               emit_jump_insn (gen_jump (label5));
4578               emit_barrier ();
4579               emit_label (label3);
4580               expand_inc (adjusted_op0, const1_rtx);
4581               emit_label (label4);
4582               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4583                                   quotient, 0, OPTAB_LIB_WIDEN);
4584               if (tem != quotient)
4585                 emit_move_insn (quotient, tem);
4586               expand_inc (quotient, const1_rtx);
4587               emit_label (label5);
4588             }
4589           }
4590         break;
4591
4592       case EXACT_DIV_EXPR:
4593         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4594           {
4595             HOST_WIDE_INT d = INTVAL (op1);
4596             unsigned HOST_WIDE_INT ml;
4597             int pre_shift;
4598             rtx t1;
4599
4600             pre_shift = floor_log2 (d & -d);
4601             ml = invert_mod2n (d >> pre_shift, size);
4602             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4603                                build_int_cst (NULL_TREE, pre_shift),
4604                                NULL_RTX, unsignedp);
4605             quotient = expand_mult (compute_mode, t1,
4606                                     gen_int_mode (ml, compute_mode),
4607                                     NULL_RTX, 1);
4608
4609             insn = get_last_insn ();
4610             set_unique_reg_note (insn,
4611                                  REG_EQUAL,
4612                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4613                                                  compute_mode,
4614                                                  op0, op1));
4615           }
4616         break;
4617
4618       case ROUND_DIV_EXPR:
4619       case ROUND_MOD_EXPR:
4620         if (unsignedp)
4621           {
4622             rtx tem;
4623             rtx label;
4624             label = gen_label_rtx ();
4625             quotient = gen_reg_rtx (compute_mode);
4626             remainder = gen_reg_rtx (compute_mode);
4627             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4628               {
4629                 rtx tem;
4630                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4631                                          quotient, 1, OPTAB_LIB_WIDEN);
4632                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4633                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4634                                           remainder, 1, OPTAB_LIB_WIDEN);
4635               }
4636             tem = plus_constant (op1, -1);
4637             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4638                                 build_int_cst (NULL_TREE, 1),
4639                                 NULL_RTX, 1);
4640             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4641             expand_inc (quotient, const1_rtx);
4642             expand_dec (remainder, op1);
4643             emit_label (label);
4644           }
4645         else
4646           {
4647             rtx abs_rem, abs_op1, tem, mask;
4648             rtx label;
4649             label = gen_label_rtx ();
4650             quotient = gen_reg_rtx (compute_mode);
4651             remainder = gen_reg_rtx (compute_mode);
4652             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4653               {
4654                 rtx tem;
4655                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4656                                          quotient, 0, OPTAB_LIB_WIDEN);
4657                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4658                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4659                                           remainder, 0, OPTAB_LIB_WIDEN);
4660               }
4661             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4662             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4663             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4664                                 build_int_cst (NULL_TREE, 1),
4665                                 NULL_RTX, 1);
4666             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4667             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4668                                 NULL_RTX, 0, OPTAB_WIDEN);
4669             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4670                                  build_int_cst (NULL_TREE, size - 1),
4671                                  NULL_RTX, 0);
4672             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4673                                 NULL_RTX, 0, OPTAB_WIDEN);
4674             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4675                                 NULL_RTX, 0, OPTAB_WIDEN);
4676             expand_inc (quotient, tem);
4677             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4678                                 NULL_RTX, 0, OPTAB_WIDEN);
4679             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4680                                 NULL_RTX, 0, OPTAB_WIDEN);
4681             expand_dec (remainder, tem);
4682             emit_label (label);
4683           }
4684         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4685
4686       default:
4687         gcc_unreachable ();
4688       }
4689
4690   if (quotient == 0)
4691     {
4692       if (target && GET_MODE (target) != compute_mode)
4693         target = 0;
4694
4695       if (rem_flag)
4696         {
4697           /* Try to produce the remainder without producing the quotient.
4698              If we seem to have a divmod pattern that does not require widening,
4699              don't try widening here.  We should really have a WIDEN argument
4700              to expand_twoval_binop, since what we'd really like to do here is
4701              1) try a mod insn in compute_mode
4702              2) try a divmod insn in compute_mode
4703              3) try a div insn in compute_mode and multiply-subtract to get
4704                 remainder
4705              4) try the same things with widening allowed.  */
4706           remainder
4707             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4708                                  op0, op1, target,
4709                                  unsignedp,
4710                                  ((optab_handler (optab2, compute_mode)->insn_code
4711                                    != CODE_FOR_nothing)
4712                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4713           if (remainder == 0)
4714             {
4715               /* No luck there.  Can we do remainder and divide at once
4716                  without a library call?  */
4717               remainder = gen_reg_rtx (compute_mode);
4718               if (! expand_twoval_binop ((unsignedp
4719                                           ? udivmod_optab
4720                                           : sdivmod_optab),
4721                                          op0, op1,
4722                                          NULL_RTX, remainder, unsignedp))
4723                 remainder = 0;
4724             }
4725
4726           if (remainder)
4727             return gen_lowpart (mode, remainder);
4728         }
4729
4730       /* Produce the quotient.  Try a quotient insn, but not a library call.
4731          If we have a divmod in this mode, use it in preference to widening
4732          the div (for this test we assume it will not fail). Note that optab2
4733          is set to the one of the two optabs that the call below will use.  */
4734       quotient
4735         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4736                              op0, op1, rem_flag ? NULL_RTX : target,
4737                              unsignedp,
4738                              ((optab_handler (optab2, compute_mode)->insn_code
4739                                != CODE_FOR_nothing)
4740                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4741
4742       if (quotient == 0)
4743         {
4744           /* No luck there.  Try a quotient-and-remainder insn,
4745              keeping the quotient alone.  */
4746           quotient = gen_reg_rtx (compute_mode);
4747           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4748                                      op0, op1,
4749                                      quotient, NULL_RTX, unsignedp))
4750             {
4751               quotient = 0;
4752               if (! rem_flag)
4753                 /* Still no luck.  If we are not computing the remainder,
4754                    use a library call for the quotient.  */
4755                 quotient = sign_expand_binop (compute_mode,
4756                                               udiv_optab, sdiv_optab,
4757                                               op0, op1, target,
4758                                               unsignedp, OPTAB_LIB_WIDEN);
4759             }
4760         }
4761     }
4762
4763   if (rem_flag)
4764     {
4765       if (target && GET_MODE (target) != compute_mode)
4766         target = 0;
4767
4768       if (quotient == 0)
4769         {
4770           /* No divide instruction either.  Use library for remainder.  */
4771           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4772                                          op0, op1, target,
4773                                          unsignedp, OPTAB_LIB_WIDEN);
4774           /* No remainder function.  Try a quotient-and-remainder
4775              function, keeping the remainder.  */
4776           if (!remainder)
4777             {
4778               remainder = gen_reg_rtx (compute_mode);
4779               if (!expand_twoval_binop_libfunc
4780                   (unsignedp ? udivmod_optab : sdivmod_optab,
4781                    op0, op1,
4782                    NULL_RTX, remainder,
4783                    unsignedp ? UMOD : MOD))
4784                 remainder = NULL_RTX;
4785             }
4786         }
4787       else
4788         {
4789           /* We divided.  Now finish doing X - Y * (X / Y).  */
4790           remainder = expand_mult (compute_mode, quotient, op1,
4791                                    NULL_RTX, unsignedp);
4792           remainder = expand_binop (compute_mode, sub_optab, op0,
4793                                     remainder, target, unsignedp,
4794                                     OPTAB_LIB_WIDEN);
4795         }
4796     }
4797
4798   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4799 }
4800 \f
4801 /* Return a tree of data type TYPE that describes the value of rtx X.
4802    Constants, symbols with a decl and a fixed set of arithmetic, shift,
4803    division and extension codes are translated, recursing on operands;
4804    anything else becomes an anonymous VAR_DECL whose RTL is X.  */
4805
4806 tree
4807 make_tree (tree type, rtx x)
4808 {
4809   tree t;
4810
4811   switch (GET_CODE (x))
4812     {
4813     case CONST_INT:
4814       {
4815         /* A negative value gets an all-ones high word, unless TYPE is
4816            unsigned and narrower than HOST_WIDE_INT; then the high word
4817            stays zero.  */
4818         HOST_WIDE_INT high_word = 0;
4819
4820         if (INTVAL (x) < 0
4821             && (!TYPE_UNSIGNED (type)
4822                 || (GET_MODE_BITSIZE (TYPE_MODE (type))
4823                     >= HOST_BITS_PER_WIDE_INT)))
4824           high_word = -1;
4825         return build_int_cst_wide (type, INTVAL (x), high_word);
4826       }
4827
4828     case CONST_DOUBLE:
4829       /* A VOIDmode CONST_DOUBLE becomes an integer constant built from
4830          its low and high parts; any other mode becomes a real.  */
4831       if (GET_MODE (x) != VOIDmode)
4832         {
4833           REAL_VALUE_TYPE real_val;
4834
4835           REAL_VALUE_FROM_CONST_DOUBLE (real_val, x);
4836           return build_real (type, real_val);
4837         }
4838
4839       return build_int_cst_wide (type,
4840                                  CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4841
4842     case CONST_VECTOR:
4843       {
4844         int idx = CONST_VECTOR_NUNITS (x);
4845         tree elt_type = TREE_TYPE (type);
4846         tree elt_list = NULL_TREE;
4847
4848         /* Cons up the element trees, walking from the last element
4849            down to the first.  */
4850         while (--idx >= 0)
4851           {
4852             rtx elt = CONST_VECTOR_ELT (x, idx);
4853
4854             elt_list = tree_cons (NULL_TREE, make_tree (elt_type, elt),
4855                                   elt_list);
4856           }
4857         return build_vector (type, elt_list);
4858       }
4859
4860     /* Simple arithmetic: rebuild the matching tree operation, making
4861        every operand in TYPE.  */
4862     case PLUS:
4863       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4864                           make_tree (type, XEXP (x, 1)));
4865
4866     case MINUS:
4867       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4868                           make_tree (type, XEXP (x, 1)));
4869
4870     case NEG:
4871       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4872
4873     case MULT:
4874       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4875                           make_tree (type, XEXP (x, 1)));
4876
4877     case ASHIFT:
4878       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4879                           make_tree (type, XEXP (x, 1)));
4880
4881     case LSHIFTRT:
4882     case ASHIFTRT:
4883       /* Shift in the unsigned variant of TYPE for a logical shift and
4884          in the signed variant for an arithmetic one, then convert the
4885          result back to TYPE.  */
4886       t = (GET_CODE (x) == LSHIFTRT
4887            ? unsigned_type_for (type) : signed_type_for (type));
4888       return fold_convert (type, build2 (RSHIFT_EXPR, t,
4889                                          make_tree (t, XEXP (x, 0)),
4890                                          make_tree (type, XEXP (x, 1))));
4891
4892     case DIV:
4893     case UDIV:
4894       /* UDIV divides in the unsigned variant of TYPE.  DIV divides in
4895          the signed variant, except that a REAL_TYPE is used as is.  */
4896       if (GET_CODE (x) == UDIV)
4897         t = unsigned_type_for (type);
4898       else if (TREE_CODE (type) == REAL_TYPE)
4899         t = type;
4900       else
4901         t = signed_type_for (type);
4902       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4903                                          make_tree (t, XEXP (x, 0)),
4904                                          make_tree (t, XEXP (x, 1))));
4905
4906     case SIGN_EXTEND:
4907     case ZERO_EXTEND:
4908       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4909                                           GET_CODE (x) == ZERO_EXTEND);
4910       return fold_convert (type, make_tree (t, XEXP (x, 0)));
4911
4912     case CONST:
4913       return make_tree (type, XEXP (x, 0));
4914
4915     case SYMBOL_REF:
4916       t = SYMBOL_REF_DECL (x);
4917       if (t != NULL_TREE)
4918         return fold_convert (type, build_fold_addr_expr (t));
4919       /* else fall through.  */
4920
4921     default:
4922       t = build_decl (VAR_DECL, NULL_TREE, type);
4923
4924       /* For a pointer TYPE, X may be in Pmode while TYPE_MODE is
4925          ptr_mode, so convert X to TYPE_MODE first.  */
4926       if (POINTER_TYPE_P (type))
4927         x = convert_memory_address (TYPE_MODE (type), x);
4928
4929       /* Store the RTL directly rather than through SET_DECL_RTL, so
4930          that set_decl_rtl does not adjust REG_ATTRS for this temporary.  */
4931       t->decl_with_rtl.rtl = x;
4932
4933       return t;
4934     }
4935 }
4936 \f
4937 /* Compute the bitwise AND of OP0 and OP1 in MODE.  When TARGET is
4938    nonzero the result ends up in TARGET, which is returned.
4939
4940    When TARGET is 0, a pseudo-register or constant is returned.  */
4941
4942 rtx
4943 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
4944 {
4945   rtx result = NULL_RTX;
4946
4947   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4948     result = simplify_binary_operation (AND, mode, op0, op1);
4949   if (result == NULL_RTX)
4950     result = expand_binop (mode, and_optab, op0, op1, target,
4951                            0, OPTAB_LIB_WIDEN);
4952   if (target == NULL_RTX)
4953     return result;
4954   if (result != target)
4955     emit_move_insn (target, result);
4956   return target;
4957 }
4958 \f
4959 /* Helper function for emit_store_flag.  SUBTARGET holds a raw
4960    store-flag value in MODE.  Bring it to the mode of TARGET and
4961    normalize it as NORMALIZEP requests (0 leaves it raw, 1 gives 0/1,
4962    -1 gives 0/-1).  Return the rtx holding the result, which is TARGET
4963    whenever a final narrowing conversion had to be emitted.  */
4964 static rtx
4965 emit_store_flag_1 (rtx target, rtx subtarget, enum machine_mode mode,
4966                    int normalizep)
4967 {
4968   rtx op0;
4969   enum machine_mode target_mode = GET_MODE (target);
4970
4971   /* If we are converting to a wider mode, first convert to
4972      TARGET_MODE, then normalize.  This produces better combining
4973      opportunities on machines that have a SIGN_EXTRACT when we are
4974      testing a single bit.  This mostly benefits the 68k.
4975
4976      If STORE_FLAG_VALUE does not have the sign bit set when
4977      interpreted in MODE, we can do this conversion as unsigned, which
4978      is usually more efficient.  */
4979   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
4980     {
4981       convert_move (target, subtarget,
4982                     (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
4983                     && 0 == (STORE_FLAG_VALUE
4984                              & ((unsigned HOST_WIDE_INT) 1
4985                                 << (GET_MODE_BITSIZE (mode) - 1))));
4986       op0 = target;
4987       mode = target_mode;
4988     }
4989   else
4990     op0 = subtarget;
4991
4992   /* If we want to keep subexpressions around, don't reuse our last target.  */
4993   if (optimize)
4994     subtarget = 0;
4995
4996   /* Now normalize to the proper value in MODE.  Sometimes we don't
4997      have to do anything.  */
4998   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4999     ;
5000   /* STORE_FLAG_VALUE might be the most negative number, so write
5001      the comparison this way to avoid a compiler-time warning.  */
5002   else if (- normalizep == STORE_FLAG_VALUE)
5003     op0 = expand_unop (mode, neg_optab, op0, subtarget, 0);
5004
5005   /* Test the sign bit with an unsigned mask, so the shift cannot
5006      overflow, rather than with STORE_FLAG_VALUE < 0: ANSI integer
5007      constant typing rules make a sign-bit-only value hard to use.  */
5008   else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5009            && (STORE_FLAG_VALUE
5010                & ((unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1))))
5011     op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5012                         size_int (GET_MODE_BITSIZE (mode) - 1), subtarget,
5013                         normalizep == 1);
5014   else
5015     {
5016       gcc_assert (STORE_FLAG_VALUE & 1);
5017
5018       op0 = expand_and (mode, op0, const1_rtx, subtarget);
5019       if (normalizep == -1)
5020         op0 = expand_unop (mode, neg_optab, op0, op0, 0);
5021     }
5022
5023   /* If we were converting to a smaller mode, do the conversion now.  */
5024   if (target_mode == mode)
5025     return op0;
5026   convert_move (target, op0, 0);
5027   return target;
5028 }
5029
5030 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5031    and storing in TARGET.  Normally return TARGET.
5032    Return 0 if that cannot be done.
5033
5034    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5035    it is VOIDmode, they cannot both be CONST_INT.
5036
5037    UNSIGNEDP is for the case where we have to widen the operands
5038    to perform the operation.  It says to use zero-extension.
5039
5040    NORMALIZEP is 1 if we should convert the result to be either zero
5041    or one.  NORMALIZEP is -1 if we should convert the result to be
5042    either zero or -1.  If NORMALIZEP is zero, the result will be left
5043    "raw" out of the scc insn.  */
5044
5045 rtx
5046 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5047                  enum machine_mode mode, int unsignedp, int normalizep)
5048 {
5049   rtx subtarget;
5050   enum insn_code icode;
5051   enum machine_mode compare_mode;
5052   enum machine_mode target_mode = GET_MODE (target);
5053   rtx tem;
5054   rtx last = get_last_insn ();
5055   rtx pattern, comparison;
5056
5057   if (unsignedp)
5058     code = unsigned_condition (code);
5059
5060   /* If one operand is constant, make it the second one.  Only do this
5061      if the other operand is not constant as well.  */
5062
5063   if (swap_commutative_operands_p (op0, op1))
5064     {
5065       tem = op0;
5066       op0 = op1;
5067       op1 = tem;
5068       code = swap_condition (code);
5069     }
5070
5071   if (mode == VOIDmode)
5072     mode = GET_MODE (op0);
5073
5074   /* For some comparisons with 1 and -1, we can convert this to
5075      comparisons with zero.  This will often produce more opportunities for
5076      store-flag insns.  */
5077
5078   switch (code)
5079     {
5080     case LT:
5081       if (op1 == const1_rtx)
5082         op1 = const0_rtx, code = LE;
5083       break;
5084     case LE:
5085       if (op1 == constm1_rtx)
5086         op1 = const0_rtx, code = LT;
5087       break;
5088     case GE:
5089       if (op1 == const1_rtx)
5090         op1 = const0_rtx, code = GT;
5091       break;
5092     case GT:
5093       if (op1 == constm1_rtx)
5094         op1 = const0_rtx, code = GE;
5095       break;
5096     case GEU:
5097       if (op1 == const1_rtx)
5098         op1 = const0_rtx, code = NE;
5099       break;
5100     case LTU:
5101       if (op1 == const1_rtx)
5102         op1 = const0_rtx, code = EQ;
5103       break;
5104     default:
5105       break;
5106     }
5107
5108   /* If we are comparing a double-word integer with zero or -1, we can
5109      convert the comparison into one involving a single word.  */
5110   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5111       && GET_MODE_CLASS (mode) == MODE_INT
5112       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5113     {
5114       if ((code == EQ || code == NE)
5115           && (op1 == const0_rtx || op1 == constm1_rtx))
5116         {
5117           rtx op00, op01, op0both;
5118
5119           /* Do a logical OR or AND of the two words and compare the
5120              result.  */
5121           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5122           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5123           op0both = expand_binop (word_mode,
5124                                   op1 == const0_rtx ? ior_optab : and_optab,
5125                                   op00, op01, NULL_RTX, unsignedp,
5126                                   OPTAB_DIRECT);
5127
5128           if (op0both != 0)
5129             return emit_store_flag (target, code, op0both, op1, word_mode,
5130                                     unsignedp, normalizep);
5131         }
5132       else if ((code == LT || code == GE) && op1 == const0_rtx)
5133         {
5134           rtx op0h;
5135
5136           /* If testing the sign bit, can just test on high word.  */
5137           op0h = simplify_gen_subreg (word_mode, op0, mode,
5138                                       subreg_highpart_offset (word_mode,
5139                                                               mode));
5140           return emit_store_flag (target, code, op0h, op1, word_mode,
5141                                   unsignedp, normalizep);
5142         }
5143     }
5144
5145   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5146      complement of A (for GE) and shifting the sign bit to the low bit.  */
5147   if (op1 == const0_rtx && (code == LT || code == GE)
5148       && GET_MODE_CLASS (mode) == MODE_INT
5149       && (normalizep || STORE_FLAG_VALUE == 1
5150           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5151               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5152                   == ((unsigned HOST_WIDE_INT) 1
5153                       << (GET_MODE_BITSIZE (mode) - 1))))))
5154     {
5155       subtarget = target;
5156
5157       /* If the result is to be wider than OP0, it is best to convert it
5158          first.  If it is to be narrower, it is *incorrect* to convert it
5159          first.  */
5160       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5161         {
5162           op0 = convert_modes (target_mode, mode, op0, 0);
5163           mode = target_mode;
5164         }
5165
5166       if (target_mode != mode)
5167         subtarget = 0;
5168
5169       if (code == GE)
5170         op0 = expand_unop (mode, one_cmpl_optab, op0,
5171                            ((STORE_FLAG_VALUE == 1 || normalizep)
5172                             ? 0 : subtarget), 0);
5173
5174       if (STORE_FLAG_VALUE == 1 || normalizep)
5175         /* If we are supposed to produce a 0/1 value, we want to do
5176            a logical shift from the sign bit to the low-order bit; for
5177            a -1/0 value, we do an arithmetic shift.  */
5178         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5179                             size_int (GET_MODE_BITSIZE (mode) - 1),
5180                             subtarget, normalizep != -1);
5181
5182       if (mode != target_mode)
5183         op0 = convert_modes (target_mode, mode, op0, 0);
5184
5185       return op0;
5186     }
5187
5188   icode = setcc_gen_code[(int) code];
5189
5190   if (icode != CODE_FOR_nothing)
5191     {
5192       insn_operand_predicate_fn pred;
5193
5194       /* We think we may be able to do this with a scc insn.  Emit the
5195          comparison and then the scc insn.  */
5196
5197       do_pending_stack_adjust ();
5198       last = get_last_insn ();
5199
5200       comparison
5201         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
5202       if (CONSTANT_P (comparison))
5203         {
5204           switch (GET_CODE (comparison))
5205             {
5206             case CONST_INT:
5207               if (comparison == const0_rtx)
5208                 return const0_rtx;
5209               break;
5210
5211 #ifdef FLOAT_STORE_FLAG_VALUE
5212             case CONST_DOUBLE:
5213               if (comparison == CONST0_RTX (GET_MODE (comparison)))
5214                 return const0_rtx;
5215               break;
5216 #endif
5217             default:
5218               gcc_unreachable ();
5219             }
5220
5221           if (normalizep == 1)
5222             return const1_rtx;
5223           if (normalizep == -1)
5224             return constm1_rtx;
5225           return const_true_rtx;
5226         }
5227
5228       /* The code of COMPARISON may not match CODE if compare_from_rtx
5229          decided to swap its operands and reverse the original code.
5230
5231          We know that compare_from_rtx returns either a CONST_INT or
5232          a new comparison code, so it is safe to just extract the
5233          code from COMPARISON.  */
5234       code = GET_CODE (comparison);
5235
5236       /* Get a reference to the target in the proper mode for this insn.  */
5237       compare_mode = insn_data[(int) icode].operand[0].mode;
5238       subtarget = target;
5239       pred = insn_data[(int) icode].operand[0].predicate;
5240       if (optimize || ! (*pred) (subtarget, compare_mode))
5241         subtarget = gen_reg_rtx (compare_mode);
5242
5243       pattern = GEN_FCN (icode) (subtarget);
5244       if (pattern)
5245         {
5246           emit_insn (pattern);
5247           return emit_store_flag_1 (target, subtarget, compare_mode,
5248                                     normalizep);
5249         }
5250     }
5251   else
5252     {
5253       /* We don't have an scc insn, so try a cstore insn.  */
5254
5255       for (compare_mode = mode; compare_mode != VOIDmode;
5256            compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5257         {
5258           icode = optab_handler (cstore_optab, compare_mode)->insn_code;
5259           if (icode != CODE_FOR_nothing)
5260             break;
5261         }
5262
5263       if (icode != CODE_FOR_nothing)
5264         {
5265           enum machine_mode result_mode
5266             = insn_data[(int) icode].operand[0].mode;
5267           rtx cstore_op0 = op0;
5268           rtx cstore_op1 = op1;
5269
5270           do_pending_stack_adjust ();
5271           last = get_last_insn ();
5272
5273           if (compare_mode != mode)
5274             {
5275               cstore_op0 = convert_modes (compare_mode, mode, cstore_op0,
5276                                           unsignedp);
5277               cstore_op1 = convert_modes (compare_mode, mode, cstore_op1,
5278                                           unsignedp);
5279             }
5280
5281           if (!insn_data[(int) icode].operand[2].predicate (cstore_op0,
5282                                                             compare_mode))
5283             cstore_op0 = copy_to_mode_reg (compare_mode, cstore_op0);
5284
5285           if (!insn_data[(int) icode].operand[3].predicate (cstore_op1,
5286                                                             compare_mode))
5287             cstore_op1 = copy_to_mode_reg (compare_mode, cstore_op1);
5288
5289           comparison = gen_rtx_fmt_ee (code, result_mode, cstore_op0,
5290                                        cstore_op1);
5291           subtarget = target;
5292
5293           if (optimize || !(insn_data[(int) icode].operand[0].predicate
5294                             (subtarget, result_mode)))
5295             subtarget = gen_reg_rtx (result_mode);
5296
5297           pattern = GEN_FCN (icode) (subtarget, comparison, cstore_op0,
5298                                      cstore_op1);
5299
5300           if (pattern)
5301             {
5302               emit_insn (pattern);
5303               return emit_store_flag_1 (target, subtarget, result_mode,
5304                                         normalizep);
5305             }
5306         }
5307     }
5308
5309   delete_insns_since (last);
5310
5311   /* If optimizing, use different pseudo registers for each insn, instead
5312      of reusing the same pseudo.  This leads to better CSE, but slows
5313      down the compiler, since there are more pseudos */
5314   subtarget = (!optimize
5315                && (target_mode == mode)) ? target : NULL_RTX;
5316
5317   /* If we reached here, we can't do this with a scc insn.  However, there
5318      are some comparisons that can be done directly.  For example, if
5319      this is an equality comparison of integers, we can try to exclusive-or
5320      (or subtract) the two operands and use a recursive call to try the
5321      comparison with zero.  Don't do any of these cases if branches are
5322      very cheap.  */
5323
5324   if (BRANCH_COST > 0
5325       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5326       && op1 != const0_rtx)
5327     {
5328       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5329                           OPTAB_WIDEN);
5330
5331       if (tem == 0)
5332         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5333                             OPTAB_WIDEN);
5334       if (tem != 0)
5335         tem = emit_store_flag (target, code, tem, const0_rtx,
5336                                mode, unsignedp, normalizep);
5337       if (tem == 0)
5338         delete_insns_since (last);
5339       return tem;
5340     }
5341
5342   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5343      the constant zero.  Reject all other comparisons at this point.  Only
5344      do LE and GT if branches are expensive since they are expensive on
5345      2-operand machines.  */
5346
5347   if (BRANCH_COST == 0
5348       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5349       || (code != EQ && code != NE
5350           && (BRANCH_COST <= 1 || (code != LE && code != GT))))
5351     return 0;
5352
5353   /* See what we need to return.  We can only return a 1, -1, or the
5354      sign bit.  */
5355
5356   if (normalizep == 0)
5357     {
5358       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5359         normalizep = STORE_FLAG_VALUE;
5360
5361       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5362                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5363                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5364         ;
5365       else
5366         return 0;
5367     }
5368
5369   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5370      do the necessary operation below.  */
5371
5372   tem = 0;
5373
5374   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5375      the sign bit set.  */
5376
5377   if (code == LE)
5378     {
5379       /* This is destructive, so SUBTARGET can't be OP0.  */
5380       if (rtx_equal_p (subtarget, op0))
5381         subtarget = 0;
5382
5383       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5384                           OPTAB_WIDEN);
5385       if (tem)
5386         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5387                             OPTAB_WIDEN);
5388     }
5389
5390   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5391      number of bits in the mode of OP0, minus one.  */
5392
5393   if (code == GT)
5394     {
5395       if (rtx_equal_p (subtarget, op0))
5396         subtarget = 0;
5397
5398       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5399                           size_int (GET_MODE_BITSIZE (mode) - 1),
5400                           subtarget, 0);
5401       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5402                           OPTAB_WIDEN);
5403     }
5404
5405   if (code == EQ || code == NE)
5406     {
5407       /* For EQ or NE, one way to do the comparison is to apply an operation
5408          that converts the operand into a positive number if it is nonzero
5409          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5410          for NE we negate.  This puts the result in the sign bit.  Then we
5411          normalize with a shift, if needed.
5412
5413          Two operations that can do the above actions are ABS and FFS, so try
5414          them.  If that doesn't work, and MODE is smaller than a full word,
5415          we can use zero-extension to the wider mode (an unsigned conversion)
5416          as the operation.  */
5417
5418       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5419          that is compensated by the subsequent overflow when subtracting
5420          one / negating.  */
5421
5422       if (optab_handler (abs_optab, mode)->insn_code != CODE_FOR_nothing)
5423         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5424       else if (optab_handler (ffs_optab, mode)->insn_code != CODE_FOR_nothing)
5425         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5426       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5427         {
5428           tem = convert_modes (word_mode, mode, op0, 1);
5429           mode = word_mode;
5430         }
5431
5432       if (tem != 0)
5433         {
5434           if (code == EQ)
5435             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5436                                 0, OPTAB_WIDEN);
5437           else
5438             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5439         }
5440
5441       /* If we couldn't do it that way, for NE we can "or" the two's complement
5442          of the value with itself.  For EQ, we take the one's complement of
5443          that "or", which is an extra insn, so we only handle EQ if branches
5444          are expensive.  */
5445
5446       if (tem == 0 && (code == NE || BRANCH_COST > 1))
5447         {
5448           if (rtx_equal_p (subtarget, op0))
5449             subtarget = 0;
5450
5451           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5452           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5453                               OPTAB_WIDEN);
5454
5455           if (tem && code == EQ)
5456             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5457         }
5458     }
5459
5460   if (tem && normalizep)
5461     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5462                         size_int (GET_MODE_BITSIZE (mode) - 1),
5463                         subtarget, normalizep == 1);
5464
5465   if (tem)
5466     {
5467       if (GET_MODE (tem) != target_mode)
5468         {
5469           convert_move (target, tem, 0);
5470           tem = target;
5471         }
5472       else if (!subtarget)
5473         {
5474           emit_move_insn (target, tem);
5475           tem = target;
5476         }
5477     }
5478   else
5479     delete_insns_since (last);
5480
5481   return tem;
5482 }
5483
5484 /* Like emit_store_flag, but always succeeds.  */
5485
5486 rtx
5487 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5488                        enum machine_mode mode, int unsignedp, int normalizep)
5489 {
5490   rtx tem, label;
5491
5492   /* First see if emit_store_flag can do the job.  */
5493   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5494   if (tem != 0)
5495     return tem;
5496
5497   if (normalizep == 0)
5498     normalizep = 1;
5499
5500   /* If this failed, we have to do this with set/compare/jump/set code.  */
5501
5502   if (!REG_P (target)
5503       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5504     target = gen_reg_rtx (GET_MODE (target));
5505
5506   emit_move_insn (target, const1_rtx);
5507   label = gen_label_rtx ();
5508   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5509                            NULL_RTX, label);
5510
5511   emit_move_insn (target, const0_rtx);
5512   emit_label (label);
5513
5514   return target;
5515 }
5516 \f
5517 /* Perform possibly multi-word comparison and conditional jump to LABEL
5518    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5519    now a thin wrapper around do_compare_rtx_and_jump.  */
5520
5521 static void
5522 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5523                  rtx label)
5524 {
5525   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5526   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5527                            NULL_RTX, NULL_RTX, label);
5528 }