gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
   5    Free Software Foundation, Inc.
   6
   7 This file is part of GCC.
   8
   9 GCC is free software; you can redistribute it and/or modify it under
  10 the terms of the GNU General Public License as published by the Free
  11 Software Foundation; either version 2, or (at your option) any later
  12 version.
  13
  14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  17 for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with GCC; see the file COPYING.  If not, write to the Free
  21 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  22 02111-1307, USA.  */
  23
  24
  25 #include "config.h"
  26 #include "system.h"
  27 #include "coretypes.h"
  28 #include "tm.h"
  29 #include "toplev.h"
  30 #include "rtl.h"
  31 #include "tree.h"
  32 #include "tm_p.h"
  33 #include "flags.h"
  34 #include "insn-config.h"
  35 #include "expr.h"
  36 #include "optabs.h"
  37 #include "real.h"
  38 #include "recog.h"
  39 #include "langhooks.h"
  40
  41 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  42                                    unsigned HOST_WIDE_INT,
  43                                    unsigned HOST_WIDE_INT, rtx);
  44 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  45                                    unsigned HOST_WIDE_INT, rtx);
  46 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  47                                     unsigned HOST_WIDE_INT,
  48                                     unsigned HOST_WIDE_INT,
  49                                     unsigned HOST_WIDE_INT, rtx, int);
  50 static rtx mask_rtx (enum machine_mode, int, int, int);
  51 static rtx lshift_value (enum machine_mode, rtx, int, int);
  52 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  53                                     unsigned HOST_WIDE_INT, int);
  54 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  55 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  56 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  57
  58 /* Test whether a value is zero of a power of two.  */
  59 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
  60
  61 /* Nonzero means divides or modulus operations are relatively cheap for
  62    powers of two, so don't use branches; emit the operation instead.
  63    Usually, this will mean that the MD file will emit non-branch
  64    sequences.  */
  65
  66 static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
  67 static bool smod_pow2_cheap[NUM_MACHINE_MODES];
  68
  69 #ifndef SLOW_UNALIGNED_ACCESS
  70 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  71 #endif
  72
  73 /* For compilers that support multiple targets with different word sizes,
  74    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  75    is the H8/300(H) compiler.  */
  76
  77 #ifndef MAX_BITS_PER_WORD
  78 #define MAX_BITS_PER_WORD BITS_PER_WORD
  79 #endif
  80
  81 /* Reduce conditional compilation elsewhere.  */
  82 #ifndef HAVE_insv
  83 #define HAVE_insv       0
  84 #define CODE_FOR_insv   CODE_FOR_nothing
  85 #define gen_insv(a,b,c,d) NULL_RTX
  86 #endif
  87 #ifndef HAVE_extv
  88 #define HAVE_extv       0
  89 #define CODE_FOR_extv   CODE_FOR_nothing
  90 #define gen_extv(a,b,c,d) NULL_RTX
  91 #endif
  92 #ifndef HAVE_extzv
  93 #define HAVE_extzv      0
  94 #define CODE_FOR_extzv  CODE_FOR_nothing
  95 #define gen_extzv(a,b,c,d) NULL_RTX
  96 #endif
  97
  98 /* Cost of various pieces of RTL.  Note that some of these are indexed by
  99    shift count and some by mode.  */
 100 static int zero_cost;
 101 static int add_cost[NUM_MACHINE_MODES];
 102 static int neg_cost[NUM_MACHINE_MODES];
 103 static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 104 static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 105 static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 106 static int mul_cost[NUM_MACHINE_MODES];
 107 static int div_cost[NUM_MACHINE_MODES];
 108 static int mul_widen_cost[NUM_MACHINE_MODES];
 109 static int mul_highpart_cost[NUM_MACHINE_MODES];
 110
 111 void
 112 init_expmed (void)
 113 {
 114   struct
 115   {
 116     struct rtx_def reg;         rtunion reg_fld[2];
 117     struct rtx_def plus;        rtunion plus_fld1;
 118     struct rtx_def neg;
 119     struct rtx_def udiv;        rtunion udiv_fld1;
 120     struct rtx_def mult;        rtunion mult_fld1;
 121     struct rtx_def div;         rtunion div_fld1;
 122     struct rtx_def mod;         rtunion mod_fld1;
 123     struct rtx_def zext;
 124     struct rtx_def wide_mult;   rtunion wide_mult_fld1;
 125     struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
 126     struct rtx_def wide_trunc;
 127     struct rtx_def shift;       rtunion shift_fld1;
 128     struct rtx_def shift_mult;  rtunion shift_mult_fld1;
 129     struct rtx_def shift_add;   rtunion shift_add_fld1;
 130     struct rtx_def shift_sub;   rtunion shift_sub_fld1;
 131   } all;
 132
 133   rtx pow2[MAX_BITS_PER_WORD];
 134   rtx cint[MAX_BITS_PER_WORD];
 135   int m, n;
 136   enum machine_mode mode, wider_mode;
 137
 138   zero_cost = rtx_cost (const0_rtx, 0);
 139
 140   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 141     {
 142       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 143       cint[m] = GEN_INT (m);
 144     }
 145
 146   memset (&all, 0, sizeof all);
 147
 148   PUT_CODE (&all.reg, REG);
 149   /* Avoid using hard regs in ways which may be unsupported.  */
 150   REGNO (&all.reg) = LAST_VIRTUAL_REGISTER + 1;
 151
 152   PUT_CODE (&all.plus, PLUS);
 153   XEXP (&all.plus, 0) = &all.reg;
 154   XEXP (&all.plus, 1) = &all.reg;
 155
 156   PUT_CODE (&all.neg, NEG);
 157   XEXP (&all.neg, 0) = &all.reg;
 158
 159   PUT_CODE (&all.udiv, UDIV);
 160   XEXP (&all.udiv, 0) = &all.reg;
 161   XEXP (&all.udiv, 1) = &all.reg;
 162
 163   PUT_CODE (&all.mult, MULT);
 164   XEXP (&all.mult, 0) = &all.reg;
 165   XEXP (&all.mult, 1) = &all.reg;
 166
 167   PUT_CODE (&all.div, DIV);
 168   XEXP (&all.div, 0) = &all.reg;
 169   XEXP (&all.div, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
 170
 171   PUT_CODE (&all.mod, MOD);
 172   XEXP (&all.mod, 0) = &all.reg;
 173   XEXP (&all.mod, 1) = XEXP (&all.div, 1);
 174
 175   PUT_CODE (&all.zext, ZERO_EXTEND);
 176   XEXP (&all.zext, 0) = &all.reg;
 177
 178   PUT_CODE (&all.wide_mult, MULT);
 179   XEXP (&all.wide_mult, 0) = &all.zext;
 180   XEXP (&all.wide_mult, 1) = &all.zext;
 181
 182   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 183   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 184
 185   PUT_CODE (&all.wide_trunc, TRUNCATE);
 186   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 187
 188   PUT_CODE (&all.shift, ASHIFT);
 189   XEXP (&all.shift, 0) = &all.reg;
 190
 191   PUT_CODE (&all.shift_mult, MULT);
 192   XEXP (&all.shift_mult, 0) = &all.reg;
 193
 194   PUT_CODE (&all.shift_add, PLUS);
 195   XEXP (&all.shift_add, 0) = &all.shift_mult;
 196   XEXP (&all.shift_add, 1) = &all.reg;
 197
 198   PUT_CODE (&all.shift_sub, MINUS);
 199   XEXP (&all.shift_sub, 0) = &all.shift_mult;
 200   XEXP (&all.shift_sub, 1) = &all.reg;
 201
 202   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 203        mode != VOIDmode;
 204        mode = GET_MODE_WIDER_MODE (mode))
 205     {
 206       PUT_MODE (&all.reg, mode);
 207       PUT_MODE (&all.plus, mode);
 208       PUT_MODE (&all.neg, mode);
 209       PUT_MODE (&all.udiv, mode);
 210       PUT_MODE (&all.mult, mode);
 211       PUT_MODE (&all.div, mode);
 212       PUT_MODE (&all.mod, mode);
 213       PUT_MODE (&all.wide_trunc, mode);
 214       PUT_MODE (&all.shift, mode);
 215       PUT_MODE (&all.shift_mult, mode);
 216       PUT_MODE (&all.shift_add, mode);
 217       PUT_MODE (&all.shift_sub, mode);
 218
 219       add_cost[mode] = rtx_cost (&all.plus, SET);
 220       neg_cost[mode] = rtx_cost (&all.neg, SET);
 221       div_cost[mode] = rtx_cost (&all.udiv, SET);
 222       mul_cost[mode] = rtx_cost (&all.mult, SET);
 223
 224       sdiv_pow2_cheap[mode] = (rtx_cost (&all.div, SET) <= 2 * add_cost[mode]);
 225       smod_pow2_cheap[mode] = (rtx_cost (&all.mod, SET) <= 4 * add_cost[mode]);
 226
 227       wider_mode = GET_MODE_WIDER_MODE (mode);
 228       if (wider_mode != VOIDmode)
 229         {
 230           PUT_MODE (&all.zext, wider_mode);
 231           PUT_MODE (&all.wide_mult, wider_mode);
 232           PUT_MODE (&all.wide_lshr, wider_mode);
 233           XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
 234
 235           mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
 236           mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
 237         }
 238
 239       shift_cost[mode][0] = 0;
 240       shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
 241
 242       n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
 243       for (m = 1; m < n; m++)
 244         {
 245           XEXP (&all.shift, 1) = cint[m];
 246           XEXP (&all.shift_mult, 1) = pow2[m];
 247
 248           shift_cost[mode][m] = rtx_cost (&all.shift, SET);
 249           shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
 250           shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
 251         }
 252     }
 253 }
 254
 255 /* Return an rtx representing minus the value of X.
 256    MODE is the intended mode of the result,
 257    useful if X is a CONST_INT.  */
 258
 259 rtx
 260 negate_rtx (enum machine_mode mode, rtx x)
 261 {
 262   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 263
 264   if (result == 0)
 265     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 266
 267   return result;
 268 }
 269
 270 /* Report on the availability of insv/extv/extzv and the desired mode
 271    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 272    is false; else the mode of the specified operand.  If OPNO is -1,
 273    all the caller cares about is whether the insn is available.  */
 274 enum machine_mode
 275 mode_for_extraction (enum extraction_pattern pattern, int opno)
 276 {
 277   const struct insn_data *data;
 278
 279   switch (pattern)
 280     {
 281     case EP_insv:
 282       if (HAVE_insv)
 283         {
 284           data = &insn_data[CODE_FOR_insv];
 285           break;
 286         }
 287       return MAX_MACHINE_MODE;
 288
 289     case EP_extv:
 290       if (HAVE_extv)
 291         {
 292           data = &insn_data[CODE_FOR_extv];
 293           break;
 294         }
 295       return MAX_MACHINE_MODE;
 296
 297     case EP_extzv:
 298       if (HAVE_extzv)
 299         {
 300           data = &insn_data[CODE_FOR_extzv];
 301           break;
 302         }
 303       return MAX_MACHINE_MODE;
 304
 305     default:
 306       gcc_unreachable ();
 307     }
 308
 309   if (opno == -1)
 310     return VOIDmode;
 311
 312   /* Everyone who uses this function used to follow it with
 313      if (result == VOIDmode) result = word_mode; */
 314   if (data->operand[opno].mode == VOIDmode)
 315     return word_mode;
 316   return data->operand[opno].mode;
 317 }
 318
 319 \f
 320 /* Generate code to store value from rtx VALUE
 321    into a bit-field within structure STR_RTX
 322    containing BITSIZE bits starting at bit BITNUM.
 323    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
 324    ALIGN is the alignment that STR_RTX is known to have.
 325    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
 326
 327 /* ??? Note that there are two different ideas here for how
 328    to determine the size to count bits within, for a register.
 329    One is BITS_PER_WORD, and the other is the size of operand 3
 330    of the insv pattern.
 331
 332    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
 333    else, we use the mode of operand 3.  */
 334
 335 rtx
 336 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 337                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 338                  rtx value)
 339 {
 340   unsigned int unit
 341     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
 342   unsigned HOST_WIDE_INT offset, bitpos;
 343   rtx op0 = str_rtx;
 344   int byte_offset;
 345   rtx orig_value;
 346
 347   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 348
 349   while (GET_CODE (op0) == SUBREG)
 350     {
 351       /* The following line once was done only if WORDS_BIG_ENDIAN,
 352          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 353          meaningful at a much higher level; when structures are copied
 354          between memory and regs, the higher-numbered regs
 355          always get higher addresses.  */
 356       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
 357       op0 = SUBREG_REG (op0);
 358     }
 359
 360   /* No action is needed if the target is a register and if the field
 361      lies completely outside that register.  This can occur if the source
 362      code contains an out-of-bounds access to a small array.  */
 363   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 364     return value;
 365
 366   /* Use vec_set patterns for inserting parts of vectors whenever
 367      available.  */
 368   if (VECTOR_MODE_P (GET_MODE (op0))
 369       && !MEM_P (op0)
 370       && (vec_set_optab->handlers[GET_MODE (op0)].insn_code
 371           != CODE_FOR_nothing)
 372       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 373       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 374       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 375     {
 376       enum machine_mode outermode = GET_MODE (op0);
 377       enum machine_mode innermode = GET_MODE_INNER (outermode);
 378       int icode = (int) vec_set_optab->handlers[outermode].insn_code;
 379       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 380       rtx rtxpos = GEN_INT (pos);
 381       rtx src = value;
 382       rtx dest = op0;
 383       rtx pat, seq;
 384       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
 385       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
 386       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
 387
 388       start_sequence ();
 389
 390       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
 391         src = copy_to_mode_reg (mode1, src);
 392
 393       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 394         rtxpos = copy_to_mode_reg (mode1, rtxpos);
 395
 396       /* We could handle this, but we should always be called with a pseudo
 397          for our targets and all insns should take them as outputs.  */
 398       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
 399                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
 400                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
 401       pat = GEN_FCN (icode) (dest, src, rtxpos);
 402       seq = get_insns ();
 403       end_sequence ();
 404       if (pat)
 405         {
 406           emit_insn (seq);
 407           emit_insn (pat);
 408           return dest;
 409         }
 410     }
 411
 412   if (flag_force_mem)
 413     {
 414       int old_generating_concat_p = generating_concat_p;
 415       generating_concat_p = 0;
 416       value = force_not_mem (value);
 417       generating_concat_p = old_generating_concat_p;
 418     }
 419
 420   /* If the target is a register, overwriting the entire object, or storing
 421      a full-word or multi-word field can be done with just a SUBREG.
 422
 423      If the target is memory, storing any naturally aligned field can be
 424      done with a simple store.  For targets that support fast unaligned
 425      memory, any naturally sized, unit aligned field can be done directly.  */
 426
 427   offset = bitnum / unit;
 428   bitpos = bitnum % unit;
 429   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 430                 + (offset * UNITS_PER_WORD);
 431
 432   if (bitpos == 0
 433       && bitsize == GET_MODE_BITSIZE (fieldmode)
 434       && (!MEM_P (op0)
 435           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 436              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 437              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 438           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 439              || (offset * BITS_PER_UNIT % bitsize == 0
 440                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 441     {
 442       if (MEM_P (op0))
 443         op0 = adjust_address (op0, fieldmode, offset);
 444       else if (GET_MODE (op0) != fieldmode)
 445         op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 446                                    byte_offset);
 447       emit_move_insn (op0, value);
 448       return value;
 449     }
 450
 451   /* Make sure we are playing with integral modes.  Pun with subregs
 452      if we aren't.  This must come after the entire register case above,
 453      since that case is valid for any mode.  The following cases are only
 454      valid for integral modes.  */
 455   {
 456     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 457     if (imode != GET_MODE (op0))
 458       {
 459         if (MEM_P (op0))
 460           op0 = adjust_address (op0, imode, 0);
 461         else
 462           {
 463             gcc_assert (imode != BLKmode);
 464             op0 = gen_lowpart (imode, op0);
 465           }
 466       }
 467   }
 468
 469   /* We may be accessing data outside the field, which means
 470      we can alias adjacent data.  */
 471   if (MEM_P (op0))
 472     {
 473       op0 = shallow_copy_rtx (op0);
 474       set_mem_alias_set (op0, 0);
 475       set_mem_expr (op0, 0);
 476     }
 477
 478   /* If OP0 is a register, BITPOS must count within a word.
 479      But as we have it, it counts within whatever size OP0 now has.
 480      On a bigendian machine, these are not the same, so convert.  */
 481   if (BYTES_BIG_ENDIAN
 482       && !MEM_P (op0)
 483       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 484     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 485
 486   /* Storing an lsb-aligned field in a register
 487      can be done with a movestrict instruction.  */
 488
 489   if (!MEM_P (op0)
 490       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 491       && bitsize == GET_MODE_BITSIZE (fieldmode)
 492       && (movstrict_optab->handlers[fieldmode].insn_code
 493           != CODE_FOR_nothing))
 494     {
 495       int icode = movstrict_optab->handlers[fieldmode].insn_code;
 496
 497       /* Get appropriate low part of the value being stored.  */
 498       if (GET_CODE (value) == CONST_INT || REG_P (value))
 499         value = gen_lowpart (fieldmode, value);
 500       else if (!(GET_CODE (value) == SYMBOL_REF
 501                  || GET_CODE (value) == LABEL_REF
 502                  || GET_CODE (value) == CONST))
 503         value = convert_to_mode (fieldmode, value, 0);
 504
 505       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 506         value = copy_to_mode_reg (fieldmode, value);
 507
 508       if (GET_CODE (op0) == SUBREG)
 509         {
 510           /* Else we've got some float mode source being extracted into
 511              a different float mode destination -- this combination of
 512              subregs results in Severe Tire Damage.  */
 513           gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
 514                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 515                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 516           op0 = SUBREG_REG (op0);
 517         }
 518
 519       emit_insn (GEN_FCN (icode)
 520                  (gen_rtx_SUBREG (fieldmode, op0,
 521                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 522                                   + (offset * UNITS_PER_WORD)),
 523                                   value));
 524
 525       return value;
 526     }
 527
 528   /* Handle fields bigger than a word.  */
 529
 530   if (bitsize > BITS_PER_WORD)
 531     {
 532       /* Here we transfer the words of the field
 533          in the order least significant first.
 534          This is because the most significant word is the one which may
 535          be less than full.
 536          However, only do that if the value is not BLKmode.  */
 537
 538       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 539       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 540       unsigned int i;
 541
 542       /* This is the mode we must force value to, so that there will be enough
 543          subwords to extract.  Note that fieldmode will often (always?) be
 544          VOIDmode, because that is what store_field uses to indicate that this
 545          is a bit field, but passing VOIDmode to operand_subword_force will
 546          result in an abort.  */
 547       fieldmode = GET_MODE (value);
 548       if (fieldmode == VOIDmode)
 549         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 550
 551       for (i = 0; i < nwords; i++)
 552         {
 553           /* If I is 0, use the low-order word in both field and target;
 554              if I is 1, use the next to lowest word; and so on.  */
 555           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 556           unsigned int bit_offset = (backwards
 557                                      ? MAX ((int) bitsize - ((int) i + 1)
 558                                             * BITS_PER_WORD,
 559                                             0)
 560                                      : (int) i * BITS_PER_WORD);
 561
 562           store_bit_field (op0, MIN (BITS_PER_WORD,
 563                                      bitsize - i * BITS_PER_WORD),
 564                            bitnum + bit_offset, word_mode,
 565                            operand_subword_force (value, wordnum, fieldmode));
 566         }
 567       return value;
 568     }
 569
 570   /* From here on we can assume that the field to be stored in is
 571      a full-word (whatever type that is), since it is shorter than a word.  */
 572
 573   /* OFFSET is the number of words or bytes (UNIT says which)
 574      from STR_RTX to the first word or byte containing part of the field.  */
 575
 576   if (!MEM_P (op0))
 577     {
 578       if (offset != 0
 579           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 580         {
 581           if (!REG_P (op0))
 582             {
 583               /* Since this is a destination (lvalue), we can't copy it to a
 584                  pseudo.  We can trivially remove a SUBREG that does not
 585                  change the size of the operand.  Such a SUBREG may have been
 586                  added above.  Otherwise, abort.  */
 587               gcc_assert (GET_CODE (op0) == SUBREG
 588                           && (GET_MODE_SIZE (GET_MODE (op0))
 589                               == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
 590               op0 = SUBREG_REG (op0);
 591             }
 592           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 593                                 op0, (offset * UNITS_PER_WORD));
 594         }
 595       offset = 0;
 596     }
 597
 598   /* If VALUE has a floating-point or complex mode, access it as an
 599      integer of the corresponding size.  This can occur on a machine
 600      with 64 bit registers that uses SFmode for float.  It can also
 601      occur for unaligned float or complex fields.  */
 602   orig_value = value;
 603   if (GET_MODE (value) != VOIDmode
 604       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 605       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 606     {
 607       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 608       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 609     }
 610
 611   /* Now OFFSET is nonzero only if OP0 is memory
 612      and is therefore always measured in bytes.  */
 613
 614   if (HAVE_insv
 615       && GET_MODE (value) != BLKmode
 616       && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
 617       /* Ensure insv's size is wide enough for this field.  */
 618       && (GET_MODE_BITSIZE (op_mode) >= bitsize)
 619       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
 620             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
 621       && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
 622                                                         VOIDmode))
 623     {
 624       int xbitpos = bitpos;
 625       rtx value1;
 626       rtx xop0 = op0;
 627       rtx last = get_last_insn ();
 628       rtx pat;
 629       enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
 630       int save_volatile_ok = volatile_ok;
 631
 632       volatile_ok = 1;
 633
 634       /* If this machine's insv can only insert into a register, copy OP0
 635          into a register and save it back later.  */
 636       /* This used to check flag_force_mem, but that was a serious
 637          de-optimization now that flag_force_mem is enabled by -O2.  */
 638       if (MEM_P (op0)
 639           && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
 640                 (op0, VOIDmode)))
 641         {
 642           rtx tempreg;
 643           enum machine_mode bestmode;
 644
 645           /* Get the mode to use for inserting into this field.  If OP0 is
 646              BLKmode, get the smallest mode consistent with the alignment. If
 647              OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
 648              mode. Otherwise, use the smallest mode containing the field.  */
 649
 650           if (GET_MODE (op0) == BLKmode
 651               || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
 652             bestmode
 653               = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
 654                                MEM_VOLATILE_P (op0));
 655           else
 656             bestmode = GET_MODE (op0);
 657
 658           if (bestmode == VOIDmode
 659               || GET_MODE_SIZE (bestmode) < GET_MODE_SIZE (fieldmode)
 660               || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 661                   && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 662             goto insv_loses;
 663
 664           /* Adjust address to point to the containing unit of that mode.
 665              Compute offset as multiple of this unit, counting in bytes.  */
 666           unit = GET_MODE_BITSIZE (bestmode);
 667           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 668           bitpos = bitnum % unit;
 669           op0 = adjust_address (op0, bestmode,  offset);
 670
 671           /* Fetch that unit, store the bitfield in it, then store
 672              the unit.  */
 673           tempreg = copy_to_reg (op0);
 674           store_bit_field (tempreg, bitsize, bitpos, fieldmode, orig_value);
 675           emit_move_insn (op0, tempreg);
 676           return value;
 677         }
 678       volatile_ok = save_volatile_ok;
 679
 680       /* Add OFFSET into OP0's address.  */
 681       if (MEM_P (xop0))
 682         xop0 = adjust_address (xop0, byte_mode, offset);
 683
 684       /* If xop0 is a register, we need it in MAXMODE
 685          to make it acceptable to the format of insv.  */
 686       if (GET_CODE (xop0) == SUBREG)
 687         /* We can't just change the mode, because this might clobber op0,
 688            and we will need the original value of op0 if insv fails.  */
 689         xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 690       if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
 691         xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
 692
 693       /* On big-endian machines, we count bits from the most significant.
 694          If the bit field insn does not, we must invert.  */
 695
 696       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 697         xbitpos = unit - bitsize - xbitpos;
 698
 699       /* We have been counting XBITPOS within UNIT.
 700          Count instead within the size of the register.  */
 701       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
 702         xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
 703
 704       unit = GET_MODE_BITSIZE (maxmode);
 705
 706       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
 707       value1 = value;
 708       if (GET_MODE (value) != maxmode)
 709         {
 710           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 711             {
 712               /* Optimization: Don't bother really extending VALUE
 713                  if it has all the bits we will actually use.  However,
 714                  if we must narrow it, be sure we do it correctly.  */
 715
 716               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
 717                 {
 718                   rtx tmp;
 719
 720                   tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
 721                   if (! tmp)
 722                     tmp = simplify_gen_subreg (maxmode,
 723                                                force_reg (GET_MODE (value),
 724                                                           value1),
 725                                                GET_MODE (value), 0);
 726                   value1 = tmp;
 727                 }
 728               else
 729                 value1 = gen_lowpart (maxmode, value1);
 730             }
 731           else if (GET_CODE (value) == CONST_INT)
 732             value1 = gen_int_mode (INTVAL (value), maxmode);
 733           else
 734             /* Parse phase is supposed to make VALUE's data type
 735                match that of the component reference, which is a type
 736                at least as wide as the field; so VALUE should have
 737                a mode that corresponds to that type.  */
 738             gcc_assert (CONSTANT_P (value));
 739         }
 740
 741       /* If this machine's insv insists on a register,
 742          get VALUE1 into a register.  */
 743       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 744              (value1, maxmode)))
 745         value1 = force_reg (maxmode, value1);
 746
 747       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 748       if (pat)
 749         emit_insn (pat);
 750       else
 751         {
 752           delete_insns_since (last);
 753           store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 754         }
 755     }
 756   else
 757     insv_loses:
 758     /* Insv is not available; store using shifts and boolean ops.  */
 759     store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 760   return value;
 761 }
 762 \f
 763 /* Use shifts and boolean operations to store VALUE
 764    into a bit field of width BITSIZE
 765    in a memory location specified by OP0 except offset by OFFSET bytes.
 766      (OFFSET must be 0 if OP0 is a register.)
 767    The field starts at position BITPOS within the byte.
 768     (If OP0 is a register, it may be a full word or a narrower mode,
 769      but BITPOS still counts within a full word,
 770      which is significant on bigendian machines.)  */
 771
 772 static void
 773 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 774                        unsigned HOST_WIDE_INT bitsize,
 775                        unsigned HOST_WIDE_INT bitpos, rtx value)
 776 {
 777   enum machine_mode mode;
 778   unsigned int total_bits = BITS_PER_WORD;
 779   rtx temp;
 780   int all_zero = 0;
 781   int all_one = 0;
 782
 783   /* There is a case not handled here:
 784      a structure with a known alignment of just a halfword
 785      and a field split across two aligned halfwords within the structure.
 786      Or likewise a structure with a known alignment of just a byte
 787      and a field split across two bytes.
 788      Such cases are not supposed to be able to occur.  */
 789
 790   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 791     {
 792       gcc_assert (!offset);
 793       /* Special treatment for a bit field split across two registers.  */
 794       if (bitsize + bitpos > BITS_PER_WORD)
 795         {
 796           store_split_bit_field (op0, bitsize, bitpos, value);
 797           return;
 798         }
 799     }
 800   else
 801     {
 802       /* Get the proper mode to use for this field.  We want a mode that
 803          includes the entire field.  If such a mode would be larger than
 804          a word, we won't be doing the extraction the normal way.
 805          We don't want a mode bigger than the destination.  */
 806
 807       mode = GET_MODE (op0);
 808       if (GET_MODE_BITSIZE (mode) == 0
 809           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 810         mode = word_mode;
 811       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 812                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 813
 814       if (mode == VOIDmode)
 815         {
 816           /* The only way this should occur is if the field spans word
 817              boundaries.  */
 818           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 819                                  value);
 820           return;
 821         }
 822
 823       total_bits = GET_MODE_BITSIZE (mode);
 824
 825       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 826          be in the range 0 to total_bits-1, and put any excess bytes in
 827          OFFSET.  */
 828       if (bitpos >= total_bits)
 829         {
 830           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 831           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 832                      * BITS_PER_UNIT);
 833         }
 834
 835       /* Get ref to an aligned byte, halfword, or word containing the field.
 836          Adjust BITPOS to be position within a word,
 837          and OFFSET to be the offset of that word.
 838          Then alter OP0 to refer to that word.  */
 839       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 840       offset -= (offset % (total_bits / BITS_PER_UNIT));
 841       op0 = adjust_address (op0, mode, offset);
 842     }
 843
 844   mode = GET_MODE (op0);
 845
 846   /* Now MODE is either some integral mode for a MEM as OP0,
 847      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 848      The bit field is contained entirely within OP0.
 849      BITPOS is the starting bit number within OP0.
 850      (OP0's mode may actually be narrower than MODE.)  */
 851
 852   if (BYTES_BIG_ENDIAN)
 853       /* BITPOS is the distance between our msb
 854          and that of the containing datum.
 855          Convert it to the distance from the lsb.  */
 856       bitpos = total_bits - bitsize - bitpos;
 857
 858   /* Now BITPOS is always the distance between our lsb
 859      and that of OP0.  */
 860
 861   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 862      we must first convert its mode to MODE.  */
 863
 864   if (GET_CODE (value) == CONST_INT)
 865     {
 866       HOST_WIDE_INT v = INTVAL (value);
 867
 868       if (bitsize < HOST_BITS_PER_WIDE_INT)
 869         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 870
 871       if (v == 0)
 872         all_zero = 1;
 873       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 874                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 875                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 876         all_one = 1;
 877
 878       value = lshift_value (mode, value, bitpos, bitsize);
 879     }
 880   else
 881     {
 882       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 883                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 884
 885       if (GET_MODE (value) != mode)
 886         {
 887           if ((REG_P (value) || GET_CODE (value) == SUBREG)
 888               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 889             value = gen_lowpart (mode, value);
 890           else
 891             value = convert_to_mode (mode, value, 1);
 892         }
 893
 894       if (must_and)
 895         value = expand_binop (mode, and_optab, value,
 896                               mask_rtx (mode, 0, bitsize, 0),
 897                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 898       if (bitpos > 0)
 899         value = expand_shift (LSHIFT_EXPR, mode, value,
 900                               build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
 901     }
 902
 903   /* Now clear the chosen bits in OP0,
 904      except that if VALUE is -1 we need not bother.  */
 905   /* We keep the intermediates in registers to allow CSE to combine
 906      consecutive bitfield assignments.  */
 907
 908   temp = force_reg (mode, op0);
 909
 910   if (! all_one)
 911     {
 912       temp = expand_binop (mode, and_optab, temp,
 913                            mask_rtx (mode, bitpos, bitsize, 1),
 914                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
 915       temp = force_reg (mode, temp);
 916     }
 917
 918   /* Now logical-or VALUE into OP0, unless it is zero.  */
 919
 920   if (! all_zero)
 921     {
 922       temp = expand_binop (mode, ior_optab, temp, value,
 923                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
 924       temp = force_reg (mode, temp);
 925     }
 926
 927   if (op0 != temp)
 928     emit_move_insn (op0, temp);
 929 }
 930 \f
 931 /* Store a bit field that is split across multiple accessible memory objects.
 932
 933    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 934    BITSIZE is the field width; BITPOS the position of its first bit
 935    (within the word).
 936    VALUE is the value to store.
 937
 938    This does not yet handle fields wider than BITS_PER_WORD.  */
 939
 940 static void
 941 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 942                        unsigned HOST_WIDE_INT bitpos, rtx value)
 943 {
 944   unsigned int unit;
 945   unsigned int bitsdone = 0;
 946
 947   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 948      much at a time.  */
 949   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 950     unit = BITS_PER_WORD;
 951   else
 952     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
 953
 954   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 955      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 956      that VALUE might be a floating-point constant.  */
 957   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
 958     {
 959       rtx word = gen_lowpart_common (word_mode, value);
 960
 961       if (word && (value != word))
 962         value = word;
 963       else
 964         value = gen_lowpart_common (word_mode,
 965                                     force_reg (GET_MODE (value) != VOIDmode
 966                                                ? GET_MODE (value)
 967                                                : word_mode, value));
 968     }
 969
 970   while (bitsdone < bitsize)
 971     {
 972       unsigned HOST_WIDE_INT thissize;
 973       rtx part, word;
 974       unsigned HOST_WIDE_INT thispos;
 975       unsigned HOST_WIDE_INT offset;
 976
 977       offset = (bitpos + bitsdone) / unit;
 978       thispos = (bitpos + bitsdone) % unit;
 979
 980       /* THISSIZE must not overrun a word boundary.  Otherwise,
 981          store_fixed_bit_field will call us again, and we will mutually
 982          recurse forever.  */
 983       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
 984       thissize = MIN (thissize, unit - thispos);
 985
 986       if (BYTES_BIG_ENDIAN)
 987         {
 988           int total_bits;
 989
 990           /* We must do an endian conversion exactly the same way as it is
 991              done in extract_bit_field, so that the two calls to
 992              extract_fixed_bit_field will have comparable arguments.  */
 993           if (!MEM_P (value) || GET_MODE (value) == BLKmode)
 994             total_bits = BITS_PER_WORD;
 995           else
 996             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
 997
 998           /* Fetch successively less significant portions.  */
 999           if (GET_CODE (value) == CONST_INT)
1000             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1001                              >> (bitsize - bitsdone - thissize))
1002                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1003           else
1004             /* The args are chosen so that the last part includes the
1005                lsb.  Give extract_bit_field the value it needs (with
1006                endianness compensation) to fetch the piece we want.  */
1007             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1008                                             total_bits - bitsize + bitsdone,
1009                                             NULL_RTX, 1);
1010         }
1011       else
1012         {
1013           /* Fetch successively more significant portions.  */
1014           if (GET_CODE (value) == CONST_INT)
1015             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1016                              >> bitsdone)
1017                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1018           else
1019             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1020                                             bitsdone, NULL_RTX, 1);
1021         }
1022
1023       /* If OP0 is a register, then handle OFFSET here.
1024
1025          When handling multiword bitfields, extract_bit_field may pass
1026          down a word_mode SUBREG of a larger REG for a bitfield that actually
1027          crosses a word boundary.  Thus, for a SUBREG, we must find
1028          the current word starting from the base register.  */
1029       if (GET_CODE (op0) == SUBREG)
1030         {
1031           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1032           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1033                                         GET_MODE (SUBREG_REG (op0)));
1034           offset = 0;
1035         }
1036       else if (REG_P (op0))
1037         {
1038           word = operand_subword_force (op0, offset, GET_MODE (op0));
1039           offset = 0;
1040         }
1041       else
1042         word = op0;
1043
1044       /* OFFSET is in UNITs, and UNIT is in bits.
1045          store_fixed_bit_field wants offset in bytes.  */
1046       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1047                              thispos, part);
1048       bitsdone += thissize;
1049     }
1050 }
1051 \f
1052 /* Generate code to extract a byte-field from STR_RTX
1053    containing BITSIZE bits, starting at BITNUM,
1054    and put it in TARGET if possible (if TARGET is nonzero).
1055    Regardless of TARGET, we return the rtx for where the value is placed.
1056
1057    STR_RTX is the structure containing the byte (a REG or MEM).
1058    UNSIGNEDP is nonzero if this is an unsigned bit field.
1059    MODE is the natural mode of the field value once extracted.
1060    TMODE is the mode the caller would like the value to have;
1061    but the value may be returned with type MODE instead.
1062
1063    TOTAL_SIZE is the size in bytes of the containing structure,
1064    or -1 if varying.
1065
1066    If a TARGET is specified and we can store in it at no extra cost,
1067    we do so, and return TARGET.
1068    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1069    if they are equally easy.  */
1070
1071 rtx
1072 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1073                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1074                    enum machine_mode mode, enum machine_mode tmode)
1075 {
1076   unsigned int unit
1077     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1078   unsigned HOST_WIDE_INT offset, bitpos;
1079   rtx op0 = str_rtx;
1080   rtx spec_target = target;
1081   rtx spec_target_subreg = 0;
1082   enum machine_mode int_mode;
1083   enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1084   enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1085   enum machine_mode mode1;
1086   int byte_offset;
1087
1088   if (tmode == VOIDmode)
1089     tmode = mode;
1090
1091   while (GET_CODE (op0) == SUBREG)
1092     {
1093       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1094       op0 = SUBREG_REG (op0);
1095     }
1096
1097   /* If we have an out-of-bounds access to a register, just return an
1098      uninitialized register of the required mode.  This can occur if the
1099      source code contains an out-of-bounds access to a small array.  */
1100   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1101     return gen_reg_rtx (tmode);
1102
1103   if (REG_P (op0)
1104       && mode == GET_MODE (op0)
1105       && bitnum == 0
1106       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1107     {
1108       /* We're trying to extract a full register from itself.  */
1109       return op0;
1110     }
1111
1112   /* Use vec_extract patterns for extracting parts of vectors whenever
1113      available.  */
1114   if (VECTOR_MODE_P (GET_MODE (op0))
1115       && !MEM_P (op0)
1116       && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
1117           != CODE_FOR_nothing)
1118       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1119           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1120     {
1121       enum machine_mode outermode = GET_MODE (op0);
1122       enum machine_mode innermode = GET_MODE_INNER (outermode);
1123       int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
1124       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1125       rtx rtxpos = GEN_INT (pos);
1126       rtx src = op0;
1127       rtx dest = NULL, pat, seq;
1128       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1129       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1130       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1131
1132       if (innermode == tmode || innermode == mode)
1133         dest = target;
1134
1135       if (!dest)
1136         dest = gen_reg_rtx (innermode);
1137
1138       start_sequence ();
1139
1140       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1141         dest = copy_to_mode_reg (mode0, dest);
1142
1143       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1144         src = copy_to_mode_reg (mode1, src);
1145
1146       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1147         rtxpos = copy_to_mode_reg (mode1, rtxpos);
1148
1149       /* We could handle this, but we should always be called with a pseudo
1150          for our targets and all insns should take them as outputs.  */
1151       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1152                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
1153                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1154
1155       pat = GEN_FCN (icode) (dest, src, rtxpos);
1156       seq = get_insns ();
1157       end_sequence ();
1158       if (pat)
1159         {
1160           emit_insn (seq);
1161           emit_insn (pat);
1162           return dest;
1163         }
1164     }
1165
1166   /* Make sure we are playing with integral modes.  Pun with subregs
1167      if we aren't.  */
1168   {
1169     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1170     if (imode != GET_MODE (op0))
1171       {
1172         if (MEM_P (op0))
1173           op0 = adjust_address (op0, imode, 0);
1174         else
1175           {
1176             gcc_assert (imode != BLKmode);
1177             op0 = gen_lowpart (imode, op0);
1178
1179             /* If we got a SUBREG, force it into a register since we
1180                aren't going to be able to do another SUBREG on it.  */
1181             if (GET_CODE (op0) == SUBREG)
1182               op0 = force_reg (imode, op0);
1183           }
1184       }
1185   }
1186
1187   /* We may be accessing data outside the field, which means
1188      we can alias adjacent data.  */
1189   if (MEM_P (op0))
1190     {
1191       op0 = shallow_copy_rtx (op0);
1192       set_mem_alias_set (op0, 0);
1193       set_mem_expr (op0, 0);
1194     }
1195
1196   /* Extraction of a full-word or multi-word value from a structure
1197      in a register or aligned memory can be done with just a SUBREG.
1198      A subword value in the least significant part of a register
1199      can also be extracted with a SUBREG.  For this, we need the
1200      byte offset of the value in op0.  */
1201
1202   bitpos = bitnum % unit;
1203   offset = bitnum / unit;
1204   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1205
1206   /* If OP0 is a register, BITPOS must count within a word.
1207      But as we have it, it counts within whatever size OP0 now has.
1208      On a bigendian machine, these are not the same, so convert.  */
1209   if (BYTES_BIG_ENDIAN
1210       && !MEM_P (op0)
1211       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1212     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1213
1214   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1215      If that's wrong, the solution is to test for it and set TARGET to 0
1216      if needed.  */
1217
1218   /* Only scalar integer modes can be converted via subregs.  There is an
1219      additional problem for FP modes here in that they can have a precision
1220      which is different from the size.  mode_for_size uses precision, but
1221      we want a mode based on the size, so we must avoid calling it for FP
1222      modes.  */
1223   mode1  = (SCALAR_INT_MODE_P (tmode)
1224             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1225             : mode);
1226
1227   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1228         && bitpos % BITS_PER_WORD == 0)
1229        || (mode1 != BLKmode
1230            /* ??? The big endian test here is wrong.  This is correct
1231               if the value is in a register, and if mode_for_size is not
1232               the same mode as op0.  This causes us to get unnecessarily
1233               inefficient code from the Thumb port when -mbig-endian.  */
1234            && (BYTES_BIG_ENDIAN
1235                ? bitpos + bitsize == BITS_PER_WORD
1236                : bitpos == 0)))
1237       && ((!MEM_P (op0)
1238            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1239                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1240            && GET_MODE_SIZE (mode1) != 0
1241            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1242           || (MEM_P (op0)
1243               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1244                   || (offset * BITS_PER_UNIT % bitsize == 0
1245                       && MEM_ALIGN (op0) % bitsize == 0)))))
1246     {
1247       if (mode1 != GET_MODE (op0))
1248         {
1249           if (MEM_P (op0))
1250             op0 = adjust_address (op0, mode1, offset);
1251           else
1252             {
1253               rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1254                                              byte_offset);
1255               if (sub == NULL)
1256                 goto no_subreg_mode_swap;
1257               op0 = sub;
1258             }
1259         }
1260       if (mode1 != mode)
1261         return convert_to_mode (tmode, op0, unsignedp);
1262       return op0;
1263     }
1264  no_subreg_mode_swap:
1265
1266   /* Handle fields bigger than a word.  */
1267
1268   if (bitsize > BITS_PER_WORD)
1269     {
1270       /* Here we transfer the words of the field
1271          in the order least significant first.
1272          This is because the most significant word is the one which may
1273          be less than full.  */
1274
1275       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1276       unsigned int i;
1277
1278       if (target == 0 || !REG_P (target))
1279         target = gen_reg_rtx (mode);
1280
1281       /* Indicate for flow that the entire target reg is being set.  */
1282       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1283
1284       for (i = 0; i < nwords; i++)
1285         {
1286           /* If I is 0, use the low-order word in both field and target;
1287              if I is 1, use the next to lowest word; and so on.  */
1288           /* Word number in TARGET to use.  */
1289           unsigned int wordnum
1290             = (WORDS_BIG_ENDIAN
1291                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1292                : i);
1293           /* Offset from start of field in OP0.  */
1294           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1295                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1296                                                 * (int) BITS_PER_WORD))
1297                                      : (int) i * BITS_PER_WORD);
1298           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1299           rtx result_part
1300             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1301                                            bitsize - i * BITS_PER_WORD),
1302                                  bitnum + bit_offset, 1, target_part, mode,
1303                                  word_mode);
1304
1305           gcc_assert (target_part);
1306
1307           if (result_part != target_part)
1308             emit_move_insn (target_part, result_part);
1309         }
1310
1311       if (unsignedp)
1312         {
1313           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1314              need to be zero'd out.  */
1315           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1316             {
1317               unsigned int i, total_words;
1318
1319               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1320               for (i = nwords; i < total_words; i++)
1321                 emit_move_insn
1322                   (operand_subword (target,
1323                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1324                                     1, VOIDmode),
1325                    const0_rtx);
1326             }
1327           return target;
1328         }
1329
1330       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1331       target = expand_shift (LSHIFT_EXPR, mode, target,
1332                              build_int_cst (NULL_TREE,
1333                                             GET_MODE_BITSIZE (mode) - bitsize),
1334                              NULL_RTX, 0);
1335       return expand_shift (RSHIFT_EXPR, mode, target,
1336                            build_int_cst (NULL_TREE,
1337                                           GET_MODE_BITSIZE (mode) - bitsize),
1338                            NULL_RTX, 0);
1339     }
1340
1341   /* From here on we know the desired field is smaller than a word.  */
1342
1343   /* Check if there is a correspondingly-sized integer field, so we can
1344      safely extract it as one size of integer, if necessary; then
1345      truncate or extend to the size that is wanted; then use SUBREGs or
1346      convert_to_mode to get one of the modes we really wanted.  */
1347
1348   int_mode = int_mode_for_mode (tmode);
1349   if (int_mode == BLKmode)
1350     int_mode = int_mode_for_mode (mode);
1351   /* Should probably push op0 out to memory and then do a load.  */
1352   gcc_assert (int_mode != BLKmode);
1353
1354   /* OFFSET is the number of words or bytes (UNIT says which)
1355      from STR_RTX to the first word or byte containing part of the field.  */
1356   if (!MEM_P (op0))
1357     {
1358       if (offset != 0
1359           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1360         {
1361           if (!REG_P (op0))
1362             op0 = copy_to_reg (op0);
1363           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1364                                 op0, (offset * UNITS_PER_WORD));
1365         }
1366       offset = 0;
1367     }
1368
1369   /* Now OFFSET is nonzero only for memory operands.  */
1370
1371   if (unsignedp)
1372     {
1373       if (HAVE_extzv
1374           && (GET_MODE_BITSIZE (extzv_mode) >= bitsize)
1375           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1376                 && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1377         {
1378           unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1379           rtx bitsize_rtx, bitpos_rtx;
1380           rtx last = get_last_insn ();
1381           rtx xop0 = op0;
1382           rtx xtarget = target;
1383           rtx xspec_target = spec_target;
1384           rtx xspec_target_subreg = spec_target_subreg;
1385           rtx pat;
1386           enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1387
1388           if (MEM_P (xop0))
1389             {
1390               int save_volatile_ok = volatile_ok;
1391               volatile_ok = 1;
1392
1393               /* Is the memory operand acceptable?  */
1394               if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1395                      (xop0, GET_MODE (xop0))))
1396                 {
1397                   /* No, load into a reg and extract from there.  */
1398                   enum machine_mode bestmode;
1399
1400                   /* Get the mode to use for inserting into this field.  If
1401                      OP0 is BLKmode, get the smallest mode consistent with the
1402                      alignment. If OP0 is a non-BLKmode object that is no
1403                      wider than MAXMODE, use its mode. Otherwise, use the
1404                      smallest mode containing the field.  */
1405
1406                   if (GET_MODE (xop0) == BLKmode
1407                       || (GET_MODE_SIZE (GET_MODE (op0))
1408                           > GET_MODE_SIZE (maxmode)))
1409                     bestmode = get_best_mode (bitsize, bitnum,
1410                                               MEM_ALIGN (xop0), maxmode,
1411                                               MEM_VOLATILE_P (xop0));
1412                   else
1413                     bestmode = GET_MODE (xop0);
1414
1415                   if (bestmode == VOIDmode
1416                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1417                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1418                     goto extzv_loses;
1419
1420                   /* Compute offset as multiple of this unit,
1421                      counting in bytes.  */
1422                   unit = GET_MODE_BITSIZE (bestmode);
1423                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1424                   xbitpos = bitnum % unit;
1425                   xop0 = adjust_address (xop0, bestmode, xoffset);
1426
1427                   /* Make sure register is big enough for the whole field. */
1428                   if (xoffset * BITS_PER_UNIT + unit
1429                       < offset * BITS_PER_UNIT + bitsize)
1430                     goto extzv_loses;
1431
1432                   /* Fetch it to a register in that size.  */
1433                   xop0 = force_reg (bestmode, xop0);
1434
1435                   /* XBITPOS counts within UNIT, which is what is expected.  */
1436                 }
1437               else
1438                 /* Get ref to first byte containing part of the field.  */
1439                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1440
1441               volatile_ok = save_volatile_ok;
1442             }
1443
1444           /* If op0 is a register, we need it in MAXMODE (which is usually
1445              SImode). to make it acceptable to the format of extzv.  */
1446           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1447             goto extzv_loses;
1448           if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1449             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1450
1451           /* On big-endian machines, we count bits from the most significant.
1452              If the bit field insn does not, we must invert.  */
1453           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1454             xbitpos = unit - bitsize - xbitpos;
1455
1456           /* Now convert from counting within UNIT to counting in MAXMODE.  */
1457           if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1458             xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1459
1460           unit = GET_MODE_BITSIZE (maxmode);
1461
1462           if (xtarget == 0
1463               || (flag_force_mem && MEM_P (xtarget)))
1464             xtarget = xspec_target = gen_reg_rtx (tmode);
1465
1466           if (GET_MODE (xtarget) != maxmode)
1467             {
1468               if (REG_P (xtarget))
1469                 {
1470                   int wider = (GET_MODE_SIZE (maxmode)
1471                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1472                   xtarget = gen_lowpart (maxmode, xtarget);
1473                   if (wider)
1474                     xspec_target_subreg = xtarget;
1475                 }
1476               else
1477                 xtarget = gen_reg_rtx (maxmode);
1478             }
1479
1480           /* If this machine's extzv insists on a register target,
1481              make sure we have one.  */
1482           if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1483                  (xtarget, maxmode)))
1484             xtarget = gen_reg_rtx (maxmode);
1485
1486           bitsize_rtx = GEN_INT (bitsize);
1487           bitpos_rtx = GEN_INT (xbitpos);
1488
1489           pat = gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1490           if (pat)
1491             {
1492               emit_insn (pat);
1493               target = xtarget;
1494               spec_target = xspec_target;
1495               spec_target_subreg = xspec_target_subreg;
1496             }
1497           else
1498             {
1499               delete_insns_since (last);
1500               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1501                                                 bitpos, target, 1);
1502             }
1503         }
1504       else
1505       extzv_loses:
1506         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1507                                           bitpos, target, 1);
1508     }
1509   else
1510     {
1511       if (HAVE_extv
1512           && (GET_MODE_BITSIZE (extv_mode) >= bitsize)
1513           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1514                 && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1515         {
1516           int xbitpos = bitpos, xoffset = offset;
1517           rtx bitsize_rtx, bitpos_rtx;
1518           rtx last = get_last_insn ();
1519           rtx xop0 = op0, xtarget = target;
1520           rtx xspec_target = spec_target;
1521           rtx xspec_target_subreg = spec_target_subreg;
1522           rtx pat;
1523           enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1524
1525           if (MEM_P (xop0))
1526             {
1527               /* Is the memory operand acceptable?  */
1528               if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1529                      (xop0, GET_MODE (xop0))))
1530                 {
1531                   /* No, load into a reg and extract from there.  */
1532                   enum machine_mode bestmode;
1533
1534                   /* Get the mode to use for inserting into this field.  If
1535                      OP0 is BLKmode, get the smallest mode consistent with the
1536                      alignment. If OP0 is a non-BLKmode object that is no
1537                      wider than MAXMODE, use its mode. Otherwise, use the
1538                      smallest mode containing the field.  */
1539
1540                   if (GET_MODE (xop0) == BLKmode
1541                       || (GET_MODE_SIZE (GET_MODE (op0))
1542                           > GET_MODE_SIZE (maxmode)))
1543                     bestmode = get_best_mode (bitsize, bitnum,
1544                                               MEM_ALIGN (xop0), maxmode,
1545                                               MEM_VOLATILE_P (xop0));
1546                   else
1547                     bestmode = GET_MODE (xop0);
1548
1549                   if (bestmode == VOIDmode
1550                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1551                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1552                     goto extv_loses;
1553
1554                   /* Compute offset as multiple of this unit,
1555                      counting in bytes.  */
1556                   unit = GET_MODE_BITSIZE (bestmode);
1557                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1558                   xbitpos = bitnum % unit;
1559                   xop0 = adjust_address (xop0, bestmode, xoffset);
1560
1561                   /* Make sure register is big enough for the whole field. */
1562                   if (xoffset * BITS_PER_UNIT + unit
1563                       < offset * BITS_PER_UNIT + bitsize)
1564                     goto extv_loses;
1565
1566                   /* Fetch it to a register in that size.  */
1567                   xop0 = force_reg (bestmode, xop0);
1568
1569                   /* XBITPOS counts within UNIT, which is what is expected.  */
1570                 }
1571               else
1572                 /* Get ref to first byte containing part of the field.  */
1573                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1574             }
1575
1576           /* If op0 is a register, we need it in MAXMODE (which is usually
1577              SImode) to make it acceptable to the format of extv.  */
1578           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1579             goto extv_loses;
1580           if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1581             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1582
1583           /* On big-endian machines, we count bits from the most significant.
1584              If the bit field insn does not, we must invert.  */
1585           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1586             xbitpos = unit - bitsize - xbitpos;
1587
1588           /* XBITPOS counts within a size of UNIT.
1589              Adjust to count within a size of MAXMODE.  */
1590           if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1591             xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1592
1593           unit = GET_MODE_BITSIZE (maxmode);
1594
1595           if (xtarget == 0
1596               || (flag_force_mem && MEM_P (xtarget)))
1597             xtarget = xspec_target = gen_reg_rtx (tmode);
1598
1599           if (GET_MODE (xtarget) != maxmode)
1600             {
1601               if (REG_P (xtarget))
1602                 {
1603                   int wider = (GET_MODE_SIZE (maxmode)
1604                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1605                   xtarget = gen_lowpart (maxmode, xtarget);
1606                   if (wider)
1607                     xspec_target_subreg = xtarget;
1608                 }
1609               else
1610                 xtarget = gen_reg_rtx (maxmode);
1611             }
1612
1613           /* If this machine's extv insists on a register target,
1614              make sure we have one.  */
1615           if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1616                  (xtarget, maxmode)))
1617             xtarget = gen_reg_rtx (maxmode);
1618
1619           bitsize_rtx = GEN_INT (bitsize);
1620           bitpos_rtx = GEN_INT (xbitpos);
1621
1622           pat = gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1623           if (pat)
1624             {
1625               emit_insn (pat);
1626               target = xtarget;
1627               spec_target = xspec_target;
1628               spec_target_subreg = xspec_target_subreg;
1629             }
1630           else
1631             {
1632               delete_insns_since (last);
1633               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1634                                                 bitpos, target, 0);
1635             }
1636         }
1637       else
1638       extv_loses:
1639         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1640                                           bitpos, target, 0);
1641     }
1642   if (target == spec_target)
1643     return target;
1644   if (target == spec_target_subreg)
1645     return spec_target;
1646   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1647     {
1648       /* If the target mode is not a scalar integral, first convert to the
1649          integer mode of that size and then access it as a floating-point
1650          value via a SUBREG.  */
1651       if (!SCALAR_INT_MODE_P (tmode))
1652         {
1653           enum machine_mode smode
1654             = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1655           target = convert_to_mode (smode, target, unsignedp);
1656           target = force_reg (smode, target);
1657           return gen_lowpart (tmode, target);
1658         }
1659
1660       return convert_to_mode (tmode, target, unsignedp);
1661     }
1662   return target;
1663 }
1664 \f
1665 /* Extract a bit field using shifts and boolean operations
1666    Returns an rtx to represent the value.
1667    OP0 addresses a register (word) or memory (byte).
1668    BITPOS says which bit within the word or byte the bit field starts in.
1669    OFFSET says how many bytes farther the bit field starts;
1670     it is 0 if OP0 is a register.
1671    BITSIZE says how many bits long the bit field is.
1672     (If OP0 is a register, it may be narrower than a full word,
1673      but BITPOS still counts within a full word,
1674      which is significant on bigendian machines.)
1675
1676    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1677    If TARGET is nonzero, attempts to store the value there
1678    and return TARGET, but this is not guaranteed.
1679    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1680
1681 static rtx
1682 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1683                          unsigned HOST_WIDE_INT offset,
1684                          unsigned HOST_WIDE_INT bitsize,
1685                          unsigned HOST_WIDE_INT bitpos, rtx target,
1686                          int unsignedp)
1687 {
1688   unsigned int total_bits = BITS_PER_WORD;
1689   enum machine_mode mode;
1690
1691   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1692     {
1693       /* Special treatment for a bit field split across two registers.  */
1694       if (bitsize + bitpos > BITS_PER_WORD)
1695         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1696     }
1697   else
1698     {
1699       /* Get the proper mode to use for this field.  We want a mode that
1700          includes the entire field.  If such a mode would be larger than
1701          a word, we won't be doing the extraction the normal way.  */
1702
1703       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1704                             MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1705
1706       if (mode == VOIDmode)
1707         /* The only way this should occur is if the field spans word
1708            boundaries.  */
1709         return extract_split_bit_field (op0, bitsize,
1710                                         bitpos + offset * BITS_PER_UNIT,
1711                                         unsignedp);
1712
1713       total_bits = GET_MODE_BITSIZE (mode);
1714
1715       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1716          be in the range 0 to total_bits-1, and put any excess bytes in
1717          OFFSET.  */
1718       if (bitpos >= total_bits)
1719         {
1720           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1721           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1722                      * BITS_PER_UNIT);
1723         }
1724
1725       /* Get ref to an aligned byte, halfword, or word containing the field.
1726          Adjust BITPOS to be position within a word,
1727          and OFFSET to be the offset of that word.
1728          Then alter OP0 to refer to that word.  */
1729       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1730       offset -= (offset % (total_bits / BITS_PER_UNIT));
1731       op0 = adjust_address (op0, mode, offset);
1732     }
1733
1734   mode = GET_MODE (op0);
1735
1736   if (BYTES_BIG_ENDIAN)
1737     /* BITPOS is the distance between our msb and that of OP0.
1738        Convert it to the distance from the lsb.  */
1739     bitpos = total_bits - bitsize - bitpos;
1740
1741   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1742      We have reduced the big-endian case to the little-endian case.  */
1743
1744   if (unsignedp)
1745     {
1746       if (bitpos)
1747         {
1748           /* If the field does not already start at the lsb,
1749              shift it so it does.  */
1750           tree amount = build_int_cst (NULL_TREE, bitpos);
1751           /* Maybe propagate the target for the shift.  */
1752           /* But not if we will return it--could confuse integrate.c.  */
1753           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1754           if (tmode != mode) subtarget = 0;
1755           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1756         }
1757       /* Convert the value to the desired mode.  */
1758       if (mode != tmode)
1759         op0 = convert_to_mode (tmode, op0, 1);
1760
1761       /* Unless the msb of the field used to be the msb when we shifted,
1762          mask out the upper bits.  */
1763
1764       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1765         return expand_binop (GET_MODE (op0), and_optab, op0,
1766                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1767                              target, 1, OPTAB_LIB_WIDEN);
1768       return op0;
1769     }
1770
1771   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1772      then arithmetic-shift its lsb to the lsb of the word.  */
1773   op0 = force_reg (mode, op0);
1774   if (mode != tmode)
1775     target = 0;
1776
1777   /* Find the narrowest integer mode that contains the field.  */
1778
1779   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1780        mode = GET_MODE_WIDER_MODE (mode))
1781     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1782       {
1783         op0 = convert_to_mode (mode, op0, 0);
1784         break;
1785       }
1786
1787   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1788     {
1789       tree amount
1790         = build_int_cst (NULL_TREE,
1791                          GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1792       /* Maybe propagate the target for the shift.  */
1793       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1794       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1795     }
1796
1797   return expand_shift (RSHIFT_EXPR, mode, op0,
1798                        build_int_cst (NULL_TREE,
1799                                       GET_MODE_BITSIZE (mode) - bitsize),
1800                        target, 0);
1801 }
1802 \f
1803 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1804    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1805    complement of that if COMPLEMENT.  The mask is truncated if
1806    necessary to the width of mode MODE.  The mask is zero-extended if
1807    BITSIZE+BITPOS is too small for MODE.  */
1808
1809 static rtx
1810 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1811 {
1812   HOST_WIDE_INT masklow, maskhigh;
1813
1814   if (bitsize == 0)
1815     masklow = 0;
1816   else if (bitpos < HOST_BITS_PER_WIDE_INT)
1817     masklow = (HOST_WIDE_INT) -1 << bitpos;
1818   else
1819     masklow = 0;
1820
1821   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1822     masklow &= ((unsigned HOST_WIDE_INT) -1
1823                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1824
1825   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1826     maskhigh = -1;
1827   else
1828     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1829
1830   if (bitsize == 0)
1831     maskhigh = 0;
1832   else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1833     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1834                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1835   else
1836     maskhigh = 0;
1837
1838   if (complement)
1839     {
1840       maskhigh = ~maskhigh;
1841       masklow = ~masklow;
1842     }
1843
1844   return immed_double_const (masklow, maskhigh, mode);
1845 }
1846
1847 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1848    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1849
1850 static rtx
1851 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1852 {
1853   unsigned HOST_WIDE_INT v = INTVAL (value);
1854   HOST_WIDE_INT low, high;
1855
1856   if (bitsize < HOST_BITS_PER_WIDE_INT)
1857     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1858
1859   if (bitpos < HOST_BITS_PER_WIDE_INT)
1860     {
1861       low = v << bitpos;
1862       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1863     }
1864   else
1865     {
1866       low = 0;
1867       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1868     }
1869
1870   return immed_double_const (low, high, mode);
1871 }
1872 \f
1873 /* Extract a bit field from a memory by forcing the alignment of the
1874    memory.  This efficient only if the field spans at least 4 boundaries.
1875
1876    OP0 is the MEM.
1877    BITSIZE is the field width; BITPOS is the position of the first bit.
1878    UNSIGNEDP is true if the result should be zero-extended.  */
1879
1880 static rtx
1881 extract_force_align_mem_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1882                                    unsigned HOST_WIDE_INT bitpos,
1883                                    int unsignedp)
1884 {
1885   enum machine_mode mode, dmode;
1886   unsigned int m_bitsize, m_size;
1887   unsigned int sign_shift_up, sign_shift_dn;
1888   rtx base, a1, a2, v1, v2, comb, shift, result, start;
1889
1890   /* Choose a mode that will fit BITSIZE.  */
1891   mode = smallest_mode_for_size (bitsize, MODE_INT);
1892   m_size = GET_MODE_SIZE (mode);
1893   m_bitsize = GET_MODE_BITSIZE (mode);
1894
1895   /* Choose a mode twice as wide.  Fail if no such mode exists.  */
1896   dmode = mode_for_size (m_bitsize * 2, MODE_INT, false);
1897   if (dmode == BLKmode)
1898     return NULL;
1899
1900   do_pending_stack_adjust ();
1901   start = get_last_insn ();
1902
1903   /* At the end, we'll need an additional shift to deal with sign/zero
1904      extension.  By default this will be a left+right shift of the
1905      appropriate size.  But we may be able to eliminate one of them.  */
1906   sign_shift_up = sign_shift_dn = m_bitsize - bitsize;
1907
1908   if (STRICT_ALIGNMENT)
1909     {
1910       base = plus_constant (XEXP (op0, 0), bitpos / BITS_PER_UNIT);
1911       bitpos %= BITS_PER_UNIT;
1912
1913       /* We load two values to be concatenate.  There's an edge condition
1914          that bears notice -- an aligned value at the end of a page can
1915          only load one value lest we segfault.  So the two values we load
1916          are at "base & -size" and "(base + size - 1) & -size".  If base
1917          is unaligned, the addresses will be aligned and sequential; if
1918          base is aligned, the addresses will both be equal to base.  */
1919
1920       a1 = expand_simple_binop (Pmode, AND, force_operand (base, NULL),
1921                                 GEN_INT (-(HOST_WIDE_INT)m_size),
1922                                 NULL, true, OPTAB_LIB_WIDEN);
1923       mark_reg_pointer (a1, m_bitsize);
1924       v1 = gen_rtx_MEM (mode, a1);
1925       set_mem_align (v1, m_bitsize);
1926       v1 = force_reg (mode, validize_mem (v1));
1927
1928       a2 = plus_constant (base, GET_MODE_SIZE (mode) - 1);
1929       a2 = expand_simple_binop (Pmode, AND, force_operand (a2, NULL),
1930                                 GEN_INT (-(HOST_WIDE_INT)m_size),
1931                                 NULL, true, OPTAB_LIB_WIDEN);
1932       v2 = gen_rtx_MEM (mode, a2);
1933       set_mem_align (v2, m_bitsize);
1934       v2 = force_reg (mode, validize_mem (v2));
1935
1936       /* Combine these two values into a double-word value.  */
1937       if (m_bitsize == BITS_PER_WORD)
1938         {
1939           comb = gen_reg_rtx (dmode);
1940           emit_insn (gen_rtx_CLOBBER (VOIDmode, comb));
1941           emit_move_insn (gen_rtx_SUBREG (mode, comb, 0), v1);
1942           emit_move_insn (gen_rtx_SUBREG (mode, comb, m_size), v2);
1943         }
1944       else
1945         {
1946           if (BYTES_BIG_ENDIAN)
1947             comb = v1, v1 = v2, v2 = comb;
1948           v1 = convert_modes (dmode, mode, v1, true);
1949           if (v1 == NULL)
1950             goto fail;
1951           v2 = convert_modes (dmode, mode, v2, true);
1952           v2 = expand_simple_binop (dmode, ASHIFT, v2, GEN_INT (m_bitsize),
1953                                     NULL, true, OPTAB_LIB_WIDEN);
1954           if (v2 == NULL)
1955             goto fail;
1956           comb = expand_simple_binop (dmode, IOR, v1, v2, NULL,
1957                                       true, OPTAB_LIB_WIDEN);
1958           if (comb == NULL)
1959             goto fail;
1960         }
1961
1962       shift = expand_simple_binop (Pmode, AND, base, GEN_INT (m_size - 1),
1963                                    NULL, true, OPTAB_LIB_WIDEN);
1964       shift = expand_mult (Pmode, shift, GEN_INT (BITS_PER_UNIT), NULL, 1);
1965
1966       if (bitpos != 0)
1967         {
1968           if (sign_shift_up <= bitpos)
1969             bitpos -= sign_shift_up, sign_shift_up = 0;
1970           shift = expand_simple_binop (Pmode, PLUS, shift, GEN_INT (bitpos),
1971                                        NULL, true, OPTAB_LIB_WIDEN);
1972         }
1973     }
1974   else
1975     {
1976       unsigned HOST_WIDE_INT offset = bitpos / BITS_PER_UNIT;
1977       bitpos %= BITS_PER_UNIT;
1978
1979       /* When strict alignment is not required, we can just load directly
1980          from memory without masking.  If the remaining BITPOS offset is
1981          small enough, we may be able to do all operations in MODE as
1982          opposed to DMODE.  */
1983       if (bitpos + bitsize <= m_bitsize)
1984         dmode = mode;
1985       comb = adjust_address (op0, dmode, offset);
1986
1987       if (sign_shift_up <= bitpos)
1988         bitpos -= sign_shift_up, sign_shift_up = 0;
1989       shift = GEN_INT (bitpos);
1990     }
1991
1992   /* Shift down the double-word such that the requested value is at bit 0.  */
1993   if (shift != const0_rtx)
1994     comb = expand_simple_binop (dmode, unsignedp ? LSHIFTRT : ASHIFTRT,
1995                                 comb, shift, NULL, unsignedp, OPTAB_LIB_WIDEN);
1996   if (comb == NULL)
1997     goto fail;
1998
1999   /* If the field exactly matches MODE, then all we need to do is return the
2000      lowpart.  Otherwise, shift to get the sign bits set properly.  */
2001   result = force_reg (mode, gen_lowpart (mode, comb));
2002
2003   if (sign_shift_up)
2004     result = expand_simple_binop (mode, ASHIFT, result,
2005                                   GEN_INT (sign_shift_up),
2006                                   NULL_RTX, 0, OPTAB_LIB_WIDEN);
2007   if (sign_shift_dn)
2008     result = expand_simple_binop (mode, unsignedp ? LSHIFTRT : ASHIFTRT,
2009                                   result, GEN_INT (sign_shift_dn),
2010                                   NULL_RTX, 0, OPTAB_LIB_WIDEN);
2011
2012   return result;
2013
2014  fail:
2015   delete_insns_since (start);
2016   return NULL;
2017 }
2018
2019 /* Extract a bit field that is split across two words
2020    and return an RTX for the result.
2021
2022    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2023    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2024    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
2025
2026 static rtx
2027 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2028                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
2029 {
2030   unsigned int unit;
2031   unsigned int bitsdone = 0;
2032   rtx result = NULL_RTX;
2033   int first = 1;
2034
2035   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2036      much at a time.  */
2037   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2038     unit = BITS_PER_WORD;
2039   else
2040     {
2041       unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2042       if (0 && bitsize / unit > 2)
2043         {
2044           rtx tmp = extract_force_align_mem_bit_field (op0, bitsize, bitpos,
2045                                                        unsignedp);
2046           if (tmp)
2047             return tmp;
2048         }
2049     }
2050
2051   while (bitsdone < bitsize)
2052     {
2053       unsigned HOST_WIDE_INT thissize;
2054       rtx part, word;
2055       unsigned HOST_WIDE_INT thispos;
2056       unsigned HOST_WIDE_INT offset;
2057
2058       offset = (bitpos + bitsdone) / unit;
2059       thispos = (bitpos + bitsdone) % unit;
2060
2061       /* THISSIZE must not overrun a word boundary.  Otherwise,
2062          extract_fixed_bit_field will call us again, and we will mutually
2063          recurse forever.  */
2064       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2065       thissize = MIN (thissize, unit - thispos);
2066
2067       /* If OP0 is a register, then handle OFFSET here.
2068
2069          When handling multiword bitfields, extract_bit_field may pass
2070          down a word_mode SUBREG of a larger REG for a bitfield that actually
2071          crosses a word boundary.  Thus, for a SUBREG, we must find
2072          the current word starting from the base register.  */
2073       if (GET_CODE (op0) == SUBREG)
2074         {
2075           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2076           word = operand_subword_force (SUBREG_REG (op0), word_offset,
2077                                         GET_MODE (SUBREG_REG (op0)));
2078           offset = 0;
2079         }
2080       else if (REG_P (op0))
2081         {
2082           word = operand_subword_force (op0, offset, GET_MODE (op0));
2083           offset = 0;
2084         }
2085       else
2086         word = op0;
2087
2088       /* Extract the parts in bit-counting order,
2089          whose meaning is determined by BYTES_PER_UNIT.
2090          OFFSET is in UNITs, and UNIT is in bits.
2091          extract_fixed_bit_field wants offset in bytes.  */
2092       part = extract_fixed_bit_field (word_mode, word,
2093                                       offset * unit / BITS_PER_UNIT,
2094                                       thissize, thispos, 0, 1);
2095       bitsdone += thissize;
2096
2097       /* Shift this part into place for the result.  */
2098       if (BYTES_BIG_ENDIAN)
2099         {
2100           if (bitsize != bitsdone)
2101             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2102                                  build_int_cst (NULL_TREE, bitsize - bitsdone),
2103                                  0, 1);
2104         }
2105       else
2106         {
2107           if (bitsdone != thissize)
2108             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2109                                  build_int_cst (NULL_TREE,
2110                                                 bitsdone - thissize), 0, 1);
2111         }
2112
2113       if (first)
2114         result = part;
2115       else
2116         /* Combine the parts with bitwise or.  This works
2117            because we extracted each part as an unsigned bit field.  */
2118         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2119                                OPTAB_LIB_WIDEN);
2120
2121       first = 0;
2122     }
2123
2124   /* Unsigned bit field: we are done.  */
2125   if (unsignedp)
2126     return result;
2127   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2128   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2129                          build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2130                          NULL_RTX, 0);
2131   return expand_shift (RSHIFT_EXPR, word_mode, result,
2132                        build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2133                        NULL_RTX, 0);
2134 }
2135 \f
2136 /* Add INC into TARGET.  */
2137
2138 void
2139 expand_inc (rtx target, rtx inc)
2140 {
2141   rtx value = expand_binop (GET_MODE (target), add_optab,
2142                             target, inc,
2143                             target, 0, OPTAB_LIB_WIDEN);
2144   if (value != target)
2145     emit_move_insn (target, value);
2146 }
2147
2148 /* Subtract DEC from TARGET.  */
2149
2150 void
2151 expand_dec (rtx target, rtx dec)
2152 {
2153   rtx value = expand_binop (GET_MODE (target), sub_optab,
2154                             target, dec,
2155                             target, 0, OPTAB_LIB_WIDEN);
2156   if (value != target)
2157     emit_move_insn (target, value);
2158 }
2159 \f
2160 /* Output a shift instruction for expression code CODE,
2161    with SHIFTED being the rtx for the value to shift,
2162    and AMOUNT the tree for the amount to shift by.
2163    Store the result in the rtx TARGET, if that is convenient.
2164    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2165    Return the rtx for where the value is.  */
2166
2167 rtx
2168 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2169               tree amount, rtx target, int unsignedp)
2170 {
2171   rtx op1, temp = 0;
2172   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2173   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2174   int try;
2175
2176   /* Previously detected shift-counts computed by NEGATE_EXPR
2177      and shifted in the other direction; but that does not work
2178      on all machines.  */
2179
2180   op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
2181
2182   if (SHIFT_COUNT_TRUNCATED)
2183     {
2184       if (GET_CODE (op1) == CONST_INT
2185           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2186               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2187         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2188                        % GET_MODE_BITSIZE (mode));
2189       else if (GET_CODE (op1) == SUBREG
2190                && subreg_lowpart_p (op1))
2191         op1 = SUBREG_REG (op1);
2192     }
2193
2194   if (op1 == const0_rtx)
2195     return shifted;
2196
2197   /* Check whether its cheaper to implement a left shift by a constant
2198      bit count by a sequence of additions.  */
2199   if (code == LSHIFT_EXPR
2200       && GET_CODE (op1) == CONST_INT
2201       && INTVAL (op1) > 0
2202       && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2203       && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode])
2204     {
2205       int i;
2206       for (i = 0; i < INTVAL (op1); i++)
2207         {
2208           temp = force_reg (mode, shifted);
2209           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2210                                   unsignedp, OPTAB_LIB_WIDEN);
2211         }
2212       return shifted;
2213     }
2214
2215   for (try = 0; temp == 0 && try < 3; try++)
2216     {
2217       enum optab_methods methods;
2218
2219       if (try == 0)
2220         methods = OPTAB_DIRECT;
2221       else if (try == 1)
2222         methods = OPTAB_WIDEN;
2223       else
2224         methods = OPTAB_LIB_WIDEN;
2225
2226       if (rotate)
2227         {
2228           /* Widening does not work for rotation.  */
2229           if (methods == OPTAB_WIDEN)
2230             continue;
2231           else if (methods == OPTAB_LIB_WIDEN)
2232             {
2233               /* If we have been unable to open-code this by a rotation,
2234                  do it as the IOR of two shifts.  I.e., to rotate A
2235                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2236                  where C is the bitsize of A.
2237
2238                  It is theoretically possible that the target machine might
2239                  not be able to perform either shift and hence we would
2240                  be making two libcalls rather than just the one for the
2241                  shift (similarly if IOR could not be done).  We will allow
2242                  this extremely unlikely lossage to avoid complicating the
2243                  code below.  */
2244
2245               rtx subtarget = target == shifted ? 0 : target;
2246               tree new_amount, other_amount;
2247               rtx temp1;
2248               tree type = TREE_TYPE (amount);
2249               if (GET_MODE (op1) != TYPE_MODE (type)
2250                   && GET_MODE (op1) != VOIDmode)
2251                 op1 = convert_to_mode (TYPE_MODE (type), op1, 1);
2252               new_amount = make_tree (type, op1);
2253               other_amount
2254                 = fold (build2 (MINUS_EXPR, type,
2255                                 build_int_cst (type, GET_MODE_BITSIZE (mode)),
2256                                 new_amount));
2257
2258               shifted = force_reg (mode, shifted);
2259
2260               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2261                                    mode, shifted, new_amount, 0, 1);
2262               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2263                                     mode, shifted, other_amount, subtarget, 1);
2264               return expand_binop (mode, ior_optab, temp, temp1, target,
2265                                    unsignedp, methods);
2266             }
2267
2268           temp = expand_binop (mode,
2269                                left ? rotl_optab : rotr_optab,
2270                                shifted, op1, target, unsignedp, methods);
2271
2272           /* If we don't have the rotate, but we are rotating by a constant
2273              that is in range, try a rotate in the opposite direction.  */
2274
2275           if (temp == 0 && GET_CODE (op1) == CONST_INT
2276               && INTVAL (op1) > 0
2277               && (unsigned int) INTVAL (op1) < GET_MODE_BITSIZE (mode))
2278             temp = expand_binop (mode,
2279                                  left ? rotr_optab : rotl_optab,
2280                                  shifted,
2281                                  GEN_INT (GET_MODE_BITSIZE (mode)
2282                                           - INTVAL (op1)),
2283                                  target, unsignedp, methods);
2284         }
2285       else if (unsignedp)
2286         temp = expand_binop (mode,
2287                              left ? ashl_optab : lshr_optab,
2288                              shifted, op1, target, unsignedp, methods);
2289
2290       /* Do arithmetic shifts.
2291          Also, if we are going to widen the operand, we can just as well
2292          use an arithmetic right-shift instead of a logical one.  */
2293       if (temp == 0 && ! rotate
2294           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2295         {
2296           enum optab_methods methods1 = methods;
2297
2298           /* If trying to widen a log shift to an arithmetic shift,
2299              don't accept an arithmetic shift of the same size.  */
2300           if (unsignedp)
2301             methods1 = OPTAB_MUST_WIDEN;
2302
2303           /* Arithmetic shift */
2304
2305           temp = expand_binop (mode,
2306                                left ? ashl_optab : ashr_optab,
2307                                shifted, op1, target, unsignedp, methods1);
2308         }
2309
2310       /* We used to try extzv here for logical right shifts, but that was
2311          only useful for one machine, the VAX, and caused poor code
2312          generation there for lshrdi3, so the code was deleted and a
2313          define_expand for lshrsi3 was added to vax.md.  */
2314     }
2315
2316   gcc_assert (temp);
2317   return temp;
2318 }
2319 \f
2320 enum alg_code { alg_unknown, alg_zero, alg_m, alg_shift,
2321                   alg_add_t_m2, alg_sub_t_m2,
2322                   alg_add_factor, alg_sub_factor,
2323                   alg_add_t2_m, alg_sub_t2_m };
2324
2325 /* This structure holds the "cost" of a multiply sequence.  The
2326    "cost" field holds the total rtx_cost of every operator in the
2327    synthetic multiplication sequence, hence cost(a op b) is defined
2328    as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2329    The "latency" field holds the minimum possible latency of the
2330    synthetic multiply, on a hypothetical infinitely parallel CPU.
2331    This is the critical path, or the maximum height, of the expression
2332    tree which is the sum of rtx_costs on the most expensive path from
2333    any leaf to the root.  Hence latency(a op b) is defined as zero for
2334    leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise.  */
2335
2336 struct mult_cost {
2337   short cost;     /* Total rtx_cost of the multiplication sequence.  */
2338   short latency;  /* The latency of the multiplication sequence.  */
2339 };
2340
2341 /* This macro is used to compare a pointer to a mult_cost against an
2342    single integer "rtx_cost" value.  This is equivalent to the macro
2343    CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}.  */
2344 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y)    \
2345                              || ((X)->cost == (Y) && (X)->latency < (Y)))
2346
2347 /* This macro is used to compare two pointers to mult_costs against
2348    each other.  The macro returns true if X is cheaper than Y.
2349    Currently, the cheaper of two mult_costs is the one with the
2350    lower "cost".  If "cost"s are tied, the lower latency is cheaper.  */
2351 #define CHEAPER_MULT_COST(X,Y)  ((X)->cost < (Y)->cost          \
2352                                  || ((X)->cost == (Y)->cost     \
2353                                      && (X)->latency < (Y)->latency))
2354
2355 /* This structure records a sequence of operations.
2356    `ops' is the number of operations recorded.
2357    `cost' is their total cost.
2358    The operations are stored in `op' and the corresponding
2359    logarithms of the integer coefficients in `log'.
2360
2361    These are the operations:
2362    alg_zero             total := 0;
2363    alg_m                total := multiplicand;
2364    alg_shift            total := total * coeff
2365    alg_add_t_m2         total := total + multiplicand * coeff;
2366    alg_sub_t_m2         total := total - multiplicand * coeff;
2367    alg_add_factor       total := total * coeff + total;
2368    alg_sub_factor       total := total * coeff - total;
2369    alg_add_t2_m         total := total * coeff + multiplicand;
2370    alg_sub_t2_m         total := total * coeff - multiplicand;
2371
2372    The first operand must be either alg_zero or alg_m.  */
2373
2374 struct algorithm
2375 {
2376   struct mult_cost cost;
2377   short ops;
2378   /* The size of the OP and LOG fields are not directly related to the
2379      word size, but the worst-case algorithms will be if we have few
2380      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2381      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2382      in total wordsize operations.  */
2383   enum alg_code op[MAX_BITS_PER_WORD];
2384   char log[MAX_BITS_PER_WORD];
2385 };
2386
2387 /* The entry for our multiplication cache/hash table.  */
2388 struct alg_hash_entry {
2389   /* The number we are multiplying by.  */
2390   unsigned int t;
2391
2392   /* The mode in which we are multiplying something by T.  */
2393   enum machine_mode mode;
2394
2395   /* The best multiplication algorithm for t.  */
2396   enum alg_code alg;
2397 };
2398
2399 /* The number of cache/hash entries.  */
2400 #define NUM_ALG_HASH_ENTRIES 307
2401
2402 /* Each entry of ALG_HASH caches alg_code for some integer.  This is
2403    actually a hash table.  If we have a collision, that the older
2404    entry is kicked out.  */
2405 static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2406
2407 /* Indicates the type of fixup needed after a constant multiplication.
2408    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2409    the result should be negated, and ADD_VARIANT means that the
2410    multiplicand should be added to the result.  */
2411 enum mult_variant {basic_variant, negate_variant, add_variant};
2412
2413 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2414                         const struct mult_cost *, enum machine_mode mode);
2415 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2416                                  struct algorithm *, enum mult_variant *, int);
2417 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2418                               const struct algorithm *, enum mult_variant);
2419 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2420                                                  int, rtx *, int *, int *);
2421 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2422 static rtx extract_high_half (enum machine_mode, rtx);
2423 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2424 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2425                                        int, int);
2426 /* Compute and return the best algorithm for multiplying by T.
2427    The algorithm must cost less than cost_limit
2428    If retval.cost >= COST_LIMIT, no algorithm was found and all
2429    other field of the returned struct are undefined.
2430    MODE is the machine mode of the multiplication.  */
2431
2432 static void
2433 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2434             const struct mult_cost *cost_limit, enum machine_mode mode)
2435 {
2436   int m;
2437   struct algorithm *alg_in, *best_alg;
2438   struct mult_cost best_cost;
2439   struct mult_cost new_limit;
2440   int op_cost, op_latency;
2441   unsigned HOST_WIDE_INT q;
2442   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2443   int hash_index;
2444   bool cache_hit = false;
2445   enum alg_code cache_alg = alg_zero;
2446
2447   /* Indicate that no algorithm is yet found.  If no algorithm
2448      is found, this value will be returned and indicate failure.  */
2449   alg_out->cost.cost = cost_limit->cost + 1;
2450   alg_out->cost.latency = cost_limit->latency + 1;
2451
2452   if (cost_limit->cost < 0
2453       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2454     return;
2455
2456   /* Restrict the bits of "t" to the multiplication's mode.  */
2457   t &= GET_MODE_MASK (mode);
2458
2459   /* t == 1 can be done in zero cost.  */
2460   if (t == 1)
2461     {
2462       alg_out->ops = 1;
2463       alg_out->cost.cost = 0;
2464       alg_out->cost.latency = 0;
2465       alg_out->op[0] = alg_m;
2466       return;
2467     }
2468
2469   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2470      fail now.  */
2471   if (t == 0)
2472     {
2473       if (MULT_COST_LESS (cost_limit, zero_cost))
2474         return;
2475       else
2476         {
2477           alg_out->ops = 1;
2478           alg_out->cost.cost = zero_cost;
2479           alg_out->cost.latency = zero_cost;
2480           alg_out->op[0] = alg_zero;
2481           return;
2482         }
2483     }
2484
2485   /* We'll be needing a couple extra algorithm structures now.  */
2486
2487   alg_in = alloca (sizeof (struct algorithm));
2488   best_alg = alloca (sizeof (struct algorithm));
2489   best_cost = *cost_limit;
2490
2491   /* Compute the hash index.  */
2492   hash_index = (t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES;
2493
2494   /* See if we already know what to do for T.  */
2495   if (alg_hash[hash_index].t == t
2496       && alg_hash[hash_index].mode == mode
2497       && alg_hash[hash_index].alg != alg_unknown)
2498     {
2499       cache_hit = true;
2500       cache_alg = alg_hash[hash_index].alg;
2501       switch (cache_alg)
2502         {
2503         case alg_shift:
2504           goto do_alg_shift;
2505
2506         case alg_add_t_m2:
2507         case alg_sub_t_m2:
2508           goto do_alg_addsub_t_m2;
2509
2510         case alg_add_factor:
2511         case alg_sub_factor:
2512           goto do_alg_addsub_factor;
2513
2514         case alg_add_t2_m:
2515           goto do_alg_add_t2_m;
2516
2517         case alg_sub_t2_m:
2518           goto do_alg_sub_t2_m;
2519
2520         default:
2521           gcc_unreachable ();
2522         }
2523     }
2524
2525   /* If we have a group of zero bits at the low-order part of T, try
2526      multiplying by the remaining bits and then doing a shift.  */
2527
2528   if ((t & 1) == 0)
2529     {
2530     do_alg_shift:
2531       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2532       if (m < maxm)
2533         {
2534           q = t >> m;
2535           /* The function expand_shift will choose between a shift and
2536              a sequence of additions, so the observed cost is given as
2537              MIN (m * add_cost[mode], shift_cost[mode][m]).  */
2538           op_cost = m * add_cost[mode];
2539           if (shift_cost[mode][m] < op_cost)
2540             op_cost = shift_cost[mode][m];
2541           new_limit.cost = best_cost.cost - op_cost;
2542           new_limit.latency = best_cost.latency - op_cost;
2543           synth_mult (alg_in, q, &new_limit, mode);
2544
2545           alg_in->cost.cost += op_cost;
2546           alg_in->cost.latency += op_cost;
2547           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2548             {
2549               struct algorithm *x;
2550               best_cost = alg_in->cost;
2551               x = alg_in, alg_in = best_alg, best_alg = x;
2552               best_alg->log[best_alg->ops] = m;
2553               best_alg->op[best_alg->ops] = alg_shift;
2554             }
2555         }
2556       if (cache_hit)
2557         goto done;
2558     }
2559
2560   /* If we have an odd number, add or subtract one.  */
2561   if ((t & 1) != 0)
2562     {
2563       unsigned HOST_WIDE_INT w;
2564
2565     do_alg_addsub_t_m2:
2566       for (w = 1; (w & t) != 0; w <<= 1)
2567         ;
2568       /* If T was -1, then W will be zero after the loop.  This is another
2569          case where T ends with ...111.  Handling this with (T + 1) and
2570          subtract 1 produces slightly better code and results in algorithm
2571          selection much faster than treating it like the ...0111 case
2572          below.  */
2573       if (w == 0
2574           || (w > 2
2575               /* Reject the case where t is 3.
2576                  Thus we prefer addition in that case.  */
2577               && t != 3))
2578         {
2579           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2580
2581           op_cost = add_cost[mode];
2582           new_limit.cost = best_cost.cost - op_cost;
2583           new_limit.latency = best_cost.latency - op_cost;
2584           synth_mult (alg_in, t + 1, &new_limit, mode);
2585
2586           alg_in->cost.cost += op_cost;
2587           alg_in->cost.latency += op_cost;
2588           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2589             {
2590               struct algorithm *x;
2591               best_cost = alg_in->cost;
2592               x = alg_in, alg_in = best_alg, best_alg = x;
2593               best_alg->log[best_alg->ops] = 0;
2594               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2595             }
2596         }
2597       else
2598         {
2599           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2600
2601           op_cost = add_cost[mode];
2602           new_limit.cost = best_cost.cost - op_cost;
2603           new_limit.latency = best_cost.latency - op_cost;
2604           synth_mult (alg_in, t - 1, &new_limit, mode);
2605
2606           alg_in->cost.cost += op_cost;
2607           alg_in->cost.latency += op_cost;
2608           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2609             {
2610               struct algorithm *x;
2611               best_cost = alg_in->cost;
2612               x = alg_in, alg_in = best_alg, best_alg = x;
2613               best_alg->log[best_alg->ops] = 0;
2614               best_alg->op[best_alg->ops] = alg_add_t_m2;
2615             }
2616         }
2617       if (cache_hit)
2618         goto done;
2619     }
2620
2621   /* Look for factors of t of the form
2622      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2623      If we find such a factor, we can multiply by t using an algorithm that
2624      multiplies by q, shift the result by m and add/subtract it to itself.
2625
2626      We search for large factors first and loop down, even if large factors
2627      are less probable than small; if we find a large factor we will find a
2628      good sequence quickly, and therefore be able to prune (by decreasing
2629      COST_LIMIT) the search.  */
2630
2631  do_alg_addsub_factor:
2632   for (m = floor_log2 (t - 1); m >= 2; m--)
2633     {
2634       unsigned HOST_WIDE_INT d;
2635
2636       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2637       if (t % d == 0 && t > d && m < maxm
2638           && (!cache_hit || cache_alg == alg_add_factor))
2639         {
2640           /* If the target has a cheap shift-and-add instruction use
2641              that in preference to a shift insn followed by an add insn.
2642              Assume that the shift-and-add is "atomic" with a latency
2643              equal to its cost, otherwise assume that on superscalar
2644              hardware the shift may be executed concurrently with the
2645              earlier steps in the algorithm.  */
2646           op_cost = add_cost[mode] + shift_cost[mode][m];
2647           if (shiftadd_cost[mode][m] < op_cost)
2648             {
2649               op_cost = shiftadd_cost[mode][m];
2650               op_latency = op_cost;
2651             }
2652           else
2653             op_latency = add_cost[mode];
2654
2655           new_limit.cost = best_cost.cost - op_cost;
2656           new_limit.latency = best_cost.latency - op_latency;
2657           synth_mult (alg_in, t / d, &new_limit, mode);
2658
2659           alg_in->cost.cost += op_cost;
2660           alg_in->cost.latency += op_latency;
2661           if (alg_in->cost.latency < op_cost)
2662             alg_in->cost.latency = op_cost;
2663           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2664             {
2665               struct algorithm *x;
2666               best_cost = alg_in->cost;
2667               x = alg_in, alg_in = best_alg, best_alg = x;
2668               best_alg->log[best_alg->ops] = m;
2669               best_alg->op[best_alg->ops] = alg_add_factor;
2670             }
2671           /* Other factors will have been taken care of in the recursion.  */
2672           break;
2673         }
2674
2675       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2676       if (t % d == 0 && t > d && m < maxm
2677           && (!cache_hit || cache_alg == alg_sub_factor))
2678         {
2679           /* If the target has a cheap shift-and-subtract insn use
2680              that in preference to a shift insn followed by a sub insn.
2681              Assume that the shift-and-sub is "atomic" with a latency
2682              equal to it's cost, otherwise assume that on superscalar
2683              hardware the shift may be executed concurrently with the
2684              earlier steps in the algorithm.  */
2685           op_cost = add_cost[mode] + shift_cost[mode][m];
2686           if (shiftsub_cost[mode][m] < op_cost)
2687             {
2688               op_cost = shiftsub_cost[mode][m];
2689               op_latency = op_cost;
2690             }
2691           else
2692             op_latency = add_cost[mode];
2693
2694           new_limit.cost = best_cost.cost - op_cost;
2695           new_limit.latency = best_cost.latency - op_latency;
2696           synth_mult (alg_in, t / d, &new_limit, mode);
2697
2698           alg_in->cost.cost += op_cost;
2699           alg_in->cost.latency += op_latency;
2700           if (alg_in->cost.latency < op_cost)
2701             alg_in->cost.latency = op_cost;
2702           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2703             {
2704               struct algorithm *x;
2705               best_cost = alg_in->cost;
2706               x = alg_in, alg_in = best_alg, best_alg = x;
2707               best_alg->log[best_alg->ops] = m;
2708               best_alg->op[best_alg->ops] = alg_sub_factor;
2709             }
2710           break;
2711         }
2712     }
2713   if (cache_hit)
2714     goto done;
2715
2716   /* Try shift-and-add (load effective address) instructions,
2717      i.e. do a*3, a*5, a*9.  */
2718   if ((t & 1) != 0)
2719     {
2720     do_alg_add_t2_m:
2721       q = t - 1;
2722       q = q & -q;
2723       m = exact_log2 (q);
2724       if (m >= 0 && m < maxm)
2725         {
2726           op_cost = shiftadd_cost[mode][m];
2727           new_limit.cost = best_cost.cost - op_cost;
2728           new_limit.latency = best_cost.latency - op_cost;
2729           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2730
2731           alg_in->cost.cost += op_cost;
2732           alg_in->cost.latency += op_cost;
2733           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2734             {
2735               struct algorithm *x;
2736               best_cost = alg_in->cost;
2737               x = alg_in, alg_in = best_alg, best_alg = x;
2738               best_alg->log[best_alg->ops] = m;
2739               best_alg->op[best_alg->ops] = alg_add_t2_m;
2740             }
2741         }
2742       if (cache_hit)
2743         goto done;
2744
2745     do_alg_sub_t2_m:
2746       q = t + 1;
2747       q = q & -q;
2748       m = exact_log2 (q);
2749       if (m >= 0 && m < maxm)
2750         {
2751           op_cost = shiftsub_cost[mode][m];
2752           new_limit.cost = best_cost.cost - op_cost;
2753           new_limit.latency = best_cost.latency - op_cost;
2754           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2755
2756           alg_in->cost.cost += op_cost;
2757           alg_in->cost.latency += op_cost;
2758           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2759             {
2760               struct algorithm *x;
2761               best_cost = alg_in->cost;
2762               x = alg_in, alg_in = best_alg, best_alg = x;
2763               best_alg->log[best_alg->ops] = m;
2764               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2765             }
2766         }
2767       if (cache_hit)
2768         goto done;
2769     }
2770
2771  done:
2772   /* If best_cost has not decreased, we have not found any algorithm.  */
2773   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2774     return;
2775
2776   /* Cache the result.  */
2777   if (!cache_hit)
2778     {
2779       alg_hash[hash_index].t = t;
2780       alg_hash[hash_index].mode = mode;
2781       alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2782     }
2783
2784   /* If we are getting a too long sequence for `struct algorithm'
2785      to record, make this search fail.  */
2786   if (best_alg->ops == MAX_BITS_PER_WORD)
2787     return;
2788
2789   /* Copy the algorithm from temporary space to the space at alg_out.
2790      We avoid using structure assignment because the majority of
2791      best_alg is normally undefined, and this is a critical function.  */
2792   alg_out->ops = best_alg->ops + 1;
2793   alg_out->cost = best_cost;
2794   memcpy (alg_out->op, best_alg->op,
2795           alg_out->ops * sizeof *alg_out->op);
2796   memcpy (alg_out->log, best_alg->log,
2797           alg_out->ops * sizeof *alg_out->log);
2798 }
2799 \f
2800 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2801    Try three variations:
2802
2803        - a shift/add sequence based on VAL itself
2804        - a shift/add sequence based on -VAL, followed by a negation
2805        - a shift/add sequence based on VAL - 1, followed by an addition.
2806
2807    Return true if the cheapest of these cost less than MULT_COST,
2808    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2809
2810 static bool
2811 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2812                      struct algorithm *alg, enum mult_variant *variant,
2813                      int mult_cost)
2814 {
2815   struct algorithm alg2;
2816   struct mult_cost limit;
2817   int op_cost;
2818
2819   /* Fail quickly for impossible bounds.  */
2820   if (mult_cost < 0)
2821     return false;
2822
2823   /* Ensure that mult_cost provides a reasonable upper bound.
2824      Any constant multiplication can be performed with less
2825      than 2 * bits additions.  */
2826   op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[mode];
2827   if (mult_cost > op_cost)
2828     mult_cost = op_cost;
2829
2830   *variant = basic_variant;
2831   limit.cost = mult_cost;
2832   limit.latency = mult_cost;
2833   synth_mult (alg, val, &limit, mode);
2834
2835   /* This works only if the inverted value actually fits in an
2836      `unsigned int' */
2837   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2838     {
2839       op_cost = neg_cost[mode];
2840       if (MULT_COST_LESS (&alg->cost, mult_cost))
2841         {
2842           limit.cost = alg->cost.cost - op_cost;
2843           limit.latency = alg->cost.latency - op_cost;
2844         }
2845       else
2846         {
2847           limit.cost = mult_cost - op_cost;
2848           limit.latency = mult_cost - op_cost;
2849         }
2850
2851       synth_mult (&alg2, -val, &limit, mode);
2852       alg2.cost.cost += op_cost;
2853       alg2.cost.latency += op_cost;
2854       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2855         *alg = alg2, *variant = negate_variant;
2856     }
2857
2858   /* This proves very useful for division-by-constant.  */
2859   op_cost = add_cost[mode];
2860   if (MULT_COST_LESS (&alg->cost, mult_cost))
2861     {
2862       limit.cost = alg->cost.cost - op_cost;
2863       limit.latency = alg->cost.latency - op_cost;
2864     }
2865   else
2866     {
2867       limit.cost = mult_cost - op_cost;
2868       limit.latency = mult_cost - op_cost;
2869     }
2870
2871   synth_mult (&alg2, val - 1, &limit, mode);
2872   alg2.cost.cost += op_cost;
2873   alg2.cost.latency += op_cost;
2874   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2875     *alg = alg2, *variant = add_variant;
2876
2877   return MULT_COST_LESS (&alg->cost, mult_cost);
2878 }
2879
2880 /* A subroutine of expand_mult, used for constant multiplications.
2881    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2882    convenient.  Use the shift/add sequence described by ALG and apply
2883    the final fixup specified by VARIANT.  */
2884
2885 static rtx
2886 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2887                    rtx target, const struct algorithm *alg,
2888                    enum mult_variant variant)
2889 {
2890   HOST_WIDE_INT val_so_far;
2891   rtx insn, accum, tem;
2892   int opno;
2893   enum machine_mode nmode;
2894
2895   /* Avoid referencing memory over and over.
2896      For speed, but also for correctness when mem is volatile.  */
2897   if (MEM_P (op0))
2898     op0 = force_reg (mode, op0);
2899
2900   /* ACCUM starts out either as OP0 or as a zero, depending on
2901      the first operation.  */
2902
2903   if (alg->op[0] == alg_zero)
2904     {
2905       accum = copy_to_mode_reg (mode, const0_rtx);
2906       val_so_far = 0;
2907     }
2908   else if (alg->op[0] == alg_m)
2909     {
2910       accum = copy_to_mode_reg (mode, op0);
2911       val_so_far = 1;
2912     }
2913   else
2914     gcc_unreachable ();
2915
2916   for (opno = 1; opno < alg->ops; opno++)
2917     {
2918       int log = alg->log[opno];
2919       rtx shift_subtarget = optimize ? 0 : accum;
2920       rtx add_target
2921         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2922            && !optimize)
2923           ? target : 0;
2924       rtx accum_target = optimize ? 0 : accum;
2925
2926       switch (alg->op[opno])
2927         {
2928         case alg_shift:
2929           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2930                                 build_int_cst (NULL_TREE, log),
2931                                 NULL_RTX, 0);
2932           val_so_far <<= log;
2933           break;
2934
2935         case alg_add_t_m2:
2936           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2937                               build_int_cst (NULL_TREE, log),
2938                               NULL_RTX, 0);
2939           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2940                                  add_target ? add_target : accum_target);
2941           val_so_far += (HOST_WIDE_INT) 1 << log;
2942           break;
2943
2944         case alg_sub_t_m2:
2945           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2946                               build_int_cst (NULL_TREE, log),
2947                               NULL_RTX, 0);
2948           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2949                                  add_target ? add_target : accum_target);
2950           val_so_far -= (HOST_WIDE_INT) 1 << log;
2951           break;
2952
2953         case alg_add_t2_m:
2954           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2955                                 build_int_cst (NULL_TREE, log),
2956                                 shift_subtarget,
2957                                 0);
2958           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2959                                  add_target ? add_target : accum_target);
2960           val_so_far = (val_so_far << log) + 1;
2961           break;
2962
2963         case alg_sub_t2_m:
2964           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2965                                 build_int_cst (NULL_TREE, log),
2966                                 shift_subtarget, 0);
2967           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2968                                  add_target ? add_target : accum_target);
2969           val_so_far = (val_so_far << log) - 1;
2970           break;
2971
2972         case alg_add_factor:
2973           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2974                               build_int_cst (NULL_TREE, log),
2975                               NULL_RTX, 0);
2976           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2977                                  add_target ? add_target : accum_target);
2978           val_so_far += val_so_far << log;
2979           break;
2980
2981         case alg_sub_factor:
2982           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2983                               build_int_cst (NULL_TREE, log),
2984                               NULL_RTX, 0);
2985           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2986                                  (add_target
2987                                   ? add_target : (optimize ? 0 : tem)));
2988           val_so_far = (val_so_far << log) - val_so_far;
2989           break;
2990
2991         default:
2992           gcc_unreachable ();
2993         }
2994
2995       /* Write a REG_EQUAL note on the last insn so that we can cse
2996          multiplication sequences.  Note that if ACCUM is a SUBREG,
2997          we've set the inner register and must properly indicate
2998          that.  */
2999
3000       tem = op0, nmode = mode;
3001       if (GET_CODE (accum) == SUBREG)
3002         {
3003           nmode = GET_MODE (SUBREG_REG (accum));
3004           tem = gen_lowpart (nmode, op0);
3005         }
3006
3007       insn = get_last_insn ();
3008       set_unique_reg_note (insn, REG_EQUAL,
3009                            gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
3010     }
3011
3012   if (variant == negate_variant)
3013     {
3014       val_so_far = -val_so_far;
3015       accum = expand_unop (mode, neg_optab, accum, target, 0);
3016     }
3017   else if (variant == add_variant)
3018     {
3019       val_so_far = val_so_far + 1;
3020       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3021     }
3022
3023   /* Compare only the bits of val and val_so_far that are significant
3024      in the result mode, to avoid sign-/zero-extension confusion.  */
3025   val &= GET_MODE_MASK (mode);
3026   val_so_far &= GET_MODE_MASK (mode);
3027   gcc_assert (val == val_so_far);
3028
3029   return accum;
3030 }
3031
3032 /* Perform a multiplication and return an rtx for the result.
3033    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3034    TARGET is a suggestion for where to store the result (an rtx).
3035
3036    We check specially for a constant integer as OP1.
3037    If you want this check for OP0 as well, then before calling
3038    you should swap the two operands if OP0 would be constant.  */
3039
3040 rtx
3041 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3042              int unsignedp)
3043 {
3044   rtx const_op1 = op1;
3045   enum mult_variant variant;
3046   struct algorithm algorithm;
3047
3048   /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3049      less than or equal in size to `unsigned int' this doesn't matter.
3050      If the mode is larger than `unsigned int', then synth_mult works only
3051      if the constant value exactly fits in an `unsigned int' without any
3052      truncation.  This means that multiplying by negative values does
3053      not work; results are off by 2^32 on a 32 bit machine.  */
3054
3055   /* If we are multiplying in DImode, it may still be a win
3056      to try to work with shifts and adds.  */
3057   if (GET_CODE (op1) == CONST_DOUBLE
3058       && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
3059       && HOST_BITS_PER_INT >= BITS_PER_WORD
3060       && CONST_DOUBLE_HIGH (op1) == 0)
3061     const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
3062   else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
3063            && GET_CODE (op1) == CONST_INT
3064            && INTVAL (op1) < 0)
3065     const_op1 = 0;
3066
3067   /* We used to test optimize here, on the grounds that it's better to
3068      produce a smaller program when -O is not used.
3069      But this causes such a terrible slowdown sometimes
3070      that it seems better to use synth_mult always.  */
3071
3072   if (const_op1 && GET_CODE (const_op1) == CONST_INT
3073       && (unsignedp || !flag_trapv))
3074     {
3075       HOST_WIDE_INT coeff = INTVAL (const_op1);
3076       int mult_cost;
3077
3078       /* Special case powers of two.  */
3079       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3080         {
3081           if (coeff == 0)
3082             return const0_rtx;
3083           if (coeff == 1)
3084             return op0;
3085           return expand_shift (LSHIFT_EXPR, mode, op0,
3086                                build_int_cst (NULL_TREE, floor_log2 (coeff)),
3087                                target, unsignedp);
3088         }
3089
3090       mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
3091       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3092                                mult_cost))
3093         return expand_mult_const (mode, op0, coeff, target,
3094                                   &algorithm, variant);
3095     }
3096
3097   if (GET_CODE (op0) == CONST_DOUBLE)
3098     {
3099       rtx temp = op0;
3100       op0 = op1;
3101       op1 = temp;
3102     }
3103
3104   /* Expand x*2.0 as x+x.  */
3105   if (GET_CODE (op1) == CONST_DOUBLE
3106       && GET_MODE_CLASS (mode) == MODE_FLOAT)
3107     {
3108       REAL_VALUE_TYPE d;
3109       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3110
3111       if (REAL_VALUES_EQUAL (d, dconst2))
3112         {
3113           op0 = force_reg (GET_MODE (op0), op0);
3114           return expand_binop (mode, add_optab, op0, op0,
3115                                target, unsignedp, OPTAB_LIB_WIDEN);
3116         }
3117     }
3118
3119   /* This used to use umul_optab if unsigned, but for non-widening multiply
3120      there is no difference between signed and unsigned.  */
3121   op0 = expand_binop (mode,
3122                       ! unsignedp
3123                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3124                       ? smulv_optab : smul_optab,
3125                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3126   gcc_assert (op0);
3127   return op0;
3128 }
3129 \f
3130 /* Return the smallest n such that 2**n >= X.  */
3131
3132 int
3133 ceil_log2 (unsigned HOST_WIDE_INT x)
3134 {
3135   return floor_log2 (x - 1) + 1;
3136 }
3137
3138 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3139    replace division by D, and put the least significant N bits of the result
3140    in *MULTIPLIER_PTR and return the most significant bit.
3141
3142    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3143    needed precision is in PRECISION (should be <= N).
3144
3145    PRECISION should be as small as possible so this function can choose
3146    multiplier more freely.
3147
3148    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3149    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3150
3151    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3152    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3153
3154 static
3155 unsigned HOST_WIDE_INT
3156 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3157                    rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3158 {
3159   HOST_WIDE_INT mhigh_hi, mlow_hi;
3160   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3161   int lgup, post_shift;
3162   int pow, pow2;
3163   unsigned HOST_WIDE_INT nl, dummy1;
3164   HOST_WIDE_INT nh, dummy2;
3165
3166   /* lgup = ceil(log2(divisor)); */
3167   lgup = ceil_log2 (d);
3168
3169   gcc_assert (lgup <= n);
3170
3171   pow = n + lgup;
3172   pow2 = n + lgup - precision;
3173
3174   /* We could handle this with some effort, but this case is much
3175      better handled directly with a scc insn, so rely on caller using
3176      that.  */
3177   gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3178
3179   /* mlow = 2^(N + lgup)/d */
3180  if (pow >= HOST_BITS_PER_WIDE_INT)
3181     {
3182       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3183       nl = 0;
3184     }
3185   else
3186     {
3187       nh = 0;
3188       nl = (unsigned HOST_WIDE_INT) 1 << pow;
3189     }
3190   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3191                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3192
3193   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
3194   if (pow2 >= HOST_BITS_PER_WIDE_INT)
3195     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3196   else
3197     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3198   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3199                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3200
3201   gcc_assert (!mhigh_hi || nh - d < d);
3202   gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3203   /* Assert that mlow < mhigh.  */
3204   gcc_assert (mlow_hi < mhigh_hi
3205               || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3206
3207   /* If precision == N, then mlow, mhigh exceed 2^N
3208      (but they do not exceed 2^(N+1)).  */
3209
3210   /* Reduce to lowest terms.  */
3211   for (post_shift = lgup; post_shift > 0; post_shift--)
3212     {
3213       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3214       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3215       if (ml_lo >= mh_lo)
3216         break;
3217
3218       mlow_hi = 0;
3219       mlow_lo = ml_lo;
3220       mhigh_hi = 0;
3221       mhigh_lo = mh_lo;
3222     }
3223
3224   *post_shift_ptr = post_shift;
3225   *lgup_ptr = lgup;
3226   if (n < HOST_BITS_PER_WIDE_INT)
3227     {
3228       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3229       *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3230       return mhigh_lo >= mask;
3231     }
3232   else
3233     {
3234       *multiplier_ptr = GEN_INT (mhigh_lo);
3235       return mhigh_hi;
3236     }
3237 }
3238
3239 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3240    congruent to 1 (mod 2**N).  */
3241
3242 static unsigned HOST_WIDE_INT
3243 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3244 {
3245   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3246
3247   /* The algorithm notes that the choice y = x satisfies
3248      x*y == 1 mod 2^3, since x is assumed odd.
3249      Each iteration doubles the number of bits of significance in y.  */
3250
3251   unsigned HOST_WIDE_INT mask;
3252   unsigned HOST_WIDE_INT y = x;
3253   int nbit = 3;
3254
3255   mask = (n == HOST_BITS_PER_WIDE_INT
3256           ? ~(unsigned HOST_WIDE_INT) 0
3257           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3258
3259   while (nbit < n)
3260     {
3261       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3262       nbit *= 2;
3263     }
3264   return y;
3265 }
3266
3267 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3268    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3269    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3270    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3271    become signed.
3272
3273    The result is put in TARGET if that is convenient.
3274
3275    MODE is the mode of operation.  */
3276
3277 rtx
3278 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3279                              rtx op1, rtx target, int unsignedp)
3280 {
3281   rtx tem;
3282   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3283
3284   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3285                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3286                       NULL_RTX, 0);
3287   tem = expand_and (mode, tem, op1, NULL_RTX);
3288   adj_operand
3289     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3290                      adj_operand);
3291
3292   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3293                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3294                       NULL_RTX, 0);
3295   tem = expand_and (mode, tem, op0, NULL_RTX);
3296   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3297                           target);
3298
3299   return target;
3300 }
3301
3302 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
3303
3304 static rtx
3305 extract_high_half (enum machine_mode mode, rtx op)
3306 {
3307   enum machine_mode wider_mode;
3308
3309   if (mode == word_mode)
3310     return gen_highpart (mode, op);
3311
3312   wider_mode = GET_MODE_WIDER_MODE (mode);
3313   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3314                      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3315   return convert_modes (mode, wider_mode, op, 0);
3316 }
3317
3318 /* Like expand_mult_highpart, but only consider using a multiplication
3319    optab.  OP1 is an rtx for the constant operand.  */
3320
3321 static rtx
3322 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3323                             rtx target, int unsignedp, int max_cost)
3324 {
3325   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3326   enum machine_mode wider_mode;
3327   optab moptab;
3328   rtx tem;
3329   int size;
3330
3331   wider_mode = GET_MODE_WIDER_MODE (mode);
3332   size = GET_MODE_BITSIZE (mode);
3333
3334   /* Firstly, try using a multiplication insn that only generates the needed
3335      high part of the product, and in the sign flavor of unsignedp.  */
3336   if (mul_highpart_cost[mode] < max_cost)
3337     {
3338       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3339       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3340                           unsignedp, OPTAB_DIRECT);
3341       if (tem)
3342         return tem;
3343     }
3344
3345   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3346      Need to adjust the result after the multiplication.  */
3347   if (size - 1 < BITS_PER_WORD
3348       && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
3349           + 4 * add_cost[mode] < max_cost))
3350     {
3351       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3352       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3353                           unsignedp, OPTAB_DIRECT);
3354       if (tem)
3355         /* We used the wrong signedness.  Adjust the result.  */
3356         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3357                                             tem, unsignedp);
3358     }
3359
3360   /* Try widening multiplication.  */
3361   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3362   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3363       && mul_widen_cost[wider_mode] < max_cost)
3364     {
3365       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3366                           unsignedp, OPTAB_WIDEN);
3367       if (tem)
3368         return extract_high_half (mode, tem);
3369     }
3370
3371   /* Try widening the mode and perform a non-widening multiplication.  */
3372   if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3373       && size - 1 < BITS_PER_WORD
3374       && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
3375     {
3376       rtx insns, wop0, wop1;
3377
3378       /* We need to widen the operands, for example to ensure the
3379          constant multiplier is correctly sign or zero extended.
3380          Use a sequence to clean-up any instructions emitted by
3381          the conversions if things don't work out.  */
3382       start_sequence ();
3383       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3384       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3385       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3386                           unsignedp, OPTAB_WIDEN);
3387       insns = get_insns ();
3388       end_sequence ();
3389
3390       if (tem)
3391         {
3392           emit_insn (insns);
3393           return extract_high_half (mode, tem);
3394         }
3395     }
3396
3397   /* Try widening multiplication of opposite signedness, and adjust.  */
3398   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3399   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3400       && size - 1 < BITS_PER_WORD
3401       && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
3402           + 4 * add_cost[mode] < max_cost))
3403     {
3404       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3405                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3406       if (tem != 0)
3407         {
3408           tem = extract_high_half (mode, tem);
3409           /* We used the wrong signedness.  Adjust the result.  */
3410           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3411                                               target, unsignedp);
3412         }
3413     }
3414
3415   return 0;
3416 }
3417
3418 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3419    putting the high half of the result in TARGET if that is convenient,
3420    and return where the result is.  If the operation can not be performed,
3421    0 is returned.
3422
3423    MODE is the mode of operation and result.
3424
3425    UNSIGNEDP nonzero means unsigned multiply.
3426
3427    MAX_COST is the total allowed cost for the expanded RTL.  */
3428
3429 static rtx
3430 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3431                       rtx target, int unsignedp, int max_cost)
3432 {
3433   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3434   unsigned HOST_WIDE_INT cnst1;
3435   int extra_cost;
3436   bool sign_adjust = false;
3437   enum mult_variant variant;
3438   struct algorithm alg;
3439   rtx tem;
3440
3441   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3442   gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3443
3444   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3445
3446   /* We can't optimize modes wider than BITS_PER_WORD.
3447      ??? We might be able to perform double-word arithmetic if
3448      mode == word_mode, however all the cost calculations in
3449      synth_mult etc. assume single-word operations.  */
3450   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3451     return expand_mult_highpart_optab (mode, op0, op1, target,
3452                                        unsignedp, max_cost);
3453
3454   extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3455
3456   /* Check whether we try to multiply by a negative constant.  */
3457   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3458     {
3459       sign_adjust = true;
3460       extra_cost += add_cost[mode];
3461     }
3462
3463   /* See whether shift/add multiplication is cheap enough.  */
3464   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3465                            max_cost - extra_cost))
3466     {
3467       /* See whether the specialized multiplication optabs are
3468          cheaper than the shift/add version.  */
3469       tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3470                                         alg.cost.cost + extra_cost);
3471       if (tem)
3472         return tem;
3473
3474       tem = convert_to_mode (wider_mode, op0, unsignedp);
3475       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3476       tem = extract_high_half (mode, tem);
3477
3478       /* Adjust result for signedness.  */
3479       if (sign_adjust)
3480         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3481
3482       return tem;
3483     }
3484   return expand_mult_highpart_optab (mode, op0, op1, target,
3485                                      unsignedp, max_cost);
3486 }
3487
3488
3489 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3490
3491 static rtx
3492 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3493 {
3494   unsigned HOST_WIDE_INT masklow, maskhigh;
3495   rtx result, temp, shift, label;
3496   int logd;
3497
3498   logd = floor_log2 (d);
3499   result = gen_reg_rtx (mode);
3500
3501   /* Avoid conditional branches when they're expensive.  */
3502   if (BRANCH_COST >= 2
3503       && !optimize_size)
3504     {
3505       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3506                                       mode, 0, -1);
3507       if (signmask)
3508         {
3509           signmask = force_reg (mode, signmask);
3510           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3511           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3512
3513           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3514              which instruction sequence to use.  If logical right shifts
3515              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3516              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3517
3518           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3519           if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing
3520               || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3521             {
3522               temp = expand_binop (mode, xor_optab, op0, signmask,
3523                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3524               temp = expand_binop (mode, sub_optab, temp, signmask,
3525                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3526               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3527                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3528               temp = expand_binop (mode, xor_optab, temp, signmask,
3529                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3530               temp = expand_binop (mode, sub_optab, temp, signmask,
3531                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3532             }
3533           else
3534             {
3535               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3536                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3537               signmask = force_reg (mode, signmask);
3538
3539               temp = expand_binop (mode, add_optab, op0, signmask,
3540                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3541               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3542                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3543               temp = expand_binop (mode, sub_optab, temp, signmask,
3544                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3545             }
3546           return temp;
3547         }
3548     }
3549
3550   /* Mask contains the mode's signbit and the significant bits of the
3551      modulus.  By including the signbit in the operation, many targets
3552      can avoid an explicit compare operation in the following comparison
3553      against zero.  */
3554
3555   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3556   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3557     {
3558       masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3559       maskhigh = -1;
3560     }
3561   else
3562     maskhigh = (HOST_WIDE_INT) -1
3563                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3564
3565   temp = expand_binop (mode, and_optab, op0,
3566                        immed_double_const (masklow, maskhigh, mode),
3567                        result, 1, OPTAB_LIB_WIDEN);
3568   if (temp != result)
3569     emit_move_insn (result, temp);
3570
3571   label = gen_label_rtx ();
3572   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3573
3574   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3575                        0, OPTAB_LIB_WIDEN);
3576   masklow = (HOST_WIDE_INT) -1 << logd;
3577   maskhigh = -1;
3578   temp = expand_binop (mode, ior_optab, temp,
3579                        immed_double_const (masklow, maskhigh, mode),
3580                        result, 1, OPTAB_LIB_WIDEN);
3581   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3582                        0, OPTAB_LIB_WIDEN);
3583   if (temp != result)
3584     emit_move_insn (result, temp);
3585   emit_label (label);
3586   return result;
3587 }
3588
3589 /* Expand signed division of OP0 by a power of two D in mode MODE.
3590    This routine is only called for positive values of D.  */
3591
3592 static rtx
3593 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3594 {
3595   rtx temp, label;
3596   tree shift;
3597   int logd;
3598
3599   logd = floor_log2 (d);
3600   shift = build_int_cst (NULL_TREE, logd);
3601
3602   if (d == 2 && BRANCH_COST >= 1)
3603     {
3604       temp = gen_reg_rtx (mode);
3605       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3606       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3607                            0, OPTAB_LIB_WIDEN);
3608       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3609     }
3610
3611 #ifdef HAVE_conditional_move
3612   if (BRANCH_COST >= 2)
3613     {
3614       rtx temp2;
3615
3616       /* ??? emit_conditional_move forces a stack adjustment via
3617          compare_from_rtx so, if the sequence is discarded, it will
3618          be lost.  Do it now instead.  */
3619       do_pending_stack_adjust ();
3620
3621       start_sequence ();
3622       temp2 = copy_to_mode_reg (mode, op0);
3623       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3624                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3625       temp = force_reg (mode, temp);
3626
3627       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3628       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3629                                      mode, temp, temp2, mode, 0);
3630       if (temp2)
3631         {
3632           rtx seq = get_insns ();
3633           end_sequence ();
3634           emit_insn (seq);
3635           return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3636         }
3637       end_sequence ();
3638     }
3639 #endif
3640
3641   if (BRANCH_COST >= 2)
3642     {
3643       int ushift = GET_MODE_BITSIZE (mode) - logd;
3644
3645       temp = gen_reg_rtx (mode);
3646       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3647       if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
3648         temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3649                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3650       else
3651         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3652                              build_int_cst (NULL_TREE, ushift),
3653                              NULL_RTX, 1);
3654       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3655                            0, OPTAB_LIB_WIDEN);
3656       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3657     }
3658
3659   label = gen_label_rtx ();
3660   temp = copy_to_mode_reg (mode, op0);
3661   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3662   expand_inc (temp, GEN_INT (d - 1));
3663   emit_label (label);
3664   return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3665 }
3666 \f
3667 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3668    if that is convenient, and returning where the result is.
3669    You may request either the quotient or the remainder as the result;
3670    specify REM_FLAG nonzero to get the remainder.
3671
3672    CODE is the expression code for which kind of division this is;
3673    it controls how rounding is done.  MODE is the machine mode to use.
3674    UNSIGNEDP nonzero means do unsigned division.  */
3675
3676 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3677    and then correct it by or'ing in missing high bits
3678    if result of ANDI is nonzero.
3679    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3680    This could optimize to a bfexts instruction.
3681    But C doesn't use these operations, so their optimizations are
3682    left for later.  */
3683 /* ??? For modulo, we don't actually need the highpart of the first product,
3684    the low part will do nicely.  And for small divisors, the second multiply
3685    can also be a low-part only multiply or even be completely left out.
3686    E.g. to calculate the remainder of a division by 3 with a 32 bit
3687    multiply, multiply with 0x55555556 and extract the upper two bits;
3688    the result is exact for inputs up to 0x1fffffff.
3689    The input range can be reduced by using cross-sum rules.
3690    For odd divisors >= 3, the following table gives right shift counts
3691    so that if a number is shifted by an integer multiple of the given
3692    amount, the remainder stays the same:
3693    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3694    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3695    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3696    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3697    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3698
3699    Cross-sum rules for even numbers can be derived by leaving as many bits
3700    to the right alone as the divisor has zeros to the right.
3701    E.g. if x is an unsigned 32 bit number:
3702    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3703    */
3704
3705 rtx
3706 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3707                rtx op0, rtx op1, rtx target, int unsignedp)
3708 {
3709   enum machine_mode compute_mode;
3710   rtx tquotient;
3711   rtx quotient = 0, remainder = 0;
3712   rtx last;
3713   int size;
3714   rtx insn, set;
3715   optab optab1, optab2;
3716   int op1_is_constant, op1_is_pow2 = 0;
3717   int max_cost, extra_cost;
3718   static HOST_WIDE_INT last_div_const = 0;
3719   static HOST_WIDE_INT ext_op1;
3720
3721   op1_is_constant = GET_CODE (op1) == CONST_INT;
3722   if (op1_is_constant)
3723     {
3724       ext_op1 = INTVAL (op1);
3725       if (unsignedp)
3726         ext_op1 &= GET_MODE_MASK (mode);
3727       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3728                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3729     }
3730
3731   /*
3732      This is the structure of expand_divmod:
3733
3734      First comes code to fix up the operands so we can perform the operations
3735      correctly and efficiently.
3736
3737      Second comes a switch statement with code specific for each rounding mode.
3738      For some special operands this code emits all RTL for the desired
3739      operation, for other cases, it generates only a quotient and stores it in
3740      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3741      to indicate that it has not done anything.
3742
3743      Last comes code that finishes the operation.  If QUOTIENT is set and
3744      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3745      QUOTIENT is not set, it is computed using trunc rounding.
3746
3747      We try to generate special code for division and remainder when OP1 is a
3748      constant.  If |OP1| = 2**n we can use shifts and some other fast
3749      operations.  For other values of OP1, we compute a carefully selected
3750      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3751      by m.
3752
3753      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3754      half of the product.  Different strategies for generating the product are
3755      implemented in expand_mult_highpart.
3756
3757      If what we actually want is the remainder, we generate that by another
3758      by-constant multiplication and a subtraction.  */
3759
3760   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3761      code below will malfunction if we are, so check here and handle
3762      the special case if so.  */
3763   if (op1 == const1_rtx)
3764     return rem_flag ? const0_rtx : op0;
3765
3766     /* When dividing by -1, we could get an overflow.
3767      negv_optab can handle overflows.  */
3768   if (! unsignedp && op1 == constm1_rtx)
3769     {
3770       if (rem_flag)
3771         return const0_rtx;
3772       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3773                           ? negv_optab : neg_optab, op0, target, 0);
3774     }
3775
3776   if (target
3777       /* Don't use the function value register as a target
3778          since we have to read it as well as write it,
3779          and function-inlining gets confused by this.  */
3780       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3781           /* Don't clobber an operand while doing a multi-step calculation.  */
3782           || ((rem_flag || op1_is_constant)
3783               && (reg_mentioned_p (target, op0)
3784                   || (MEM_P (op0) && MEM_P (target))))
3785           || reg_mentioned_p (target, op1)
3786           || (MEM_P (op1) && MEM_P (target))))
3787     target = 0;
3788
3789   /* Get the mode in which to perform this computation.  Normally it will
3790      be MODE, but sometimes we can't do the desired operation in MODE.
3791      If so, pick a wider mode in which we can do the operation.  Convert
3792      to that mode at the start to avoid repeated conversions.
3793
3794      First see what operations we need.  These depend on the expression
3795      we are evaluating.  (We assume that divxx3 insns exist under the
3796      same conditions that modxx3 insns and that these insns don't normally
3797      fail.  If these assumptions are not correct, we may generate less
3798      efficient code in some cases.)
3799
3800      Then see if we find a mode in which we can open-code that operation
3801      (either a division, modulus, or shift).  Finally, check for the smallest
3802      mode for which we can do the operation with a library call.  */
3803
3804   /* We might want to refine this now that we have division-by-constant
3805      optimization.  Since expand_mult_highpart tries so many variants, it is
3806      not straightforward to generalize this.  Maybe we should make an array
3807      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3808
3809   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3810             ? (unsignedp ? lshr_optab : ashr_optab)
3811             : (unsignedp ? udiv_optab : sdiv_optab));
3812   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3813             ? optab1
3814             : (unsignedp ? udivmod_optab : sdivmod_optab));
3815
3816   for (compute_mode = mode; compute_mode != VOIDmode;
3817        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3818     if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
3819         || optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
3820       break;
3821
3822   if (compute_mode == VOIDmode)
3823     for (compute_mode = mode; compute_mode != VOIDmode;
3824          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3825       if (optab1->handlers[compute_mode].libfunc
3826           || optab2->handlers[compute_mode].libfunc)
3827         break;
3828
3829   /* If we still couldn't find a mode, use MODE, but we'll probably abort
3830      in expand_binop.  */
3831   if (compute_mode == VOIDmode)
3832     compute_mode = mode;
3833
3834   if (target && GET_MODE (target) == compute_mode)
3835     tquotient = target;
3836   else
3837     tquotient = gen_reg_rtx (compute_mode);
3838
3839   size = GET_MODE_BITSIZE (compute_mode);
3840 #if 0
3841   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3842      (mode), and thereby get better code when OP1 is a constant.  Do that
3843      later.  It will require going over all usages of SIZE below.  */
3844   size = GET_MODE_BITSIZE (mode);
3845 #endif
3846
3847   /* Only deduct something for a REM if the last divide done was
3848      for a different constant.   Then set the constant of the last
3849      divide.  */
3850   max_cost = div_cost[compute_mode]
3851     - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3852                       && INTVAL (op1) == last_div_const)
3853        ? mul_cost[compute_mode] + add_cost[compute_mode]
3854        : 0);
3855
3856   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3857
3858   /* Now convert to the best mode to use.  */
3859   if (compute_mode != mode)
3860     {
3861       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3862       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3863
3864       /* convert_modes may have placed op1 into a register, so we
3865          must recompute the following.  */
3866       op1_is_constant = GET_CODE (op1) == CONST_INT;
3867       op1_is_pow2 = (op1_is_constant
3868                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3869                           || (! unsignedp
3870                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3871     }
3872
3873   /* If one of the operands is a volatile MEM, copy it into a register.  */
3874
3875   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3876     op0 = force_reg (compute_mode, op0);
3877   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3878     op1 = force_reg (compute_mode, op1);
3879
3880   /* If we need the remainder or if OP1 is constant, we need to
3881      put OP0 in a register in case it has any queued subexpressions.  */
3882   if (rem_flag || op1_is_constant)
3883     op0 = force_reg (compute_mode, op0);
3884
3885   last = get_last_insn ();
3886
3887   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3888   if (unsignedp)
3889     {
3890       if (code == FLOOR_DIV_EXPR)
3891         code = TRUNC_DIV_EXPR;
3892       if (code == FLOOR_MOD_EXPR)
3893         code = TRUNC_MOD_EXPR;
3894       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3895         code = TRUNC_DIV_EXPR;
3896     }
3897
3898   if (op1 != const0_rtx)
3899     switch (code)
3900       {
3901       case TRUNC_MOD_EXPR:
3902       case TRUNC_DIV_EXPR:
3903         if (op1_is_constant)
3904           {
3905             if (unsignedp)
3906               {
3907                 unsigned HOST_WIDE_INT mh;
3908                 int pre_shift, post_shift;
3909                 int dummy;
3910                 rtx ml;
3911                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3912                                             & GET_MODE_MASK (compute_mode));
3913
3914                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3915                   {
3916                     pre_shift = floor_log2 (d);
3917                     if (rem_flag)
3918                       {
3919                         remainder
3920                           = expand_binop (compute_mode, and_optab, op0,
3921                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3922                                           remainder, 1,
3923                                           OPTAB_LIB_WIDEN);
3924                         if (remainder)
3925                           return gen_lowpart (mode, remainder);
3926                       }
3927                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3928                                              build_int_cst (NULL_TREE,
3929                                                             pre_shift),
3930                                              tquotient, 1);
3931                   }
3932                 else if (size <= HOST_BITS_PER_WIDE_INT)
3933                   {
3934                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3935                       {
3936                         /* Most significant bit of divisor is set; emit an scc
3937                            insn.  */
3938                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3939                                                     compute_mode, 1, 1);
3940                         if (quotient == 0)
3941                           goto fail1;
3942                       }
3943                     else
3944                       {
3945                         /* Find a suitable multiplier and right shift count
3946                            instead of multiplying with D.  */
3947
3948                         mh = choose_multiplier (d, size, size,
3949                                                 &ml, &post_shift, &dummy);
3950
3951                         /* If the suggested multiplier is more than SIZE bits,
3952                            we can do better for even divisors, using an
3953                            initial right shift.  */
3954                         if (mh != 0 && (d & 1) == 0)
3955                           {
3956                             pre_shift = floor_log2 (d & -d);
3957                             mh = choose_multiplier (d >> pre_shift, size,
3958                                                     size - pre_shift,
3959                                                     &ml, &post_shift, &dummy);
3960                             gcc_assert (!mh);
3961                           }
3962                         else
3963                           pre_shift = 0;
3964
3965                         if (mh != 0)
3966                           {
3967                             rtx t1, t2, t3, t4;
3968
3969                             if (post_shift - 1 >= BITS_PER_WORD)
3970                               goto fail1;
3971
3972                             extra_cost
3973                               = (shift_cost[compute_mode][post_shift - 1]
3974                                  + shift_cost[compute_mode][1]
3975                                  + 2 * add_cost[compute_mode]);
3976                             t1 = expand_mult_highpart (compute_mode, op0, ml,
3977                                                        NULL_RTX, 1,
3978                                                        max_cost - extra_cost);
3979                             if (t1 == 0)
3980                               goto fail1;
3981                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
3982                                                                op0, t1),
3983                                                 NULL_RTX);
3984                             t3 = expand_shift
3985                               (RSHIFT_EXPR, compute_mode, t2,
3986                                build_int_cst (NULL_TREE, 1),
3987                                NULL_RTX,1);
3988                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
3989                                                               t1, t3),
3990                                                 NULL_RTX);
3991                             quotient = expand_shift
3992                               (RSHIFT_EXPR, compute_mode, t4,
3993                                build_int_cst (NULL_TREE, post_shift - 1),
3994                                tquotient, 1);
3995                           }
3996                         else
3997                           {
3998                             rtx t1, t2;
3999
4000                             if (pre_shift >= BITS_PER_WORD
4001                                 || post_shift >= BITS_PER_WORD)
4002                               goto fail1;
4003
4004                             t1 = expand_shift
4005                               (RSHIFT_EXPR, compute_mode, op0,
4006                                build_int_cst (NULL_TREE, pre_shift),
4007                                NULL_RTX, 1);
4008                             extra_cost
4009                               = (shift_cost[compute_mode][pre_shift]
4010                                  + shift_cost[compute_mode][post_shift]);
4011                             t2 = expand_mult_highpart (compute_mode, t1, ml,
4012                                                        NULL_RTX, 1,
4013                                                        max_cost - extra_cost);
4014                             if (t2 == 0)
4015                               goto fail1;
4016                             quotient = expand_shift
4017                               (RSHIFT_EXPR, compute_mode, t2,
4018                                build_int_cst (NULL_TREE, post_shift),
4019                                tquotient, 1);
4020                           }
4021                       }
4022                   }
4023                 else            /* Too wide mode to use tricky code */
4024                   break;
4025
4026                 insn = get_last_insn ();
4027                 if (insn != last
4028                     && (set = single_set (insn)) != 0
4029                     && SET_DEST (set) == quotient)
4030                   set_unique_reg_note (insn,
4031                                        REG_EQUAL,
4032                                        gen_rtx_UDIV (compute_mode, op0, op1));
4033               }
4034             else                /* TRUNC_DIV, signed */
4035               {
4036                 unsigned HOST_WIDE_INT ml;
4037                 int lgup, post_shift;
4038                 rtx mlr;
4039                 HOST_WIDE_INT d = INTVAL (op1);
4040                 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
4041
4042                 /* n rem d = n rem -d */
4043                 if (rem_flag && d < 0)
4044                   {
4045                     d = abs_d;
4046                     op1 = gen_int_mode (abs_d, compute_mode);
4047                   }
4048
4049                 if (d == 1)
4050                   quotient = op0;
4051                 else if (d == -1)
4052                   quotient = expand_unop (compute_mode, neg_optab, op0,
4053                                           tquotient, 0);
4054                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4055                   {
4056                     /* This case is not handled correctly below.  */
4057                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4058                                                 compute_mode, 1, 1);
4059                     if (quotient == 0)
4060                       goto fail1;
4061                   }
4062                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4063                          && (rem_flag ? smod_pow2_cheap[compute_mode]
4064                                       : sdiv_pow2_cheap[compute_mode])
4065                          /* We assume that cheap metric is true if the
4066                             optab has an expander for this mode.  */
4067                          && (((rem_flag ? smod_optab : sdiv_optab)
4068                               ->handlers[compute_mode].insn_code
4069                               != CODE_FOR_nothing)
4070                              || (sdivmod_optab->handlers[compute_mode]
4071                                  .insn_code != CODE_FOR_nothing)))
4072                   ;
4073                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4074                   {
4075                     if (rem_flag)
4076                       {
4077                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4078                         if (remainder)
4079                           return gen_lowpart (mode, remainder);
4080                       }
4081
4082                     if (sdiv_pow2_cheap[compute_mode]
4083                         && ((sdiv_optab->handlers[compute_mode].insn_code
4084                              != CODE_FOR_nothing)
4085                             || (sdivmod_optab->handlers[compute_mode].insn_code
4086                                 != CODE_FOR_nothing)))
4087                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4088                                                 compute_mode, op0,
4089                                                 gen_int_mode (abs_d,
4090                                                               compute_mode),
4091                                                 NULL_RTX, 0);
4092                     else
4093                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4094
4095                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4096                        negate the quotient.  */
4097                     if (d < 0)
4098                       {
4099                         insn = get_last_insn ();
4100                         if (insn != last
4101                             && (set = single_set (insn)) != 0
4102                             && SET_DEST (set) == quotient
4103                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4104                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4105                           set_unique_reg_note (insn,
4106                                                REG_EQUAL,
4107                                                gen_rtx_DIV (compute_mode,
4108                                                             op0,
4109                                                             GEN_INT
4110                                                             (trunc_int_for_mode
4111                                                              (abs_d,
4112                                                               compute_mode))));
4113
4114                         quotient = expand_unop (compute_mode, neg_optab,
4115                                                 quotient, quotient, 0);
4116                       }
4117                   }
4118                 else if (size <= HOST_BITS_PER_WIDE_INT)
4119                   {
4120                     choose_multiplier (abs_d, size, size - 1,
4121                                        &mlr, &post_shift, &lgup);
4122                     ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4123                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4124                       {
4125                         rtx t1, t2, t3;
4126
4127                         if (post_shift >= BITS_PER_WORD
4128                             || size - 1 >= BITS_PER_WORD)
4129                           goto fail1;
4130
4131                         extra_cost = (shift_cost[compute_mode][post_shift]
4132                                       + shift_cost[compute_mode][size - 1]
4133                                       + add_cost[compute_mode]);
4134                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4135                                                    NULL_RTX, 0,
4136                                                    max_cost - extra_cost);
4137                         if (t1 == 0)
4138                           goto fail1;
4139                         t2 = expand_shift
4140                           (RSHIFT_EXPR, compute_mode, t1,
4141                            build_int_cst (NULL_TREE, post_shift),
4142                            NULL_RTX, 0);
4143                         t3 = expand_shift
4144                           (RSHIFT_EXPR, compute_mode, op0,
4145                            build_int_cst (NULL_TREE, size - 1),
4146                            NULL_RTX, 0);
4147                         if (d < 0)
4148                           quotient
4149                             = force_operand (gen_rtx_MINUS (compute_mode,
4150                                                             t3, t2),
4151                                              tquotient);
4152                         else
4153                           quotient
4154                             = force_operand (gen_rtx_MINUS (compute_mode,
4155                                                             t2, t3),
4156                                              tquotient);
4157                       }
4158                     else
4159                       {
4160                         rtx t1, t2, t3, t4;
4161
4162                         if (post_shift >= BITS_PER_WORD
4163                             || size - 1 >= BITS_PER_WORD)
4164                           goto fail1;
4165
4166                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4167                         mlr = gen_int_mode (ml, compute_mode);
4168                         extra_cost = (shift_cost[compute_mode][post_shift]
4169                                       + shift_cost[compute_mode][size - 1]
4170                                       + 2 * add_cost[compute_mode]);
4171                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4172                                                    NULL_RTX, 0,
4173                                                    max_cost - extra_cost);
4174                         if (t1 == 0)
4175                           goto fail1;
4176                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4177                                                           t1, op0),
4178                                             NULL_RTX);
4179                         t3 = expand_shift
4180                           (RSHIFT_EXPR, compute_mode, t2,
4181                            build_int_cst (NULL_TREE, post_shift),
4182                            NULL_RTX, 0);
4183                         t4 = expand_shift
4184                           (RSHIFT_EXPR, compute_mode, op0,
4185                            build_int_cst (NULL_TREE, size - 1),
4186                            NULL_RTX, 0);
4187                         if (d < 0)
4188                           quotient
4189                             = force_operand (gen_rtx_MINUS (compute_mode,
4190                                                             t4, t3),
4191                                              tquotient);
4192                         else
4193                           quotient
4194                             = force_operand (gen_rtx_MINUS (compute_mode,
4195                                                             t3, t4),
4196                                              tquotient);
4197                       }
4198                   }
4199                 else            /* Too wide mode to use tricky code */
4200                   break;
4201
4202                 insn = get_last_insn ();
4203                 if (insn != last
4204                     && (set = single_set (insn)) != 0
4205                     && SET_DEST (set) == quotient)
4206                   set_unique_reg_note (insn,
4207                                        REG_EQUAL,
4208                                        gen_rtx_DIV (compute_mode, op0, op1));
4209               }
4210             break;
4211           }
4212       fail1:
4213         delete_insns_since (last);
4214         break;
4215
4216       case FLOOR_DIV_EXPR:
4217       case FLOOR_MOD_EXPR:
4218       /* We will come here only for signed operations.  */
4219         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4220           {
4221             unsigned HOST_WIDE_INT mh;
4222             int pre_shift, lgup, post_shift;
4223             HOST_WIDE_INT d = INTVAL (op1);
4224             rtx ml;
4225
4226             if (d > 0)
4227               {
4228                 /* We could just as easily deal with negative constants here,
4229                    but it does not seem worth the trouble for GCC 2.6.  */
4230                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4231                   {
4232                     pre_shift = floor_log2 (d);
4233                     if (rem_flag)
4234                       {
4235                         remainder = expand_binop (compute_mode, and_optab, op0,
4236                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4237                                                   remainder, 0, OPTAB_LIB_WIDEN);
4238                         if (remainder)
4239                           return gen_lowpart (mode, remainder);
4240                       }
4241                     quotient = expand_shift
4242                       (RSHIFT_EXPR, compute_mode, op0,
4243                        build_int_cst (NULL_TREE, pre_shift),
4244                        tquotient, 0);
4245                   }
4246                 else
4247                   {
4248                     rtx t1, t2, t3, t4;
4249
4250                     mh = choose_multiplier (d, size, size - 1,
4251                                             &ml, &post_shift, &lgup);
4252                     gcc_assert (!mh);
4253
4254                     if (post_shift < BITS_PER_WORD
4255                         && size - 1 < BITS_PER_WORD)
4256                       {
4257                         t1 = expand_shift
4258                           (RSHIFT_EXPR, compute_mode, op0,
4259                            build_int_cst (NULL_TREE, size - 1),
4260                            NULL_RTX, 0);
4261                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4262                                            NULL_RTX, 0, OPTAB_WIDEN);
4263                         extra_cost = (shift_cost[compute_mode][post_shift]
4264                                       + shift_cost[compute_mode][size - 1]
4265                                       + 2 * add_cost[compute_mode]);
4266                         t3 = expand_mult_highpart (compute_mode, t2, ml,
4267                                                    NULL_RTX, 1,
4268                                                    max_cost - extra_cost);
4269                         if (t3 != 0)
4270                           {
4271                             t4 = expand_shift
4272                               (RSHIFT_EXPR, compute_mode, t3,
4273                                build_int_cst (NULL_TREE, post_shift),
4274                                NULL_RTX, 1);
4275                             quotient = expand_binop (compute_mode, xor_optab,
4276                                                      t4, t1, tquotient, 0,
4277                                                      OPTAB_WIDEN);
4278                           }
4279                       }
4280                   }
4281               }
4282             else
4283               {
4284                 rtx nsign, t1, t2, t3, t4;
4285                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4286                                                   op0, constm1_rtx), NULL_RTX);
4287                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4288                                    0, OPTAB_WIDEN);
4289                 nsign = expand_shift
4290                   (RSHIFT_EXPR, compute_mode, t2,
4291                    build_int_cst (NULL_TREE, size - 1),
4292                    NULL_RTX, 0);
4293                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4294                                     NULL_RTX);
4295                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4296                                     NULL_RTX, 0);
4297                 if (t4)
4298                   {
4299                     rtx t5;
4300                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4301                                       NULL_RTX, 0);
4302                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4303                                                             t4, t5),
4304                                               tquotient);
4305                   }
4306               }
4307           }
4308
4309         if (quotient != 0)
4310           break;
4311         delete_insns_since (last);
4312
4313         /* Try using an instruction that produces both the quotient and
4314            remainder, using truncation.  We can easily compensate the quotient
4315            or remainder to get floor rounding, once we have the remainder.
4316            Notice that we compute also the final remainder value here,
4317            and return the result right away.  */
4318         if (target == 0 || GET_MODE (target) != compute_mode)
4319           target = gen_reg_rtx (compute_mode);
4320
4321         if (rem_flag)
4322           {
4323             remainder
4324               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4325             quotient = gen_reg_rtx (compute_mode);
4326           }
4327         else
4328           {
4329             quotient
4330               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4331             remainder = gen_reg_rtx (compute_mode);
4332           }
4333
4334         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4335                                  quotient, remainder, 0))
4336           {
4337             /* This could be computed with a branch-less sequence.
4338                Save that for later.  */
4339             rtx tem;
4340             rtx label = gen_label_rtx ();
4341             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4342             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4343                                 NULL_RTX, 0, OPTAB_WIDEN);
4344             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4345             expand_dec (quotient, const1_rtx);
4346             expand_inc (remainder, op1);
4347             emit_label (label);
4348             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4349           }
4350
4351         /* No luck with division elimination or divmod.  Have to do it
4352            by conditionally adjusting op0 *and* the result.  */
4353         {
4354           rtx label1, label2, label3, label4, label5;
4355           rtx adjusted_op0;
4356           rtx tem;
4357
4358           quotient = gen_reg_rtx (compute_mode);
4359           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4360           label1 = gen_label_rtx ();
4361           label2 = gen_label_rtx ();
4362           label3 = gen_label_rtx ();
4363           label4 = gen_label_rtx ();
4364           label5 = gen_label_rtx ();
4365           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4366           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4367           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4368                               quotient, 0, OPTAB_LIB_WIDEN);
4369           if (tem != quotient)
4370             emit_move_insn (quotient, tem);
4371           emit_jump_insn (gen_jump (label5));
4372           emit_barrier ();
4373           emit_label (label1);
4374           expand_inc (adjusted_op0, const1_rtx);
4375           emit_jump_insn (gen_jump (label4));
4376           emit_barrier ();
4377           emit_label (label2);
4378           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4379           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4380                               quotient, 0, OPTAB_LIB_WIDEN);
4381           if (tem != quotient)
4382             emit_move_insn (quotient, tem);
4383           emit_jump_insn (gen_jump (label5));
4384           emit_barrier ();
4385           emit_label (label3);
4386           expand_dec (adjusted_op0, const1_rtx);
4387           emit_label (label4);
4388           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4389                               quotient, 0, OPTAB_LIB_WIDEN);
4390           if (tem != quotient)
4391             emit_move_insn (quotient, tem);
4392           expand_dec (quotient, const1_rtx);
4393           emit_label (label5);
4394         }
4395         break;
4396
4397       case CEIL_DIV_EXPR:
4398       case CEIL_MOD_EXPR:
4399         if (unsignedp)
4400           {
4401             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4402               {
4403                 rtx t1, t2, t3;
4404                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4405                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4406                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4407                                    tquotient, 1);
4408                 t2 = expand_binop (compute_mode, and_optab, op0,
4409                                    GEN_INT (d - 1),
4410                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4411                 t3 = gen_reg_rtx (compute_mode);
4412                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4413                                       compute_mode, 1, 1);
4414                 if (t3 == 0)
4415                   {
4416                     rtx lab;
4417                     lab = gen_label_rtx ();
4418                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4419                     expand_inc (t1, const1_rtx);
4420                     emit_label (lab);
4421                     quotient = t1;
4422                   }
4423                 else
4424                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4425                                                           t1, t3),
4426                                             tquotient);
4427                 break;
4428               }
4429
4430             /* Try using an instruction that produces both the quotient and
4431                remainder, using truncation.  We can easily compensate the
4432                quotient or remainder to get ceiling rounding, once we have the
4433                remainder.  Notice that we compute also the final remainder
4434                value here, and return the result right away.  */
4435             if (target == 0 || GET_MODE (target) != compute_mode)
4436               target = gen_reg_rtx (compute_mode);
4437
4438             if (rem_flag)
4439               {
4440                 remainder = (REG_P (target)
4441                              ? target : gen_reg_rtx (compute_mode));
4442                 quotient = gen_reg_rtx (compute_mode);
4443               }
4444             else
4445               {
4446                 quotient = (REG_P (target)
4447                             ? target : gen_reg_rtx (compute_mode));
4448                 remainder = gen_reg_rtx (compute_mode);
4449               }
4450
4451             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4452                                      remainder, 1))
4453               {
4454                 /* This could be computed with a branch-less sequence.
4455                    Save that for later.  */
4456                 rtx label = gen_label_rtx ();
4457                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4458                                  compute_mode, label);
4459                 expand_inc (quotient, const1_rtx);
4460                 expand_dec (remainder, op1);
4461                 emit_label (label);
4462                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4463               }
4464
4465             /* No luck with division elimination or divmod.  Have to do it
4466                by conditionally adjusting op0 *and* the result.  */
4467             {
4468               rtx label1, label2;
4469               rtx adjusted_op0, tem;
4470
4471               quotient = gen_reg_rtx (compute_mode);
4472               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4473               label1 = gen_label_rtx ();
4474               label2 = gen_label_rtx ();
4475               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4476                                compute_mode, label1);
4477               emit_move_insn  (quotient, const0_rtx);
4478               emit_jump_insn (gen_jump (label2));
4479               emit_barrier ();
4480               emit_label (label1);
4481               expand_dec (adjusted_op0, const1_rtx);
4482               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4483                                   quotient, 1, OPTAB_LIB_WIDEN);
4484               if (tem != quotient)
4485                 emit_move_insn (quotient, tem);
4486               expand_inc (quotient, const1_rtx);
4487               emit_label (label2);
4488             }
4489           }
4490         else /* signed */
4491           {
4492             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4493                 && INTVAL (op1) >= 0)
4494               {
4495                 /* This is extremely similar to the code for the unsigned case
4496                    above.  For 2.7 we should merge these variants, but for
4497                    2.6.1 I don't want to touch the code for unsigned since that
4498                    get used in C.  The signed case will only be used by other
4499                    languages (Ada).  */
4500
4501                 rtx t1, t2, t3;
4502                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4503                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4504                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4505                                    tquotient, 0);
4506                 t2 = expand_binop (compute_mode, and_optab, op0,
4507                                    GEN_INT (d - 1),
4508                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4509                 t3 = gen_reg_rtx (compute_mode);
4510                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4511                                       compute_mode, 1, 1);
4512                 if (t3 == 0)
4513                   {
4514                     rtx lab;
4515                     lab = gen_label_rtx ();
4516                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4517                     expand_inc (t1, const1_rtx);
4518                     emit_label (lab);
4519                     quotient = t1;
4520                   }
4521                 else
4522                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4523                                                           t1, t3),
4524                                             tquotient);
4525                 break;
4526               }
4527
4528             /* Try using an instruction that produces both the quotient and
4529                remainder, using truncation.  We can easily compensate the
4530                quotient or remainder to get ceiling rounding, once we have the
4531                remainder.  Notice that we compute also the final remainder
4532                value here, and return the result right away.  */
4533             if (target == 0 || GET_MODE (target) != compute_mode)
4534               target = gen_reg_rtx (compute_mode);
4535             if (rem_flag)
4536               {
4537                 remainder= (REG_P (target)
4538                             ? target : gen_reg_rtx (compute_mode));
4539                 quotient = gen_reg_rtx (compute_mode);
4540               }
4541             else
4542               {
4543                 quotient = (REG_P (target)
4544                             ? target : gen_reg_rtx (compute_mode));
4545                 remainder = gen_reg_rtx (compute_mode);
4546               }
4547
4548             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4549                                      remainder, 0))
4550               {
4551                 /* This could be computed with a branch-less sequence.
4552                    Save that for later.  */
4553                 rtx tem;
4554                 rtx label = gen_label_rtx ();
4555                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4556                                  compute_mode, label);
4557                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4558                                     NULL_RTX, 0, OPTAB_WIDEN);
4559                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4560                 expand_inc (quotient, const1_rtx);
4561                 expand_dec (remainder, op1);
4562                 emit_label (label);
4563                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4564               }
4565
4566             /* No luck with division elimination or divmod.  Have to do it
4567                by conditionally adjusting op0 *and* the result.  */
4568             {
4569               rtx label1, label2, label3, label4, label5;
4570               rtx adjusted_op0;
4571               rtx tem;
4572
4573               quotient = gen_reg_rtx (compute_mode);
4574               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4575               label1 = gen_label_rtx ();
4576               label2 = gen_label_rtx ();
4577               label3 = gen_label_rtx ();
4578               label4 = gen_label_rtx ();
4579               label5 = gen_label_rtx ();
4580               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4581               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4582                                compute_mode, label1);
4583               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4584                                   quotient, 0, OPTAB_LIB_WIDEN);
4585               if (tem != quotient)
4586                 emit_move_insn (quotient, tem);
4587               emit_jump_insn (gen_jump (label5));
4588               emit_barrier ();
4589               emit_label (label1);
4590               expand_dec (adjusted_op0, const1_rtx);
4591               emit_jump_insn (gen_jump (label4));
4592               emit_barrier ();
4593               emit_label (label2);
4594               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4595                                compute_mode, label3);
4596               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4597                                   quotient, 0, OPTAB_LIB_WIDEN);
4598               if (tem != quotient)
4599                 emit_move_insn (quotient, tem);
4600               emit_jump_insn (gen_jump (label5));
4601               emit_barrier ();
4602               emit_label (label3);
4603               expand_inc (adjusted_op0, const1_rtx);
4604               emit_label (label4);
4605               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4606                                   quotient, 0, OPTAB_LIB_WIDEN);
4607               if (tem != quotient)
4608                 emit_move_insn (quotient, tem);
4609               expand_inc (quotient, const1_rtx);
4610               emit_label (label5);
4611             }
4612           }
4613         break;
4614
4615       case EXACT_DIV_EXPR:
4616         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4617           {
4618             HOST_WIDE_INT d = INTVAL (op1);
4619             unsigned HOST_WIDE_INT ml;
4620             int pre_shift;
4621             rtx t1;
4622
4623             pre_shift = floor_log2 (d & -d);
4624             ml = invert_mod2n (d >> pre_shift, size);
4625             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4626                                build_int_cst (NULL_TREE, pre_shift),
4627                                NULL_RTX, unsignedp);
4628             quotient = expand_mult (compute_mode, t1,
4629                                     gen_int_mode (ml, compute_mode),
4630                                     NULL_RTX, 1);
4631
4632             insn = get_last_insn ();
4633             set_unique_reg_note (insn,
4634                                  REG_EQUAL,
4635                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4636                                                  compute_mode,
4637                                                  op0, op1));
4638           }
4639         break;
4640
4641       case ROUND_DIV_EXPR:
4642       case ROUND_MOD_EXPR:
4643         if (unsignedp)
4644           {
4645             rtx tem;
4646             rtx label;
4647             label = gen_label_rtx ();
4648             quotient = gen_reg_rtx (compute_mode);
4649             remainder = gen_reg_rtx (compute_mode);
4650             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4651               {
4652                 rtx tem;
4653                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4654                                          quotient, 1, OPTAB_LIB_WIDEN);
4655                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4656                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4657                                           remainder, 1, OPTAB_LIB_WIDEN);
4658               }
4659             tem = plus_constant (op1, -1);
4660             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4661                                 build_int_cst (NULL_TREE, 1),
4662                                 NULL_RTX, 1);
4663             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4664             expand_inc (quotient, const1_rtx);
4665             expand_dec (remainder, op1);
4666             emit_label (label);
4667           }
4668         else
4669           {
4670             rtx abs_rem, abs_op1, tem, mask;
4671             rtx label;
4672             label = gen_label_rtx ();
4673             quotient = gen_reg_rtx (compute_mode);
4674             remainder = gen_reg_rtx (compute_mode);
4675             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4676               {
4677                 rtx tem;
4678                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4679                                          quotient, 0, OPTAB_LIB_WIDEN);
4680                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4681                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4682                                           remainder, 0, OPTAB_LIB_WIDEN);
4683               }
4684             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4685             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4686             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4687                                 build_int_cst (NULL_TREE, 1),
4688                                 NULL_RTX, 1);
4689             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4690             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4691                                 NULL_RTX, 0, OPTAB_WIDEN);
4692             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4693                                  build_int_cst (NULL_TREE, size - 1),
4694                                  NULL_RTX, 0);
4695             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4696                                 NULL_RTX, 0, OPTAB_WIDEN);
4697             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4698                                 NULL_RTX, 0, OPTAB_WIDEN);
4699             expand_inc (quotient, tem);
4700             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4701                                 NULL_RTX, 0, OPTAB_WIDEN);
4702             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4703                                 NULL_RTX, 0, OPTAB_WIDEN);
4704             expand_dec (remainder, tem);
4705             emit_label (label);
4706           }
4707         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4708
4709       default:
4710         gcc_unreachable ();
4711       }
4712
4713   if (quotient == 0)
4714     {
4715       if (target && GET_MODE (target) != compute_mode)
4716         target = 0;
4717
4718       if (rem_flag)
4719         {
4720           /* Try to produce the remainder without producing the quotient.
4721              If we seem to have a divmod pattern that does not require widening,
4722              don't try widening here.  We should really have a WIDEN argument
4723              to expand_twoval_binop, since what we'd really like to do here is
4724              1) try a mod insn in compute_mode
4725              2) try a divmod insn in compute_mode
4726              3) try a div insn in compute_mode and multiply-subtract to get
4727                 remainder
4728              4) try the same things with widening allowed.  */
4729           remainder
4730             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4731                                  op0, op1, target,
4732                                  unsignedp,
4733                                  ((optab2->handlers[compute_mode].insn_code
4734                                    != CODE_FOR_nothing)
4735                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4736           if (remainder == 0)
4737             {
4738               /* No luck there.  Can we do remainder and divide at once
4739                  without a library call?  */
4740               remainder = gen_reg_rtx (compute_mode);
4741               if (! expand_twoval_binop ((unsignedp
4742                                           ? udivmod_optab
4743                                           : sdivmod_optab),
4744                                          op0, op1,
4745                                          NULL_RTX, remainder, unsignedp))
4746                 remainder = 0;
4747             }
4748
4749           if (remainder)
4750             return gen_lowpart (mode, remainder);
4751         }
4752
4753       /* Produce the quotient.  Try a quotient insn, but not a library call.
4754          If we have a divmod in this mode, use it in preference to widening
4755          the div (for this test we assume it will not fail). Note that optab2
4756          is set to the one of the two optabs that the call below will use.  */
4757       quotient
4758         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4759                              op0, op1, rem_flag ? NULL_RTX : target,
4760                              unsignedp,
4761                              ((optab2->handlers[compute_mode].insn_code
4762                                != CODE_FOR_nothing)
4763                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4764
4765       if (quotient == 0)
4766         {
4767           /* No luck there.  Try a quotient-and-remainder insn,
4768              keeping the quotient alone.  */
4769           quotient = gen_reg_rtx (compute_mode);
4770           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4771                                      op0, op1,
4772                                      quotient, NULL_RTX, unsignedp))
4773             {
4774               quotient = 0;
4775               if (! rem_flag)
4776                 /* Still no luck.  If we are not computing the remainder,
4777                    use a library call for the quotient.  */
4778                 quotient = sign_expand_binop (compute_mode,
4779                                               udiv_optab, sdiv_optab,
4780                                               op0, op1, target,
4781                                               unsignedp, OPTAB_LIB_WIDEN);
4782             }
4783         }
4784     }
4785
4786   if (rem_flag)
4787     {
4788       if (target && GET_MODE (target) != compute_mode)
4789         target = 0;
4790
4791       if (quotient == 0)
4792         {
4793           /* No divide instruction either.  Use library for remainder.  */
4794           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4795                                          op0, op1, target,
4796                                          unsignedp, OPTAB_LIB_WIDEN);
4797           /* No remainder function.  Try a quotient-and-remainder
4798              function, keeping the remainder.  */
4799           if (!remainder)
4800             {
4801               remainder = gen_reg_rtx (compute_mode);
4802               if (!expand_twoval_binop_libfunc
4803                   (unsignedp ? udivmod_optab : sdivmod_optab,
4804                    op0, op1,
4805                    NULL_RTX, remainder,
4806                    unsignedp ? UMOD : MOD))
4807                 remainder = NULL_RTX;
4808             }
4809         }
4810       else
4811         {
4812           /* We divided.  Now finish doing X - Y * (X / Y).  */
4813           remainder = expand_mult (compute_mode, quotient, op1,
4814                                    NULL_RTX, unsignedp);
4815           remainder = expand_binop (compute_mode, sub_optab, op0,
4816                                     remainder, target, unsignedp,
4817                                     OPTAB_LIB_WIDEN);
4818         }
4819     }
4820
4821   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4822 }
4823 \f
4824 /* Return a tree node with data type TYPE, describing the value of X.
4825    Usually this is an VAR_DECL, if there is no obvious better choice.
4826    X may be an expression, however we only support those expressions
4827    generated by loop.c.  */
4828
4829 tree
4830 make_tree (tree type, rtx x)
4831 {
4832   tree t;
4833
4834   switch (GET_CODE (x))
4835     {
4836     case CONST_INT:
4837       {
4838         HOST_WIDE_INT hi = 0;
4839
4840         if (INTVAL (x) < 0
4841             && !(TYPE_UNSIGNED (type)
4842                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
4843                      < HOST_BITS_PER_WIDE_INT)))
4844           hi = -1;
4845
4846         t = build_int_cst_wide (type, INTVAL (x), hi);
4847
4848         return t;
4849       }
4850
4851     case CONST_DOUBLE:
4852       if (GET_MODE (x) == VOIDmode)
4853         t = build_int_cst_wide (type,
4854                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4855       else
4856         {
4857           REAL_VALUE_TYPE d;
4858
4859           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4860           t = build_real (type, d);
4861         }
4862
4863       return t;
4864
4865     case CONST_VECTOR:
4866       {
4867         int i, units;
4868         rtx elt;
4869         tree t = NULL_TREE;
4870
4871         units = CONST_VECTOR_NUNITS (x);
4872
4873         /* Build a tree with vector elements.  */
4874         for (i = units - 1; i >= 0; --i)
4875           {
4876             elt = CONST_VECTOR_ELT (x, i);
4877             t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4878           }
4879
4880         return build_vector (type, t);
4881       }
4882
4883     case PLUS:
4884       return fold (build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4885                            make_tree (type, XEXP (x, 1))));
4886
4887     case MINUS:
4888       return fold (build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4889                            make_tree (type, XEXP (x, 1))));
4890
4891     case NEG:
4892       return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
4893
4894     case MULT:
4895       return fold (build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4896                            make_tree (type, XEXP (x, 1))));
4897
4898     case ASHIFT:
4899       return fold (build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4900                            make_tree (type, XEXP (x, 1))));
4901
4902     case LSHIFTRT:
4903       t = lang_hooks.types.unsigned_type (type);
4904       return fold_convert (type, build2 (RSHIFT_EXPR, t,
4905                                          make_tree (t, XEXP (x, 0)),
4906                                          make_tree (type, XEXP (x, 1))));
4907
4908     case ASHIFTRT:
4909       t = lang_hooks.types.signed_type (type);
4910       return fold_convert (type, build2 (RSHIFT_EXPR, t,
4911                                          make_tree (t, XEXP (x, 0)),
4912                                          make_tree (type, XEXP (x, 1))));
4913
4914     case DIV:
4915       if (TREE_CODE (type) != REAL_TYPE)
4916         t = lang_hooks.types.signed_type (type);
4917       else
4918         t = type;
4919
4920       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4921                                          make_tree (t, XEXP (x, 0)),
4922                                          make_tree (t, XEXP (x, 1))));
4923     case UDIV:
4924       t = lang_hooks.types.unsigned_type (type);
4925       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4926                                          make_tree (t, XEXP (x, 0)),
4927                                          make_tree (t, XEXP (x, 1))));
4928
4929     case SIGN_EXTEND:
4930     case ZERO_EXTEND:
4931       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4932                                           GET_CODE (x) == ZERO_EXTEND);
4933       return fold_convert (type, make_tree (t, XEXP (x, 0)));
4934
4935     default:
4936       t = build_decl (VAR_DECL, NULL_TREE, type);
4937
4938       /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4939          ptr_mode.  So convert.  */
4940       if (POINTER_TYPE_P (type))
4941         x = convert_memory_address (TYPE_MODE (type), x);
4942
4943       /* Note that we do *not* use SET_DECL_RTL here, because we do not
4944          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
4945       t->decl.rtl = x;
4946
4947       return t;
4948     }
4949 }
4950
4951 /* Check whether the multiplication X * MULT + ADD overflows.
4952    X, MULT and ADD must be CONST_*.
4953    MODE is the machine mode for the computation.
4954    X and MULT must have mode MODE.  ADD may have a different mode.
4955    So can X (defaults to same as MODE).
4956    UNSIGNEDP is nonzero to do unsigned multiplication.  */
4957
4958 bool
4959 const_mult_add_overflow_p (rtx x, rtx mult, rtx add,
4960                            enum machine_mode mode, int unsignedp)
4961 {
4962   tree type, mult_type, add_type, result;
4963
4964   type = lang_hooks.types.type_for_mode (mode, unsignedp);
4965
4966   /* In order to get a proper overflow indication from an unsigned
4967      type, we have to pretend that it's a sizetype.  */
4968   mult_type = type;
4969   if (unsignedp)
4970     {
4971       /* FIXME:It would be nice if we could step directly from this
4972          type to its sizetype equivalent.  */
4973       mult_type = build_distinct_type_copy (type);
4974       TYPE_IS_SIZETYPE (mult_type) = 1;
4975     }
4976
4977   add_type = (GET_MODE (add) == VOIDmode ? mult_type
4978               : lang_hooks.types.type_for_mode (GET_MODE (add), unsignedp));
4979
4980   result = fold (build2 (PLUS_EXPR, mult_type,
4981                          fold (build2 (MULT_EXPR, mult_type,
4982                                        make_tree (mult_type, x),
4983                                        make_tree (mult_type, mult))),
4984                          make_tree (add_type, add)));
4985
4986   return TREE_CONSTANT_OVERFLOW (result);
4987 }
4988
4989 /* Return an rtx representing the value of X * MULT + ADD.
4990    TARGET is a suggestion for where to store the result (an rtx).
4991    MODE is the machine mode for the computation.
4992    X and MULT must have mode MODE.  ADD may have a different mode.
4993    So can X (defaults to same as MODE).
4994    UNSIGNEDP is nonzero to do unsigned multiplication.
4995    This may emit insns.  */
4996
4997 rtx
4998 expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
4999                  int unsignedp)
5000 {
5001   tree type = lang_hooks.types.type_for_mode (mode, unsignedp);
5002   tree add_type = (GET_MODE (add) == VOIDmode
5003                    ? type: lang_hooks.types.type_for_mode (GET_MODE (add),
5004                                                            unsignedp));
5005   tree result =  fold (build2 (PLUS_EXPR, type,
5006                                fold (build2 (MULT_EXPR, type,
5007                                              make_tree (type, x),
5008                                              make_tree (type, mult))),
5009                                make_tree (add_type, add)));
5010
5011   return expand_expr (result, target, VOIDmode, 0);
5012 }
5013 \f
5014 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5015    and returning TARGET.
5016
5017    If TARGET is 0, a pseudo-register or constant is returned.  */
5018
5019 rtx
5020 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5021 {
5022   rtx tem = 0;
5023
5024   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5025     tem = simplify_binary_operation (AND, mode, op0, op1);
5026   if (tem == 0)
5027     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5028
5029   if (target == 0)
5030     target = tem;
5031   else if (tem != target)
5032     emit_move_insn (target, tem);
5033   return target;
5034 }
5035 \f
5036 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5037    and storing in TARGET.  Normally return TARGET.
5038    Return 0 if that cannot be done.
5039
5040    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5041    it is VOIDmode, they cannot both be CONST_INT.
5042
5043    UNSIGNEDP is for the case where we have to widen the operands
5044    to perform the operation.  It says to use zero-extension.
5045
5046    NORMALIZEP is 1 if we should convert the result to be either zero
5047    or one.  Normalize is -1 if we should convert the result to be
5048    either zero or -1.  If NORMALIZEP is zero, the result will be left
5049    "raw" out of the scc insn.  */
5050
5051 rtx
5052 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5053                  enum machine_mode mode, int unsignedp, int normalizep)
5054 {
5055   rtx subtarget;
5056   enum insn_code icode;
5057   enum machine_mode compare_mode;
5058   enum machine_mode target_mode = GET_MODE (target);
5059   rtx tem;
5060   rtx last = get_last_insn ();
5061   rtx pattern, comparison;
5062
5063   if (unsignedp)
5064     code = unsigned_condition (code);
5065
5066   /* If one operand is constant, make it the second one.  Only do this
5067      if the other operand is not constant as well.  */
5068
5069   if (swap_commutative_operands_p (op0, op1))
5070     {
5071       tem = op0;
5072       op0 = op1;
5073       op1 = tem;
5074       code = swap_condition (code);
5075     }
5076
5077   if (mode == VOIDmode)
5078     mode = GET_MODE (op0);
5079
5080   /* For some comparisons with 1 and -1, we can convert this to
5081      comparisons with zero.  This will often produce more opportunities for
5082      store-flag insns.  */
5083
5084   switch (code)
5085     {
5086     case LT:
5087       if (op1 == const1_rtx)
5088         op1 = const0_rtx, code = LE;
5089       break;
5090     case LE:
5091       if (op1 == constm1_rtx)
5092         op1 = const0_rtx, code = LT;
5093       break;
5094     case GE:
5095       if (op1 == const1_rtx)
5096         op1 = const0_rtx, code = GT;
5097       break;
5098     case GT:
5099       if (op1 == constm1_rtx)
5100         op1 = const0_rtx, code = GE;
5101       break;
5102     case GEU:
5103       if (op1 == const1_rtx)
5104         op1 = const0_rtx, code = NE;
5105       break;
5106     case LTU:
5107       if (op1 == const1_rtx)
5108         op1 = const0_rtx, code = EQ;
5109       break;
5110     default:
5111       break;
5112     }
5113
5114   /* If we are comparing a double-word integer with zero or -1, we can
5115      convert the comparison into one involving a single word.  */
5116   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5117       && GET_MODE_CLASS (mode) == MODE_INT
5118       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5119     {
5120       if ((code == EQ || code == NE)
5121           && (op1 == const0_rtx || op1 == constm1_rtx))
5122         {
5123           rtx op00, op01, op0both;
5124
5125           /* Do a logical OR or AND of the two words and compare the result.  */
5126           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5127           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5128           op0both = expand_binop (word_mode,
5129                                   op1 == const0_rtx ? ior_optab : and_optab,
5130                                   op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
5131
5132           if (op0both != 0)
5133             return emit_store_flag (target, code, op0both, op1, word_mode,
5134                                     unsignedp, normalizep);
5135         }
5136       else if ((code == LT || code == GE) && op1 == const0_rtx)
5137         {
5138           rtx op0h;
5139
5140           /* If testing the sign bit, can just test on high word.  */
5141           op0h = simplify_gen_subreg (word_mode, op0, mode,
5142                                       subreg_highpart_offset (word_mode, mode));
5143           return emit_store_flag (target, code, op0h, op1, word_mode,
5144                                   unsignedp, normalizep);
5145         }
5146     }
5147
5148   /* From now on, we won't change CODE, so set ICODE now.  */
5149   icode = setcc_gen_code[(int) code];
5150
5151   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5152      complement of A (for GE) and shifting the sign bit to the low bit.  */
5153   if (op1 == const0_rtx && (code == LT || code == GE)
5154       && GET_MODE_CLASS (mode) == MODE_INT
5155       && (normalizep || STORE_FLAG_VALUE == 1
5156           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5157               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5158                   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
5159     {
5160       subtarget = target;
5161
5162       /* If the result is to be wider than OP0, it is best to convert it
5163          first.  If it is to be narrower, it is *incorrect* to convert it
5164          first.  */
5165       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5166         {
5167           op0 = convert_modes (target_mode, mode, op0, 0);
5168           mode = target_mode;
5169         }
5170
5171       if (target_mode != mode)
5172         subtarget = 0;
5173
5174       if (code == GE)
5175         op0 = expand_unop (mode, one_cmpl_optab, op0,
5176                            ((STORE_FLAG_VALUE == 1 || normalizep)
5177                             ? 0 : subtarget), 0);
5178
5179       if (STORE_FLAG_VALUE == 1 || normalizep)
5180         /* If we are supposed to produce a 0/1 value, we want to do
5181            a logical shift from the sign bit to the low-order bit; for
5182            a -1/0 value, we do an arithmetic shift.  */
5183         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5184                             size_int (GET_MODE_BITSIZE (mode) - 1),
5185                             subtarget, normalizep != -1);
5186
5187       if (mode != target_mode)
5188         op0 = convert_modes (target_mode, mode, op0, 0);
5189
5190       return op0;
5191     }
5192
5193   if (icode != CODE_FOR_nothing)
5194     {
5195       insn_operand_predicate_fn pred;
5196
5197       /* We think we may be able to do this with a scc insn.  Emit the
5198          comparison and then the scc insn.  */
5199
5200       do_pending_stack_adjust ();
5201       last = get_last_insn ();
5202
5203       comparison
5204         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
5205       if (CONSTANT_P (comparison))
5206         {
5207           switch (GET_CODE (comparison))
5208             {
5209             case CONST_INT:
5210               if (comparison == const0_rtx)
5211                 return const0_rtx;
5212               break;
5213
5214 #ifdef FLOAT_STORE_FLAG_VALUE
5215             case CONST_DOUBLE:
5216               if (comparison == CONST0_RTX (GET_MODE (comparison)))
5217                 return const0_rtx;
5218               break;
5219 #endif
5220             default:
5221               gcc_unreachable ();
5222             }
5223
5224           if (normalizep == 1)
5225             return const1_rtx;
5226           if (normalizep == -1)
5227             return constm1_rtx;
5228           return const_true_rtx;
5229         }
5230
5231       /* The code of COMPARISON may not match CODE if compare_from_rtx
5232          decided to swap its operands and reverse the original code.
5233
5234          We know that compare_from_rtx returns either a CONST_INT or
5235          a new comparison code, so it is safe to just extract the
5236          code from COMPARISON.  */
5237       code = GET_CODE (comparison);
5238
5239       /* Get a reference to the target in the proper mode for this insn.  */
5240       compare_mode = insn_data[(int) icode].operand[0].mode;
5241       subtarget = target;
5242       pred = insn_data[(int) icode].operand[0].predicate;
5243       if (optimize || ! (*pred) (subtarget, compare_mode))
5244         subtarget = gen_reg_rtx (compare_mode);
5245
5246       pattern = GEN_FCN (icode) (subtarget);
5247       if (pattern)
5248         {
5249           emit_insn (pattern);
5250
5251           /* If we are converting to a wider mode, first convert to
5252              TARGET_MODE, then normalize.  This produces better combining
5253              opportunities on machines that have a SIGN_EXTRACT when we are
5254              testing a single bit.  This mostly benefits the 68k.
5255
5256              If STORE_FLAG_VALUE does not have the sign bit set when
5257              interpreted in COMPARE_MODE, we can do this conversion as
5258              unsigned, which is usually more efficient.  */
5259           if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
5260             {
5261               convert_move (target, subtarget,
5262                             (GET_MODE_BITSIZE (compare_mode)
5263                              <= HOST_BITS_PER_WIDE_INT)
5264                             && 0 == (STORE_FLAG_VALUE
5265                                      & ((HOST_WIDE_INT) 1
5266                                         << (GET_MODE_BITSIZE (compare_mode) -1))));
5267               op0 = target;
5268               compare_mode = target_mode;
5269             }
5270           else
5271             op0 = subtarget;
5272
5273           /* If we want to keep subexpressions around, don't reuse our
5274              last target.  */
5275
5276           if (optimize)
5277             subtarget = 0;
5278
5279           /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
5280              we don't have to do anything.  */
5281           if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5282             ;
5283           /* STORE_FLAG_VALUE might be the most negative number, so write
5284              the comparison this way to avoid a compiler-time warning.  */
5285           else if (- normalizep == STORE_FLAG_VALUE)
5286             op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
5287
5288           /* We don't want to use STORE_FLAG_VALUE < 0 below since this
5289              makes it hard to use a value of just the sign bit due to
5290              ANSI integer constant typing rules.  */
5291           else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
5292                    && (STORE_FLAG_VALUE
5293                        & ((HOST_WIDE_INT) 1
5294                           << (GET_MODE_BITSIZE (compare_mode) - 1))))
5295             op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
5296                                 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
5297                                 subtarget, normalizep == 1);
5298           else
5299             {
5300               gcc_assert (STORE_FLAG_VALUE & 1);
5301
5302               op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
5303               if (normalizep == -1)
5304                 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
5305             }
5306
5307           /* If we were converting to a smaller mode, do the
5308              conversion now.  */
5309           if (target_mode != compare_mode)
5310             {
5311               convert_move (target, op0, 0);
5312               return target;
5313             }
5314           else
5315             return op0;
5316         }
5317     }
5318
5319   delete_insns_since (last);
5320
5321   /* If optimizing, use different pseudo registers for each insn, instead
5322      of reusing the same pseudo.  This leads to better CSE, but slows
5323      down the compiler, since there are more pseudos */
5324   subtarget = (!optimize
5325                && (target_mode == mode)) ? target : NULL_RTX;
5326
5327   /* If we reached here, we can't do this with a scc insn.  However, there
5328      are some comparisons that can be done directly.  For example, if
5329      this is an equality comparison of integers, we can try to exclusive-or
5330      (or subtract) the two operands and use a recursive call to try the
5331      comparison with zero.  Don't do any of these cases if branches are
5332      very cheap.  */
5333
5334   if (BRANCH_COST > 0
5335       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5336       && op1 != const0_rtx)
5337     {
5338       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5339                           OPTAB_WIDEN);
5340
5341       if (tem == 0)
5342         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5343                             OPTAB_WIDEN);
5344       if (tem != 0)
5345         tem = emit_store_flag (target, code, tem, const0_rtx,
5346                                mode, unsignedp, normalizep);
5347       if (tem == 0)
5348         delete_insns_since (last);
5349       return tem;
5350     }
5351
5352   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5353      the constant zero.  Reject all other comparisons at this point.  Only
5354      do LE and GT if branches are expensive since they are expensive on
5355      2-operand machines.  */
5356
5357   if (BRANCH_COST == 0
5358       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5359       || (code != EQ && code != NE
5360           && (BRANCH_COST <= 1 || (code != LE && code != GT))))
5361     return 0;
5362
5363   /* See what we need to return.  We can only return a 1, -1, or the
5364      sign bit.  */
5365
5366   if (normalizep == 0)
5367     {
5368       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5369         normalizep = STORE_FLAG_VALUE;
5370
5371       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5372                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5373                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5374         ;
5375       else
5376         return 0;
5377     }
5378
5379   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5380      do the necessary operation below.  */
5381
5382   tem = 0;
5383
5384   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5385      the sign bit set.  */
5386
5387   if (code == LE)
5388     {
5389       /* This is destructive, so SUBTARGET can't be OP0.  */
5390       if (rtx_equal_p (subtarget, op0))
5391         subtarget = 0;
5392
5393       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5394                           OPTAB_WIDEN);
5395       if (tem)
5396         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5397                             OPTAB_WIDEN);
5398     }
5399
5400   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5401      number of bits in the mode of OP0, minus one.  */
5402
5403   if (code == GT)
5404     {
5405       if (rtx_equal_p (subtarget, op0))
5406         subtarget = 0;
5407
5408       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5409                           size_int (GET_MODE_BITSIZE (mode) - 1),
5410                           subtarget, 0);
5411       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5412                           OPTAB_WIDEN);
5413     }
5414
5415   if (code == EQ || code == NE)
5416     {
5417       /* For EQ or NE, one way to do the comparison is to apply an operation
5418          that converts the operand into a positive number if it is nonzero
5419          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5420          for NE we negate.  This puts the result in the sign bit.  Then we
5421          normalize with a shift, if needed.
5422
5423          Two operations that can do the above actions are ABS and FFS, so try
5424          them.  If that doesn't work, and MODE is smaller than a full word,
5425          we can use zero-extension to the wider mode (an unsigned conversion)
5426          as the operation.  */
5427
5428       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5429          that is compensated by the subsequent overflow when subtracting
5430          one / negating.  */
5431
5432       if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5433         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5434       else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5435         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5436       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5437         {
5438           tem = convert_modes (word_mode, mode, op0, 1);
5439           mode = word_mode;
5440         }
5441
5442       if (tem != 0)
5443         {
5444           if (code == EQ)
5445             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5446                                 0, OPTAB_WIDEN);
5447           else
5448             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5449         }
5450
5451       /* If we couldn't do it that way, for NE we can "or" the two's complement
5452          of the value with itself.  For EQ, we take the one's complement of
5453          that "or", which is an extra insn, so we only handle EQ if branches
5454          are expensive.  */
5455
5456       if (tem == 0 && (code == NE || BRANCH_COST > 1))
5457         {
5458           if (rtx_equal_p (subtarget, op0))
5459             subtarget = 0;
5460
5461           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5462           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5463                               OPTAB_WIDEN);
5464
5465           if (tem && code == EQ)
5466             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5467         }
5468     }
5469
5470   if (tem && normalizep)
5471     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5472                         size_int (GET_MODE_BITSIZE (mode) - 1),
5473                         subtarget, normalizep == 1);
5474
5475   if (tem)
5476     {
5477       if (GET_MODE (tem) != target_mode)
5478         {
5479           convert_move (target, tem, 0);
5480           tem = target;
5481         }
5482       else if (!subtarget)
5483         {
5484           emit_move_insn (target, tem);
5485           tem = target;
5486         }
5487     }
5488   else
5489     delete_insns_since (last);
5490
5491   return tem;
5492 }
5493
5494 /* Like emit_store_flag, but always succeeds.  */
5495
5496 rtx
5497 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5498                        enum machine_mode mode, int unsignedp, int normalizep)
5499 {
5500   rtx tem, label;
5501
5502   /* First see if emit_store_flag can do the job.  */
5503   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5504   if (tem != 0)
5505     return tem;
5506
5507   if (normalizep == 0)
5508     normalizep = 1;
5509
5510   /* If this failed, we have to do this with set/compare/jump/set code.  */
5511
5512   if (!REG_P (target)
5513       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5514     target = gen_reg_rtx (GET_MODE (target));
5515
5516   emit_move_insn (target, const1_rtx);
5517   label = gen_label_rtx ();
5518   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5519                            NULL_RTX, label);
5520
5521   emit_move_insn (target, const0_rtx);
5522   emit_label (label);
5523
5524   return target;
5525 }
5526 \f
5527 /* Perform possibly multi-word comparison and conditional jump to LABEL
5528    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
5529
5530    The algorithm is based on the code in expr.c:do_jump.
5531
5532    Note that this does not perform a general comparison.  Only variants
5533    generated within expmed.c are correctly handled, others abort (but could
5534    be handled if needed).  */
5535
5536 static void
5537 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5538                  rtx label)
5539 {
5540   /* If this mode is an integer too wide to compare properly,
5541      compare word by word.  Rely on cse to optimize constant cases.  */
5542
5543   if (GET_MODE_CLASS (mode) == MODE_INT
5544       && ! can_compare_p (op, mode, ccp_jump))
5545     {
5546       rtx label2 = gen_label_rtx ();
5547
5548       switch (op)
5549         {
5550         case LTU:
5551           do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
5552           break;
5553
5554         case LEU:
5555           do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
5556           break;
5557
5558         case LT:
5559           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
5560           break;
5561
5562         case GT:
5563           do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
5564           break;
5565
5566         case GE:
5567           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
5568           break;
5569
5570           /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
5571              that's the only equality operations we do */
5572         case EQ:
5573           gcc_assert (arg2 == const0_rtx && mode == GET_MODE(arg1));
5574           do_jump_by_parts_equality_rtx (arg1, label2, label);
5575           break;
5576
5577         case NE:
5578           gcc_assert (arg2 == const0_rtx && mode == GET_MODE(arg1));
5579           do_jump_by_parts_equality_rtx (arg1, label, label2);
5580           break;
5581
5582         default:
5583           gcc_unreachable ();
5584         }
5585
5586       emit_label (label2);
5587     }
5588   else
5589     emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
5590 }