gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 2, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING.  If not, write to the Free
  20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  21 02111-1307, USA.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "tm.h"
  28 #include "toplev.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "tm_p.h"
  32 #include "flags.h"
  33 #include "insn-config.h"
  34 #include "expr.h"
  35 #include "optabs.h"
  36 #include "real.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39
  40 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  41                                    unsigned HOST_WIDE_INT,
  42                                    unsigned HOST_WIDE_INT, rtx);
  43 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  44                                    unsigned HOST_WIDE_INT, rtx);
  45 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  46                                     unsigned HOST_WIDE_INT,
  47                                     unsigned HOST_WIDE_INT,
  48                                     unsigned HOST_WIDE_INT, rtx, int);
  49 static rtx mask_rtx (enum machine_mode, int, int, int);
  50 static rtx lshift_value (enum machine_mode, rtx, int, int);
  51 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  52                                     unsigned HOST_WIDE_INT, int);
  53 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  54 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  55 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  56
  57 /* Nonzero means divides or modulus operations are relatively cheap for
  58    powers of two, so don't use branches; emit the operation instead.
  59    Usually, this will mean that the MD file will emit non-branch
  60    sequences.  */
  61
  62 static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
  63 static bool smod_pow2_cheap[NUM_MACHINE_MODES];
  64
  65 #ifndef SLOW_UNALIGNED_ACCESS
  66 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  67 #endif
  68
  69 /* For compilers that support multiple targets with different word sizes,
  70    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  71    is the H8/300(H) compiler.  */
  72
  73 #ifndef MAX_BITS_PER_WORD
  74 #define MAX_BITS_PER_WORD BITS_PER_WORD
  75 #endif
  76
  77 /* Reduce conditional compilation elsewhere.  */
  78 #ifndef HAVE_insv
  79 #define HAVE_insv       0
  80 #define CODE_FOR_insv   CODE_FOR_nothing
  81 #define gen_insv(a,b,c,d) NULL_RTX
  82 #endif
  83 #ifndef HAVE_extv
  84 #define HAVE_extv       0
  85 #define CODE_FOR_extv   CODE_FOR_nothing
  86 #define gen_extv(a,b,c,d) NULL_RTX
  87 #endif
  88 #ifndef HAVE_extzv
  89 #define HAVE_extzv      0
  90 #define CODE_FOR_extzv  CODE_FOR_nothing
  91 #define gen_extzv(a,b,c,d) NULL_RTX
  92 #endif
  93
  94 /* Cost of various pieces of RTL.  Note that some of these are indexed by
  95    shift count and some by mode.  */
  96 static int zero_cost;
  97 static int add_cost[NUM_MACHINE_MODES];
  98 static int neg_cost[NUM_MACHINE_MODES];
  99 static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 100 static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 101 static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 102 static int mul_cost[NUM_MACHINE_MODES];
 103 static int div_cost[NUM_MACHINE_MODES];
 104 static int mul_widen_cost[NUM_MACHINE_MODES];
 105 static int mul_highpart_cost[NUM_MACHINE_MODES];
 106
 107 void
 108 init_expmed (void)
 109 {
 110   struct
 111   {
 112     struct rtx_def reg;         rtunion reg_fld[2];
 113     struct rtx_def plus;        rtunion plus_fld1;
 114     struct rtx_def neg;
 115     struct rtx_def udiv;        rtunion udiv_fld1;
 116     struct rtx_def mult;        rtunion mult_fld1;
 117     struct rtx_def div;         rtunion div_fld1;
 118     struct rtx_def mod;         rtunion mod_fld1;
 119     struct rtx_def zext;
 120     struct rtx_def wide_mult;   rtunion wide_mult_fld1;
 121     struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
 122     struct rtx_def wide_trunc;
 123     struct rtx_def shift;       rtunion shift_fld1;
 124     struct rtx_def shift_mult;  rtunion shift_mult_fld1;
 125     struct rtx_def shift_add;   rtunion shift_add_fld1;
 126     struct rtx_def shift_sub;   rtunion shift_sub_fld1;
 127   } all;
 128
 129   rtx pow2[MAX_BITS_PER_WORD];
 130   rtx cint[MAX_BITS_PER_WORD];
 131   int m, n;
 132   enum machine_mode mode, wider_mode;
 133
 134   zero_cost = rtx_cost (const0_rtx, 0);
 135
 136   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 137     {
 138       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 139       cint[m] = GEN_INT (m);
 140     }
 141
 142   memset (&all, 0, sizeof all);
 143
 144   PUT_CODE (&all.reg, REG);
 145   REGNO (&all.reg) = 10000;
 146
 147   PUT_CODE (&all.plus, PLUS);
 148   XEXP (&all.plus, 0) = &all.reg;
 149   XEXP (&all.plus, 1) = &all.reg;
 150
 151   PUT_CODE (&all.neg, NEG);
 152   XEXP (&all.neg, 0) = &all.reg;
 153
 154   PUT_CODE (&all.udiv, UDIV);
 155   XEXP (&all.udiv, 0) = &all.reg;
 156   XEXP (&all.udiv, 1) = &all.reg;
 157
 158   PUT_CODE (&all.mult, MULT);
 159   XEXP (&all.mult, 0) = &all.reg;
 160   XEXP (&all.mult, 1) = &all.reg;
 161
 162   PUT_CODE (&all.div, DIV);
 163   XEXP (&all.div, 0) = &all.reg;
 164   XEXP (&all.div, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
 165
 166   PUT_CODE (&all.mod, MOD);
 167   XEXP (&all.mod, 0) = &all.reg;
 168   XEXP (&all.mod, 1) = XEXP (&all.div, 1);
 169
 170   PUT_CODE (&all.zext, ZERO_EXTEND);
 171   XEXP (&all.zext, 0) = &all.reg;
 172
 173   PUT_CODE (&all.wide_mult, MULT);
 174   XEXP (&all.wide_mult, 0) = &all.zext;
 175   XEXP (&all.wide_mult, 1) = &all.zext;
 176
 177   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 178   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 179
 180   PUT_CODE (&all.wide_trunc, TRUNCATE);
 181   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 182
 183   PUT_CODE (&all.shift, ASHIFT);
 184   XEXP (&all.shift, 0) = &all.reg;
 185
 186   PUT_CODE (&all.shift_mult, MULT);
 187   XEXP (&all.shift_mult, 0) = &all.reg;
 188
 189   PUT_CODE (&all.shift_add, PLUS);
 190   XEXP (&all.shift_add, 0) = &all.shift_mult;
 191   XEXP (&all.shift_add, 1) = &all.reg;
 192
 193   PUT_CODE (&all.shift_sub, MINUS);
 194   XEXP (&all.shift_sub, 0) = &all.shift_mult;
 195   XEXP (&all.shift_sub, 1) = &all.reg;
 196
 197   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 198        mode != VOIDmode;
 199        mode = GET_MODE_WIDER_MODE (mode))
 200     {
 201       PUT_MODE (&all.reg, mode);
 202       PUT_MODE (&all.plus, mode);
 203       PUT_MODE (&all.neg, mode);
 204       PUT_MODE (&all.udiv, mode);
 205       PUT_MODE (&all.mult, mode);
 206       PUT_MODE (&all.div, mode);
 207       PUT_MODE (&all.mod, mode);
 208       PUT_MODE (&all.wide_trunc, mode);
 209       PUT_MODE (&all.shift, mode);
 210       PUT_MODE (&all.shift_mult, mode);
 211       PUT_MODE (&all.shift_add, mode);
 212       PUT_MODE (&all.shift_sub, mode);
 213
 214       add_cost[mode] = rtx_cost (&all.plus, SET);
 215       neg_cost[mode] = rtx_cost (&all.neg, SET);
 216       div_cost[mode] = rtx_cost (&all.udiv, SET);
 217       mul_cost[mode] = rtx_cost (&all.mult, SET);
 218
 219       sdiv_pow2_cheap[mode] = (rtx_cost (&all.div, SET) <= 2 * add_cost[mode]);
 220       smod_pow2_cheap[mode] = (rtx_cost (&all.mod, SET) <= 4 * add_cost[mode]);
 221
 222       wider_mode = GET_MODE_WIDER_MODE (mode);
 223       if (wider_mode != VOIDmode)
 224         {
 225           PUT_MODE (&all.zext, wider_mode);
 226           PUT_MODE (&all.wide_mult, wider_mode);
 227           PUT_MODE (&all.wide_lshr, wider_mode);
 228           XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
 229
 230           mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
 231           mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
 232         }
 233
 234       shift_cost[mode][0] = 0;
 235       shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
 236
 237       n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
 238       for (m = 1; m < n; m++)
 239         {
 240           XEXP (&all.shift, 1) = cint[m];
 241           XEXP (&all.shift_mult, 1) = pow2[m];
 242
 243           shift_cost[mode][m] = rtx_cost (&all.shift, SET);
 244           shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
 245           shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
 246         }
 247     }
 248 }
 249
 250 /* Return an rtx representing minus the value of X.
 251    MODE is the intended mode of the result,
 252    useful if X is a CONST_INT.  */
 253
 254 rtx
 255 negate_rtx (enum machine_mode mode, rtx x)
 256 {
 257   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 258
 259   if (result == 0)
 260     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 261
 262   return result;
 263 }
 264
 265 /* Report on the availability of insv/extv/extzv and the desired mode
 266    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 267    is false; else the mode of the specified operand.  If OPNO is -1,
 268    all the caller cares about is whether the insn is available.  */
 269 enum machine_mode
 270 mode_for_extraction (enum extraction_pattern pattern, int opno)
 271 {
 272   const struct insn_data *data;
 273
 274   switch (pattern)
 275     {
 276     case EP_insv:
 277       if (HAVE_insv)
 278         {
 279           data = &insn_data[CODE_FOR_insv];
 280           break;
 281         }
 282       return MAX_MACHINE_MODE;
 283
 284     case EP_extv:
 285       if (HAVE_extv)
 286         {
 287           data = &insn_data[CODE_FOR_extv];
 288           break;
 289         }
 290       return MAX_MACHINE_MODE;
 291
 292     case EP_extzv:
 293       if (HAVE_extzv)
 294         {
 295           data = &insn_data[CODE_FOR_extzv];
 296           break;
 297         }
 298       return MAX_MACHINE_MODE;
 299
 300     default:
 301       gcc_unreachable ();
 302     }
 303
 304   if (opno == -1)
 305     return VOIDmode;
 306
 307   /* Everyone who uses this function used to follow it with
 308      if (result == VOIDmode) result = word_mode; */
 309   if (data->operand[opno].mode == VOIDmode)
 310     return word_mode;
 311   return data->operand[opno].mode;
 312 }
 313
 314 \f
 315 /* Generate code to store value from rtx VALUE
 316    into a bit-field within structure STR_RTX
 317    containing BITSIZE bits starting at bit BITNUM.
 318    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
 319    ALIGN is the alignment that STR_RTX is known to have.
 320    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
 321
 322 /* ??? Note that there are two different ideas here for how
 323    to determine the size to count bits within, for a register.
 324    One is BITS_PER_WORD, and the other is the size of operand 3
 325    of the insv pattern.
 326
 327    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
 328    else, we use the mode of operand 3.  */
 329
 330 rtx
 331 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 332                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 333                  rtx value)
 334 {
 335   unsigned int unit
 336     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
 337   unsigned HOST_WIDE_INT offset = bitnum / unit;
 338   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
 339   rtx op0 = str_rtx;
 340   int byte_offset;
 341   rtx orig_value;
 342
 343   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 344
 345   while (GET_CODE (op0) == SUBREG)
 346     {
 347       /* The following line once was done only if WORDS_BIG_ENDIAN,
 348          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 349          meaningful at a much higher level; when structures are copied
 350          between memory and regs, the higher-numbered regs
 351          always get higher addresses.  */
 352       offset += (SUBREG_BYTE (op0) / UNITS_PER_WORD);
 353       /* We used to adjust BITPOS here, but now we do the whole adjustment
 354          right after the loop.  */
 355       op0 = SUBREG_REG (op0);
 356     }
 357
 358   /* Use vec_set patterns for inserting parts of vectors whenever
 359      available.  */
 360   if (VECTOR_MODE_P (GET_MODE (op0))
 361       && !MEM_P (op0)
 362       && (vec_set_optab->handlers[GET_MODE (op0)].insn_code
 363           != CODE_FOR_nothing)
 364       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 365       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 366       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 367     {
 368       enum machine_mode outermode = GET_MODE (op0);
 369       enum machine_mode innermode = GET_MODE_INNER (outermode);
 370       int icode = (int) vec_set_optab->handlers[outermode].insn_code;
 371       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 372       rtx rtxpos = GEN_INT (pos);
 373       rtx src = value;
 374       rtx dest = op0;
 375       rtx pat, seq;
 376       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
 377       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
 378       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
 379
 380       start_sequence ();
 381
 382       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
 383         src = copy_to_mode_reg (mode1, src);
 384
 385       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 386         rtxpos = copy_to_mode_reg (mode1, rtxpos);
 387
 388       /* We could handle this, but we should always be called with a pseudo
 389          for our targets and all insns should take them as outputs.  */
 390       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
 391                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
 392                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
 393       pat = GEN_FCN (icode) (dest, src, rtxpos);
 394       seq = get_insns ();
 395       end_sequence ();
 396       if (pat)
 397         {
 398           emit_insn (seq);
 399           emit_insn (pat);
 400           return dest;
 401         }
 402     }
 403
 404   if (flag_force_mem)
 405     {
 406       int old_generating_concat_p = generating_concat_p;
 407       generating_concat_p = 0;
 408       value = force_not_mem (value);
 409       generating_concat_p = old_generating_concat_p;
 410     }
 411
 412   /* If the target is a register, overwriting the entire object, or storing
 413      a full-word or multi-word field can be done with just a SUBREG.
 414
 415      If the target is memory, storing any naturally aligned field can be
 416      done with a simple store.  For targets that support fast unaligned
 417      memory, any naturally sized, unit aligned field can be done directly.  */
 418
 419   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 420                 + (offset * UNITS_PER_WORD);
 421
 422   if (bitpos == 0
 423       && bitsize == GET_MODE_BITSIZE (fieldmode)
 424       && (!MEM_P (op0)
 425           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 426              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 427              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 428           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 429              || (offset * BITS_PER_UNIT % bitsize == 0
 430                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 431     {
 432       if (GET_MODE (op0) != fieldmode)
 433         {
 434           if (GET_CODE (op0) == SUBREG)
 435             {
 436               /* Else we've got some float mode source being extracted
 437                  into a different float mode destination -- this
 438                  combination of subregs results in Severe Tire
 439                  Damage.  */
 440               gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
 441                           || GET_MODE_CLASS (fieldmode) == MODE_INT
 442                           || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 443               op0 = SUBREG_REG (op0);
 444             }
 445           if (REG_P (op0))
 446             op0 = gen_rtx_SUBREG (fieldmode, op0, byte_offset);
 447           else
 448             op0 = adjust_address (op0, fieldmode, offset);
 449         }
 450       emit_move_insn (op0, value);
 451       return value;
 452     }
 453
 454   /* Make sure we are playing with integral modes.  Pun with subregs
 455      if we aren't.  This must come after the entire register case above,
 456      since that case is valid for any mode.  The following cases are only
 457      valid for integral modes.  */
 458   {
 459     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 460     if (imode != GET_MODE (op0))
 461       {
 462         if (MEM_P (op0))
 463           op0 = adjust_address (op0, imode, 0);
 464         else
 465           {
 466             gcc_assert (imode != BLKmode);
 467             op0 = gen_lowpart (imode, op0);
 468           }
 469       }
 470   }
 471
 472   /* We may be accessing data outside the field, which means
 473      we can alias adjacent data.  */
 474   if (MEM_P (op0))
 475     {
 476       op0 = shallow_copy_rtx (op0);
 477       set_mem_alias_set (op0, 0);
 478       set_mem_expr (op0, 0);
 479     }
 480
 481   /* If OP0 is a register, BITPOS must count within a word.
 482      But as we have it, it counts within whatever size OP0 now has.
 483      On a bigendian machine, these are not the same, so convert.  */
 484   if (BYTES_BIG_ENDIAN
 485       && !MEM_P (op0)
 486       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 487     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 488
 489   /* Storing an lsb-aligned field in a register
 490      can be done with a movestrict instruction.  */
 491
 492   if (!MEM_P (op0)
 493       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 494       && bitsize == GET_MODE_BITSIZE (fieldmode)
 495       && (movstrict_optab->handlers[fieldmode].insn_code
 496           != CODE_FOR_nothing))
 497     {
 498       int icode = movstrict_optab->handlers[fieldmode].insn_code;
 499
 500       /* Get appropriate low part of the value being stored.  */
 501       if (GET_CODE (value) == CONST_INT || REG_P (value))
 502         value = gen_lowpart (fieldmode, value);
 503       else if (!(GET_CODE (value) == SYMBOL_REF
 504                  || GET_CODE (value) == LABEL_REF
 505                  || GET_CODE (value) == CONST))
 506         value = convert_to_mode (fieldmode, value, 0);
 507
 508       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 509         value = copy_to_mode_reg (fieldmode, value);
 510
 511       if (GET_CODE (op0) == SUBREG)
 512         {
 513           /* Else we've got some float mode source being extracted into
 514              a different float mode destination -- this combination of
 515              subregs results in Severe Tire Damage.  */
 516           gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
 517                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 518                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 519           op0 = SUBREG_REG (op0);
 520         }
 521
 522       emit_insn (GEN_FCN (icode)
 523                  (gen_rtx_SUBREG (fieldmode, op0,
 524                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 525                                   + (offset * UNITS_PER_WORD)),
 526                                   value));
 527
 528       return value;
 529     }
 530
 531   /* Handle fields bigger than a word.  */
 532
 533   if (bitsize > BITS_PER_WORD)
 534     {
 535       /* Here we transfer the words of the field
 536          in the order least significant first.
 537          This is because the most significant word is the one which may
 538          be less than full.
 539          However, only do that if the value is not BLKmode.  */
 540
 541       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 542       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 543       unsigned int i;
 544
 545       /* This is the mode we must force value to, so that there will be enough
 546          subwords to extract.  Note that fieldmode will often (always?) be
 547          VOIDmode, because that is what store_field uses to indicate that this
 548          is a bit field, but passing VOIDmode to operand_subword_force will
 549          result in an abort.  */
 550       fieldmode = GET_MODE (value);
 551       if (fieldmode == VOIDmode)
 552         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 553
 554       for (i = 0; i < nwords; i++)
 555         {
 556           /* If I is 0, use the low-order word in both field and target;
 557              if I is 1, use the next to lowest word; and so on.  */
 558           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 559           unsigned int bit_offset = (backwards
 560                                      ? MAX ((int) bitsize - ((int) i + 1)
 561                                             * BITS_PER_WORD,
 562                                             0)
 563                                      : (int) i * BITS_PER_WORD);
 564
 565           store_bit_field (op0, MIN (BITS_PER_WORD,
 566                                      bitsize - i * BITS_PER_WORD),
 567                            bitnum + bit_offset, word_mode,
 568                            operand_subword_force (value, wordnum, fieldmode));
 569         }
 570       return value;
 571     }
 572
 573   /* From here on we can assume that the field to be stored in is
 574      a full-word (whatever type that is), since it is shorter than a word.  */
 575
 576   /* OFFSET is the number of words or bytes (UNIT says which)
 577      from STR_RTX to the first word or byte containing part of the field.  */
 578
 579   if (!MEM_P (op0))
 580     {
 581       if (offset != 0
 582           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 583         {
 584           if (!REG_P (op0))
 585             {
 586               /* Since this is a destination (lvalue), we can't copy it to a
 587                  pseudo.  We can trivially remove a SUBREG that does not
 588                  change the size of the operand.  Such a SUBREG may have been
 589                  added above.  Otherwise, abort.  */
 590               gcc_assert (GET_CODE (op0) == SUBREG
 591                           && (GET_MODE_SIZE (GET_MODE (op0))
 592                               == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
 593               op0 = SUBREG_REG (op0);
 594             }
 595           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 596                                 op0, (offset * UNITS_PER_WORD));
 597         }
 598       offset = 0;
 599     }
 600
 601   /* If VALUE is a floating-point mode, access it as an integer of the
 602      corresponding size.  This can occur on a machine with 64 bit registers
 603      that uses SFmode for float.  This can also occur for unaligned float
 604      structure fields.  */
 605   orig_value = value;
 606   if (GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 607       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 608     value = gen_lowpart ((GET_MODE (value) == VOIDmode
 609                           ? word_mode : int_mode_for_mode (GET_MODE (value))),
 610                          value);
 611
 612   /* Now OFFSET is nonzero only if OP0 is memory
 613      and is therefore always measured in bytes.  */
 614
 615   if (HAVE_insv
 616       && GET_MODE (value) != BLKmode
 617       && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
 618       /* Ensure insv's size is wide enough for this field.  */
 619       && (GET_MODE_BITSIZE (op_mode) >= bitsize)
 620       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
 621             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
 622     {
 623       int xbitpos = bitpos;
 624       rtx value1;
 625       rtx xop0 = op0;
 626       rtx last = get_last_insn ();
 627       rtx pat;
 628       enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
 629       int save_volatile_ok = volatile_ok;
 630
 631       volatile_ok = 1;
 632
 633       /* If this machine's insv can only insert into a register, copy OP0
 634          into a register and save it back later.  */
 635       /* This used to check flag_force_mem, but that was a serious
 636          de-optimization now that flag_force_mem is enabled by -O2.  */
 637       if (MEM_P (op0)
 638           && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
 639                 (op0, VOIDmode)))
 640         {
 641           rtx tempreg;
 642           enum machine_mode bestmode;
 643
 644           /* Get the mode to use for inserting into this field.  If OP0 is
 645              BLKmode, get the smallest mode consistent with the alignment. If
 646              OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
 647              mode. Otherwise, use the smallest mode containing the field.  */
 648
 649           if (GET_MODE (op0) == BLKmode
 650               || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
 651             bestmode
 652               = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
 653                                MEM_VOLATILE_P (op0));
 654           else
 655             bestmode = GET_MODE (op0);
 656
 657           if (bestmode == VOIDmode
 658               || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 659                   && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 660             goto insv_loses;
 661
 662           /* Adjust address to point to the containing unit of that mode.
 663              Compute offset as multiple of this unit, counting in bytes.  */
 664           unit = GET_MODE_BITSIZE (bestmode);
 665           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 666           bitpos = bitnum % unit;
 667           op0 = adjust_address (op0, bestmode,  offset);
 668
 669           /* Fetch that unit, store the bitfield in it, then store
 670              the unit.  */
 671           tempreg = copy_to_reg (op0);
 672           store_bit_field (tempreg, bitsize, bitpos, fieldmode, orig_value);
 673           emit_move_insn (op0, tempreg);
 674           return value;
 675         }
 676       volatile_ok = save_volatile_ok;
 677
 678       /* Add OFFSET into OP0's address.  */
 679       if (MEM_P (xop0))
 680         xop0 = adjust_address (xop0, byte_mode, offset);
 681
 682       /* If xop0 is a register, we need it in MAXMODE
 683          to make it acceptable to the format of insv.  */
 684       if (GET_CODE (xop0) == SUBREG)
 685         /* We can't just change the mode, because this might clobber op0,
 686            and we will need the original value of op0 if insv fails.  */
 687         xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 688       if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
 689         xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
 690
 691       /* On big-endian machines, we count bits from the most significant.
 692          If the bit field insn does not, we must invert.  */
 693
 694       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 695         xbitpos = unit - bitsize - xbitpos;
 696
 697       /* We have been counting XBITPOS within UNIT.
 698          Count instead within the size of the register.  */
 699       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
 700         xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
 701
 702       unit = GET_MODE_BITSIZE (maxmode);
 703
 704       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
 705       value1 = value;
 706       if (GET_MODE (value) != maxmode)
 707         {
 708           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 709             {
 710               /* Optimization: Don't bother really extending VALUE
 711                  if it has all the bits we will actually use.  However,
 712                  if we must narrow it, be sure we do it correctly.  */
 713
 714               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
 715                 {
 716                   rtx tmp;
 717
 718                   tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
 719                   if (! tmp)
 720                     tmp = simplify_gen_subreg (maxmode,
 721                                                force_reg (GET_MODE (value),
 722                                                           value1),
 723                                                GET_MODE (value), 0);
 724                   value1 = tmp;
 725                 }
 726               else
 727                 value1 = gen_lowpart (maxmode, value1);
 728             }
 729           else if (GET_CODE (value) == CONST_INT)
 730             value1 = gen_int_mode (INTVAL (value), maxmode);
 731           else
 732             /* Parse phase is supposed to make VALUE's data type
 733                match that of the component reference, which is a type
 734                at least as wide as the field; so VALUE should have
 735                a mode that corresponds to that type.  */
 736             gcc_assert (CONSTANT_P (value));
 737         }
 738
 739       /* If this machine's insv insists on a register,
 740          get VALUE1 into a register.  */
 741       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 742              (value1, maxmode)))
 743         value1 = force_reg (maxmode, value1);
 744
 745       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 746       if (pat)
 747         emit_insn (pat);
 748       else
 749         {
 750           delete_insns_since (last);
 751           store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 752         }
 753     }
 754   else
 755     insv_loses:
 756     /* Insv is not available; store using shifts and boolean ops.  */
 757     store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 758   return value;
 759 }
 760 \f
 761 /* Use shifts and boolean operations to store VALUE
 762    into a bit field of width BITSIZE
 763    in a memory location specified by OP0 except offset by OFFSET bytes.
 764      (OFFSET must be 0 if OP0 is a register.)
 765    The field starts at position BITPOS within the byte.
 766     (If OP0 is a register, it may be a full word or a narrower mode,
 767      but BITPOS still counts within a full word,
 768      which is significant on bigendian machines.)  */
 769
 770 static void
 771 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 772                        unsigned HOST_WIDE_INT bitsize,
 773                        unsigned HOST_WIDE_INT bitpos, rtx value)
 774 {
 775   enum machine_mode mode;
 776   unsigned int total_bits = BITS_PER_WORD;
 777   rtx subtarget, temp;
 778   int all_zero = 0;
 779   int all_one = 0;
 780
 781   /* There is a case not handled here:
 782      a structure with a known alignment of just a halfword
 783      and a field split across two aligned halfwords within the structure.
 784      Or likewise a structure with a known alignment of just a byte
 785      and a field split across two bytes.
 786      Such cases are not supposed to be able to occur.  */
 787
 788   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 789     {
 790       gcc_assert (!offset);
 791       /* Special treatment for a bit field split across two registers.  */
 792       if (bitsize + bitpos > BITS_PER_WORD)
 793         {
 794           store_split_bit_field (op0, bitsize, bitpos, value);
 795           return;
 796         }
 797     }
 798   else
 799     {
 800       /* Get the proper mode to use for this field.  We want a mode that
 801          includes the entire field.  If such a mode would be larger than
 802          a word, we won't be doing the extraction the normal way.
 803          We don't want a mode bigger than the destination.  */
 804
 805       mode = GET_MODE (op0);
 806       if (GET_MODE_BITSIZE (mode) == 0
 807           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 808         mode = word_mode;
 809       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 810                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 811
 812       if (mode == VOIDmode)
 813         {
 814           /* The only way this should occur is if the field spans word
 815              boundaries.  */
 816           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 817                                  value);
 818           return;
 819         }
 820
 821       total_bits = GET_MODE_BITSIZE (mode);
 822
 823       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 824          be in the range 0 to total_bits-1, and put any excess bytes in
 825          OFFSET.  */
 826       if (bitpos >= total_bits)
 827         {
 828           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 829           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 830                      * BITS_PER_UNIT);
 831         }
 832
 833       /* Get ref to an aligned byte, halfword, or word containing the field.
 834          Adjust BITPOS to be position within a word,
 835          and OFFSET to be the offset of that word.
 836          Then alter OP0 to refer to that word.  */
 837       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 838       offset -= (offset % (total_bits / BITS_PER_UNIT));
 839       op0 = adjust_address (op0, mode, offset);
 840     }
 841
 842   mode = GET_MODE (op0);
 843
 844   /* Now MODE is either some integral mode for a MEM as OP0,
 845      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 846      The bit field is contained entirely within OP0.
 847      BITPOS is the starting bit number within OP0.
 848      (OP0's mode may actually be narrower than MODE.)  */
 849
 850   if (BYTES_BIG_ENDIAN)
 851       /* BITPOS is the distance between our msb
 852          and that of the containing datum.
 853          Convert it to the distance from the lsb.  */
 854       bitpos = total_bits - bitsize - bitpos;
 855
 856   /* Now BITPOS is always the distance between our lsb
 857      and that of OP0.  */
 858
 859   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 860      we must first convert its mode to MODE.  */
 861
 862   if (GET_CODE (value) == CONST_INT)
 863     {
 864       HOST_WIDE_INT v = INTVAL (value);
 865
 866       if (bitsize < HOST_BITS_PER_WIDE_INT)
 867         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 868
 869       if (v == 0)
 870         all_zero = 1;
 871       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 872                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 873                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 874         all_one = 1;
 875
 876       value = lshift_value (mode, value, bitpos, bitsize);
 877     }
 878   else
 879     {
 880       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 881                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 882
 883       if (GET_MODE (value) != mode)
 884         {
 885           if ((REG_P (value) || GET_CODE (value) == SUBREG)
 886               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 887             value = gen_lowpart (mode, value);
 888           else
 889             value = convert_to_mode (mode, value, 1);
 890         }
 891
 892       if (must_and)
 893         value = expand_binop (mode, and_optab, value,
 894                               mask_rtx (mode, 0, bitsize, 0),
 895                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 896       if (bitpos > 0)
 897         value = expand_shift (LSHIFT_EXPR, mode, value,
 898                               build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
 899     }
 900
 901   /* Now clear the chosen bits in OP0,
 902      except that if VALUE is -1 we need not bother.  */
 903
 904   subtarget = (REG_P (op0) || ! flag_force_mem) ? op0 : 0;
 905
 906   if (! all_one)
 907     {
 908       temp = expand_binop (mode, and_optab, op0,
 909                            mask_rtx (mode, bitpos, bitsize, 1),
 910                            subtarget, 1, OPTAB_LIB_WIDEN);
 911       subtarget = temp;
 912     }
 913   else
 914     temp = op0;
 915
 916   /* Now logical-or VALUE into OP0, unless it is zero.  */
 917
 918   if (! all_zero)
 919     temp = expand_binop (mode, ior_optab, temp, value,
 920                          subtarget, 1, OPTAB_LIB_WIDEN);
 921   if (op0 != temp)
 922     emit_move_insn (op0, temp);
 923 }
 924 \f
 925 /* Store a bit field that is split across multiple accessible memory objects.
 926
 927    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 928    BITSIZE is the field width; BITPOS the position of its first bit
 929    (within the word).
 930    VALUE is the value to store.
 931
 932    This does not yet handle fields wider than BITS_PER_WORD.  */
 933
 934 static void
 935 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 936                        unsigned HOST_WIDE_INT bitpos, rtx value)
 937 {
 938   unsigned int unit;
 939   unsigned int bitsdone = 0;
 940
 941   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 942      much at a time.  */
 943   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 944     unit = BITS_PER_WORD;
 945   else
 946     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
 947
 948   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 949      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 950      that VALUE might be a floating-point constant.  */
 951   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
 952     {
 953       rtx word = gen_lowpart_common (word_mode, value);
 954
 955       if (word && (value != word))
 956         value = word;
 957       else
 958         value = gen_lowpart_common (word_mode,
 959                                     force_reg (GET_MODE (value) != VOIDmode
 960                                                ? GET_MODE (value)
 961                                                : word_mode, value));
 962     }
 963
 964   while (bitsdone < bitsize)
 965     {
 966       unsigned HOST_WIDE_INT thissize;
 967       rtx part, word;
 968       unsigned HOST_WIDE_INT thispos;
 969       unsigned HOST_WIDE_INT offset;
 970
 971       offset = (bitpos + bitsdone) / unit;
 972       thispos = (bitpos + bitsdone) % unit;
 973
 974       /* THISSIZE must not overrun a word boundary.  Otherwise,
 975          store_fixed_bit_field will call us again, and we will mutually
 976          recurse forever.  */
 977       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
 978       thissize = MIN (thissize, unit - thispos);
 979
 980       if (BYTES_BIG_ENDIAN)
 981         {
 982           int total_bits;
 983
 984           /* We must do an endian conversion exactly the same way as it is
 985              done in extract_bit_field, so that the two calls to
 986              extract_fixed_bit_field will have comparable arguments.  */
 987           if (!MEM_P (value) || GET_MODE (value) == BLKmode)
 988             total_bits = BITS_PER_WORD;
 989           else
 990             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
 991
 992           /* Fetch successively less significant portions.  */
 993           if (GET_CODE (value) == CONST_INT)
 994             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 995                              >> (bitsize - bitsdone - thissize))
 996                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 997           else
 998             /* The args are chosen so that the last part includes the
 999                lsb.  Give extract_bit_field the value it needs (with
1000                endianness compensation) to fetch the piece we want.  */
1001             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1002                                             total_bits - bitsize + bitsdone,
1003                                             NULL_RTX, 1);
1004         }
1005       else
1006         {
1007           /* Fetch successively more significant portions.  */
1008           if (GET_CODE (value) == CONST_INT)
1009             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1010                              >> bitsdone)
1011                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1012           else
1013             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1014                                             bitsdone, NULL_RTX, 1);
1015         }
1016
1017       /* If OP0 is a register, then handle OFFSET here.
1018
1019          When handling multiword bitfields, extract_bit_field may pass
1020          down a word_mode SUBREG of a larger REG for a bitfield that actually
1021          crosses a word boundary.  Thus, for a SUBREG, we must find
1022          the current word starting from the base register.  */
1023       if (GET_CODE (op0) == SUBREG)
1024         {
1025           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1026           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1027                                         GET_MODE (SUBREG_REG (op0)));
1028           offset = 0;
1029         }
1030       else if (REG_P (op0))
1031         {
1032           word = operand_subword_force (op0, offset, GET_MODE (op0));
1033           offset = 0;
1034         }
1035       else
1036         word = op0;
1037
1038       /* OFFSET is in UNITs, and UNIT is in bits.
1039          store_fixed_bit_field wants offset in bytes.  */
1040       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1041                              thispos, part);
1042       bitsdone += thissize;
1043     }
1044 }
1045 \f
1046 /* Generate code to extract a byte-field from STR_RTX
1047    containing BITSIZE bits, starting at BITNUM,
1048    and put it in TARGET if possible (if TARGET is nonzero).
1049    Regardless of TARGET, we return the rtx for where the value is placed.
1050
1051    STR_RTX is the structure containing the byte (a REG or MEM).
1052    UNSIGNEDP is nonzero if this is an unsigned bit field.
1053    MODE is the natural mode of the field value once extracted.
1054    TMODE is the mode the caller would like the value to have;
1055    but the value may be returned with type MODE instead.
1056
1057    TOTAL_SIZE is the size in bytes of the containing structure,
1058    or -1 if varying.
1059
1060    If a TARGET is specified and we can store in it at no extra cost,
1061    we do so, and return TARGET.
1062    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1063    if they are equally easy.  */
1064
1065 rtx
1066 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1067                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1068                    enum machine_mode mode, enum machine_mode tmode)
1069 {
1070   unsigned int unit
1071     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1072   unsigned HOST_WIDE_INT offset = bitnum / unit;
1073   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
1074   rtx op0 = str_rtx;
1075   rtx spec_target = target;
1076   rtx spec_target_subreg = 0;
1077   enum machine_mode int_mode;
1078   enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1079   enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1080   enum machine_mode mode1;
1081   int byte_offset;
1082
1083   if (tmode == VOIDmode)
1084     tmode = mode;
1085
1086   while (GET_CODE (op0) == SUBREG)
1087     {
1088       bitpos += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1089       if (bitpos > unit)
1090         {
1091           offset += (bitpos / unit);
1092           bitpos %= unit;
1093         }
1094       op0 = SUBREG_REG (op0);
1095     }
1096
1097   if (REG_P (op0)
1098       && mode == GET_MODE (op0)
1099       && bitnum == 0
1100       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1101     {
1102       /* We're trying to extract a full register from itself.  */
1103       return op0;
1104     }
1105
1106   /* Use vec_extract patterns for extracting parts of vectors whenever
1107      available.  */
1108   if (VECTOR_MODE_P (GET_MODE (op0))
1109       && !MEM_P (op0)
1110       && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
1111           != CODE_FOR_nothing)
1112       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1113           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1114     {
1115       enum machine_mode outermode = GET_MODE (op0);
1116       enum machine_mode innermode = GET_MODE_INNER (outermode);
1117       int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
1118       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1119       rtx rtxpos = GEN_INT (pos);
1120       rtx src = op0;
1121       rtx dest = NULL, pat, seq;
1122       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1123       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1124       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1125
1126       if (innermode == tmode || innermode == mode)
1127         dest = target;
1128
1129       if (!dest)
1130         dest = gen_reg_rtx (innermode);
1131
1132       start_sequence ();
1133
1134       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1135         dest = copy_to_mode_reg (mode0, dest);
1136
1137       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1138         src = copy_to_mode_reg (mode1, src);
1139
1140       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1141         rtxpos = copy_to_mode_reg (mode1, rtxpos);
1142
1143       /* We could handle this, but we should always be called with a pseudo
1144          for our targets and all insns should take them as outputs.  */
1145       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1146                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
1147                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1148
1149       pat = GEN_FCN (icode) (dest, src, rtxpos);
1150       seq = get_insns ();
1151       end_sequence ();
1152       if (pat)
1153         {
1154           emit_insn (seq);
1155           emit_insn (pat);
1156           return dest;
1157         }
1158     }
1159
1160   /* Make sure we are playing with integral modes.  Pun with subregs
1161      if we aren't.  */
1162   {
1163     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1164     if (imode != GET_MODE (op0))
1165       {
1166         if (MEM_P (op0))
1167           op0 = adjust_address (op0, imode, 0);
1168         else
1169           {
1170             gcc_assert (imode != BLKmode);
1171             op0 = gen_lowpart (imode, op0);
1172           }
1173       }
1174   }
1175
1176   /* We may be accessing data outside the field, which means
1177      we can alias adjacent data.  */
1178   if (MEM_P (op0))
1179     {
1180       op0 = shallow_copy_rtx (op0);
1181       set_mem_alias_set (op0, 0);
1182       set_mem_expr (op0, 0);
1183     }
1184
1185   /* Extraction of a full-word or multi-word value from a structure
1186      in a register or aligned memory can be done with just a SUBREG.
1187      A subword value in the least significant part of a register
1188      can also be extracted with a SUBREG.  For this, we need the
1189      byte offset of the value in op0.  */
1190
1191   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1192
1193   /* If OP0 is a register, BITPOS must count within a word.
1194      But as we have it, it counts within whatever size OP0 now has.
1195      On a bigendian machine, these are not the same, so convert.  */
1196   if (BYTES_BIG_ENDIAN
1197       && !MEM_P (op0)
1198       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1199     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1200
1201   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1202      If that's wrong, the solution is to test for it and set TARGET to 0
1203      if needed.  */
1204
1205   /* Only scalar integer modes can be converted via subregs.  There is an
1206      additional problem for FP modes here in that they can have a precision
1207      which is different from the size.  mode_for_size uses precision, but
1208      we want a mode based on the size, so we must avoid calling it for FP
1209      modes.  */
1210   mode1  = (SCALAR_INT_MODE_P (tmode)
1211             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1212             : mode);
1213
1214   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1215         && bitpos % BITS_PER_WORD == 0)
1216        || (mode1 != BLKmode
1217            /* ??? The big endian test here is wrong.  This is correct
1218               if the value is in a register, and if mode_for_size is not
1219               the same mode as op0.  This causes us to get unnecessarily
1220               inefficient code from the Thumb port when -mbig-endian.  */
1221            && (BYTES_BIG_ENDIAN
1222                ? bitpos + bitsize == BITS_PER_WORD
1223                : bitpos == 0)))
1224       && ((!MEM_P (op0)
1225            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1226                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1227            && GET_MODE_SIZE (mode1) != 0
1228            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1229           || (MEM_P (op0)
1230               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1231                   || (offset * BITS_PER_UNIT % bitsize == 0
1232                       && MEM_ALIGN (op0) % bitsize == 0)))))
1233     {
1234       if (mode1 != GET_MODE (op0))
1235         {
1236           if (GET_CODE (op0) == SUBREG)
1237             {
1238               if (GET_MODE (SUBREG_REG (op0)) == mode1
1239                   || GET_MODE_CLASS (mode1) == MODE_INT
1240                   || GET_MODE_CLASS (mode1) == MODE_PARTIAL_INT)
1241                 op0 = SUBREG_REG (op0);
1242               else
1243                 /* Else we've got some float mode source being extracted into
1244                    a different float mode destination -- this combination of
1245                    subregs results in Severe Tire Damage.  */
1246                 goto no_subreg_mode_swap;
1247             }
1248           if (REG_P (op0))
1249             op0 = gen_rtx_SUBREG (mode1, op0, byte_offset);
1250           else
1251             op0 = adjust_address (op0, mode1, offset);
1252         }
1253       if (mode1 != mode)
1254         return convert_to_mode (tmode, op0, unsignedp);
1255       return op0;
1256     }
1257  no_subreg_mode_swap:
1258
1259   /* Handle fields bigger than a word.  */
1260
1261   if (bitsize > BITS_PER_WORD)
1262     {
1263       /* Here we transfer the words of the field
1264          in the order least significant first.
1265          This is because the most significant word is the one which may
1266          be less than full.  */
1267
1268       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1269       unsigned int i;
1270
1271       if (target == 0 || !REG_P (target))
1272         target = gen_reg_rtx (mode);
1273
1274       /* Indicate for flow that the entire target reg is being set.  */
1275       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1276
1277       for (i = 0; i < nwords; i++)
1278         {
1279           /* If I is 0, use the low-order word in both field and target;
1280              if I is 1, use the next to lowest word; and so on.  */
1281           /* Word number in TARGET to use.  */
1282           unsigned int wordnum
1283             = (WORDS_BIG_ENDIAN
1284                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1285                : i);
1286           /* Offset from start of field in OP0.  */
1287           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1288                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1289                                                 * (int) BITS_PER_WORD))
1290                                      : (int) i * BITS_PER_WORD);
1291           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1292           rtx result_part
1293             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1294                                            bitsize - i * BITS_PER_WORD),
1295                                  bitnum + bit_offset, 1, target_part, mode,
1296                                  word_mode);
1297
1298           gcc_assert (target_part);
1299
1300           if (result_part != target_part)
1301             emit_move_insn (target_part, result_part);
1302         }
1303
1304       if (unsignedp)
1305         {
1306           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1307              need to be zero'd out.  */
1308           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1309             {
1310               unsigned int i, total_words;
1311
1312               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1313               for (i = nwords; i < total_words; i++)
1314                 emit_move_insn
1315                   (operand_subword (target,
1316                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1317                                     1, VOIDmode),
1318                    const0_rtx);
1319             }
1320           return target;
1321         }
1322
1323       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1324       target = expand_shift (LSHIFT_EXPR, mode, target,
1325                              build_int_cst (NULL_TREE,
1326                                             GET_MODE_BITSIZE (mode) - bitsize),
1327                              NULL_RTX, 0);
1328       return expand_shift (RSHIFT_EXPR, mode, target,
1329                            build_int_cst (NULL_TREE,
1330                                           GET_MODE_BITSIZE (mode) - bitsize),
1331                            NULL_RTX, 0);
1332     }
1333
1334   /* From here on we know the desired field is smaller than a word.  */
1335
1336   /* Check if there is a correspondingly-sized integer field, so we can
1337      safely extract it as one size of integer, if necessary; then
1338      truncate or extend to the size that is wanted; then use SUBREGs or
1339      convert_to_mode to get one of the modes we really wanted.  */
1340
1341   int_mode = int_mode_for_mode (tmode);
1342   if (int_mode == BLKmode)
1343     int_mode = int_mode_for_mode (mode);
1344   /* Should probably push op0 out to memory and then do a load.  */
1345   gcc_assert (int_mode != BLKmode);
1346
1347   /* OFFSET is the number of words or bytes (UNIT says which)
1348      from STR_RTX to the first word or byte containing part of the field.  */
1349   if (!MEM_P (op0))
1350     {
1351       if (offset != 0
1352           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1353         {
1354           if (!REG_P (op0))
1355             op0 = copy_to_reg (op0);
1356           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1357                                 op0, (offset * UNITS_PER_WORD));
1358         }
1359       offset = 0;
1360     }
1361
1362   /* Now OFFSET is nonzero only for memory operands.  */
1363
1364   if (unsignedp)
1365     {
1366       if (HAVE_extzv
1367           && (GET_MODE_BITSIZE (extzv_mode) >= bitsize)
1368           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1369                 && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1370         {
1371           unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1372           rtx bitsize_rtx, bitpos_rtx;
1373           rtx last = get_last_insn ();
1374           rtx xop0 = op0;
1375           rtx xtarget = target;
1376           rtx xspec_target = spec_target;
1377           rtx xspec_target_subreg = spec_target_subreg;
1378           rtx pat;
1379           enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1380
1381           if (MEM_P (xop0))
1382             {
1383               int save_volatile_ok = volatile_ok;
1384               volatile_ok = 1;
1385
1386               /* Is the memory operand acceptable?  */
1387               if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1388                      (xop0, GET_MODE (xop0))))
1389                 {
1390                   /* No, load into a reg and extract from there.  */
1391                   enum machine_mode bestmode;
1392
1393                   /* Get the mode to use for inserting into this field.  If
1394                      OP0 is BLKmode, get the smallest mode consistent with the
1395                      alignment. If OP0 is a non-BLKmode object that is no
1396                      wider than MAXMODE, use its mode. Otherwise, use the
1397                      smallest mode containing the field.  */
1398
1399                   if (GET_MODE (xop0) == BLKmode
1400                       || (GET_MODE_SIZE (GET_MODE (op0))
1401                           > GET_MODE_SIZE (maxmode)))
1402                     bestmode = get_best_mode (bitsize, bitnum,
1403                                               MEM_ALIGN (xop0), maxmode,
1404                                               MEM_VOLATILE_P (xop0));
1405                   else
1406                     bestmode = GET_MODE (xop0);
1407
1408                   if (bestmode == VOIDmode
1409                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1410                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1411                     goto extzv_loses;
1412
1413                   /* Compute offset as multiple of this unit,
1414                      counting in bytes.  */
1415                   unit = GET_MODE_BITSIZE (bestmode);
1416                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1417                   xbitpos = bitnum % unit;
1418                   xop0 = adjust_address (xop0, bestmode, xoffset);
1419
1420                   /* Fetch it to a register in that size.  */
1421                   xop0 = force_reg (bestmode, xop0);
1422
1423                   /* XBITPOS counts within UNIT, which is what is expected.  */
1424                 }
1425               else
1426                 /* Get ref to first byte containing part of the field.  */
1427                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1428
1429               volatile_ok = save_volatile_ok;
1430             }
1431
1432           /* If op0 is a register, we need it in MAXMODE (which is usually
1433              SImode). to make it acceptable to the format of extzv.  */
1434           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1435             goto extzv_loses;
1436           if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1437             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1438
1439           /* On big-endian machines, we count bits from the most significant.
1440              If the bit field insn does not, we must invert.  */
1441           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1442             xbitpos = unit - bitsize - xbitpos;
1443
1444           /* Now convert from counting within UNIT to counting in MAXMODE.  */
1445           if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1446             xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1447
1448           unit = GET_MODE_BITSIZE (maxmode);
1449
1450           if (xtarget == 0
1451               || (flag_force_mem && MEM_P (xtarget)))
1452             xtarget = xspec_target = gen_reg_rtx (tmode);
1453
1454           if (GET_MODE (xtarget) != maxmode)
1455             {
1456               if (REG_P (xtarget))
1457                 {
1458                   int wider = (GET_MODE_SIZE (maxmode)
1459                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1460                   xtarget = gen_lowpart (maxmode, xtarget);
1461                   if (wider)
1462                     xspec_target_subreg = xtarget;
1463                 }
1464               else
1465                 xtarget = gen_reg_rtx (maxmode);
1466             }
1467
1468           /* If this machine's extzv insists on a register target,
1469              make sure we have one.  */
1470           if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1471                  (xtarget, maxmode)))
1472             xtarget = gen_reg_rtx (maxmode);
1473
1474           bitsize_rtx = GEN_INT (bitsize);
1475           bitpos_rtx = GEN_INT (xbitpos);
1476
1477           pat = gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1478           if (pat)
1479             {
1480               emit_insn (pat);
1481               target = xtarget;
1482               spec_target = xspec_target;
1483               spec_target_subreg = xspec_target_subreg;
1484             }
1485           else
1486             {
1487               delete_insns_since (last);
1488               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1489                                                 bitpos, target, 1);
1490             }
1491         }
1492       else
1493       extzv_loses:
1494         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1495                                           bitpos, target, 1);
1496     }
1497   else
1498     {
1499       if (HAVE_extv
1500           && (GET_MODE_BITSIZE (extv_mode) >= bitsize)
1501           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1502                 && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1503         {
1504           int xbitpos = bitpos, xoffset = offset;
1505           rtx bitsize_rtx, bitpos_rtx;
1506           rtx last = get_last_insn ();
1507           rtx xop0 = op0, xtarget = target;
1508           rtx xspec_target = spec_target;
1509           rtx xspec_target_subreg = spec_target_subreg;
1510           rtx pat;
1511           enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1512
1513           if (MEM_P (xop0))
1514             {
1515               /* Is the memory operand acceptable?  */
1516               if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1517                      (xop0, GET_MODE (xop0))))
1518                 {
1519                   /* No, load into a reg and extract from there.  */
1520                   enum machine_mode bestmode;
1521
1522                   /* Get the mode to use for inserting into this field.  If
1523                      OP0 is BLKmode, get the smallest mode consistent with the
1524                      alignment. If OP0 is a non-BLKmode object that is no
1525                      wider than MAXMODE, use its mode. Otherwise, use the
1526                      smallest mode containing the field.  */
1527
1528                   if (GET_MODE (xop0) == BLKmode
1529                       || (GET_MODE_SIZE (GET_MODE (op0))
1530                           > GET_MODE_SIZE (maxmode)))
1531                     bestmode = get_best_mode (bitsize, bitnum,
1532                                               MEM_ALIGN (xop0), maxmode,
1533                                               MEM_VOLATILE_P (xop0));
1534                   else
1535                     bestmode = GET_MODE (xop0);
1536
1537                   if (bestmode == VOIDmode
1538                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1539                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1540                     goto extv_loses;
1541
1542                   /* Compute offset as multiple of this unit,
1543                      counting in bytes.  */
1544                   unit = GET_MODE_BITSIZE (bestmode);
1545                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1546                   xbitpos = bitnum % unit;
1547                   xop0 = adjust_address (xop0, bestmode, xoffset);
1548
1549                   /* Fetch it to a register in that size.  */
1550                   xop0 = force_reg (bestmode, xop0);
1551
1552                   /* XBITPOS counts within UNIT, which is what is expected.  */
1553                 }
1554               else
1555                 /* Get ref to first byte containing part of the field.  */
1556                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1557             }
1558
1559           /* If op0 is a register, we need it in MAXMODE (which is usually
1560              SImode) to make it acceptable to the format of extv.  */
1561           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1562             goto extv_loses;
1563           if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1564             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1565
1566           /* On big-endian machines, we count bits from the most significant.
1567              If the bit field insn does not, we must invert.  */
1568           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1569             xbitpos = unit - bitsize - xbitpos;
1570
1571           /* XBITPOS counts within a size of UNIT.
1572              Adjust to count within a size of MAXMODE.  */
1573           if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1574             xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1575
1576           unit = GET_MODE_BITSIZE (maxmode);
1577
1578           if (xtarget == 0
1579               || (flag_force_mem && MEM_P (xtarget)))
1580             xtarget = xspec_target = gen_reg_rtx (tmode);
1581
1582           if (GET_MODE (xtarget) != maxmode)
1583             {
1584               if (REG_P (xtarget))
1585                 {
1586                   int wider = (GET_MODE_SIZE (maxmode)
1587                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1588                   xtarget = gen_lowpart (maxmode, xtarget);
1589                   if (wider)
1590                     xspec_target_subreg = xtarget;
1591                 }
1592               else
1593                 xtarget = gen_reg_rtx (maxmode);
1594             }
1595
1596           /* If this machine's extv insists on a register target,
1597              make sure we have one.  */
1598           if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1599                  (xtarget, maxmode)))
1600             xtarget = gen_reg_rtx (maxmode);
1601
1602           bitsize_rtx = GEN_INT (bitsize);
1603           bitpos_rtx = GEN_INT (xbitpos);
1604
1605           pat = gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1606           if (pat)
1607             {
1608               emit_insn (pat);
1609               target = xtarget;
1610               spec_target = xspec_target;
1611               spec_target_subreg = xspec_target_subreg;
1612             }
1613           else
1614             {
1615               delete_insns_since (last);
1616               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1617                                                 bitpos, target, 0);
1618             }
1619         }
1620       else
1621       extv_loses:
1622         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1623                                           bitpos, target, 0);
1624     }
1625   if (target == spec_target)
1626     return target;
1627   if (target == spec_target_subreg)
1628     return spec_target;
1629   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1630     {
1631       /* If the target mode is floating-point, first convert to the
1632          integer mode of that size and then access it as a floating-point
1633          value via a SUBREG.  */
1634       if (GET_MODE_CLASS (tmode) != MODE_INT
1635           && GET_MODE_CLASS (tmode) != MODE_PARTIAL_INT)
1636         {
1637           target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
1638                                                    MODE_INT, 0),
1639                                     target, unsignedp);
1640           return gen_lowpart (tmode, target);
1641         }
1642       else
1643         return convert_to_mode (tmode, target, unsignedp);
1644     }
1645   return target;
1646 }
1647 \f
1648 /* Extract a bit field using shifts and boolean operations
1649    Returns an rtx to represent the value.
1650    OP0 addresses a register (word) or memory (byte).
1651    BITPOS says which bit within the word or byte the bit field starts in.
1652    OFFSET says how many bytes farther the bit field starts;
1653     it is 0 if OP0 is a register.
1654    BITSIZE says how many bits long the bit field is.
1655     (If OP0 is a register, it may be narrower than a full word,
1656      but BITPOS still counts within a full word,
1657      which is significant on bigendian machines.)
1658
1659    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1660    If TARGET is nonzero, attempts to store the value there
1661    and return TARGET, but this is not guaranteed.
1662    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1663
1664 static rtx
1665 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1666                          unsigned HOST_WIDE_INT offset,
1667                          unsigned HOST_WIDE_INT bitsize,
1668                          unsigned HOST_WIDE_INT bitpos, rtx target,
1669                          int unsignedp)
1670 {
1671   unsigned int total_bits = BITS_PER_WORD;
1672   enum machine_mode mode;
1673
1674   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1675     {
1676       /* Special treatment for a bit field split across two registers.  */
1677       if (bitsize + bitpos > BITS_PER_WORD)
1678         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1679     }
1680   else
1681     {
1682       /* Get the proper mode to use for this field.  We want a mode that
1683          includes the entire field.  If such a mode would be larger than
1684          a word, we won't be doing the extraction the normal way.  */
1685
1686       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1687                             MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1688
1689       if (mode == VOIDmode)
1690         /* The only way this should occur is if the field spans word
1691            boundaries.  */
1692         return extract_split_bit_field (op0, bitsize,
1693                                         bitpos + offset * BITS_PER_UNIT,
1694                                         unsignedp);
1695
1696       total_bits = GET_MODE_BITSIZE (mode);
1697
1698       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1699          be in the range 0 to total_bits-1, and put any excess bytes in
1700          OFFSET.  */
1701       if (bitpos >= total_bits)
1702         {
1703           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1704           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1705                      * BITS_PER_UNIT);
1706         }
1707
1708       /* Get ref to an aligned byte, halfword, or word containing the field.
1709          Adjust BITPOS to be position within a word,
1710          and OFFSET to be the offset of that word.
1711          Then alter OP0 to refer to that word.  */
1712       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1713       offset -= (offset % (total_bits / BITS_PER_UNIT));
1714       op0 = adjust_address (op0, mode, offset);
1715     }
1716
1717   mode = GET_MODE (op0);
1718
1719   if (BYTES_BIG_ENDIAN)
1720     /* BITPOS is the distance between our msb and that of OP0.
1721        Convert it to the distance from the lsb.  */
1722     bitpos = total_bits - bitsize - bitpos;
1723
1724   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1725      We have reduced the big-endian case to the little-endian case.  */
1726
1727   if (unsignedp)
1728     {
1729       if (bitpos)
1730         {
1731           /* If the field does not already start at the lsb,
1732              shift it so it does.  */
1733           tree amount = build_int_cst (NULL_TREE, bitpos);
1734           /* Maybe propagate the target for the shift.  */
1735           /* But not if we will return it--could confuse integrate.c.  */
1736           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1737           if (tmode != mode) subtarget = 0;
1738           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1739         }
1740       /* Convert the value to the desired mode.  */
1741       if (mode != tmode)
1742         op0 = convert_to_mode (tmode, op0, 1);
1743
1744       /* Unless the msb of the field used to be the msb when we shifted,
1745          mask out the upper bits.  */
1746
1747       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1748         return expand_binop (GET_MODE (op0), and_optab, op0,
1749                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1750                              target, 1, OPTAB_LIB_WIDEN);
1751       return op0;
1752     }
1753
1754   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1755      then arithmetic-shift its lsb to the lsb of the word.  */
1756   op0 = force_reg (mode, op0);
1757   if (mode != tmode)
1758     target = 0;
1759
1760   /* Find the narrowest integer mode that contains the field.  */
1761
1762   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1763        mode = GET_MODE_WIDER_MODE (mode))
1764     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1765       {
1766         op0 = convert_to_mode (mode, op0, 0);
1767         break;
1768       }
1769
1770   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1771     {
1772       tree amount
1773         = build_int_cst (NULL_TREE,
1774                          GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1775       /* Maybe propagate the target for the shift.  */
1776       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1777       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1778     }
1779
1780   return expand_shift (RSHIFT_EXPR, mode, op0,
1781                        build_int_cst (NULL_TREE,
1782                                       GET_MODE_BITSIZE (mode) - bitsize),
1783                        target, 0);
1784 }
1785 \f
1786 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1787    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1788    complement of that if COMPLEMENT.  The mask is truncated if
1789    necessary to the width of mode MODE.  The mask is zero-extended if
1790    BITSIZE+BITPOS is too small for MODE.  */
1791
1792 static rtx
1793 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1794 {
1795   HOST_WIDE_INT masklow, maskhigh;
1796
1797   if (bitsize == 0)
1798     masklow = 0;
1799   else if (bitpos < HOST_BITS_PER_WIDE_INT)
1800     masklow = (HOST_WIDE_INT) -1 << bitpos;
1801   else
1802     masklow = 0;
1803
1804   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1805     masklow &= ((unsigned HOST_WIDE_INT) -1
1806                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1807
1808   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1809     maskhigh = -1;
1810   else
1811     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1812
1813   if (bitsize == 0)
1814     maskhigh = 0;
1815   else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1816     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1817                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1818   else
1819     maskhigh = 0;
1820
1821   if (complement)
1822     {
1823       maskhigh = ~maskhigh;
1824       masklow = ~masklow;
1825     }
1826
1827   return immed_double_const (masklow, maskhigh, mode);
1828 }
1829
1830 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1831    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1832
1833 static rtx
1834 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1835 {
1836   unsigned HOST_WIDE_INT v = INTVAL (value);
1837   HOST_WIDE_INT low, high;
1838
1839   if (bitsize < HOST_BITS_PER_WIDE_INT)
1840     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1841
1842   if (bitpos < HOST_BITS_PER_WIDE_INT)
1843     {
1844       low = v << bitpos;
1845       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1846     }
1847   else
1848     {
1849       low = 0;
1850       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1851     }
1852
1853   return immed_double_const (low, high, mode);
1854 }
1855 \f
1856 /* Extract a bit field from a memory by forcing the alignment of the
1857    memory.  This efficient only if the field spans at least 4 boundaries.
1858
1859    OP0 is the MEM.
1860    BITSIZE is the field width; BITPOS is the position of the first bit.
1861    UNSIGNEDP is true if the result should be zero-extended.  */
1862
1863 static rtx
1864 extract_force_align_mem_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1865                                    unsigned HOST_WIDE_INT bitpos,
1866                                    int unsignedp)
1867 {
1868   enum machine_mode mode, dmode;
1869   unsigned int m_bitsize, m_size;
1870   unsigned int sign_shift_up, sign_shift_dn;
1871   rtx base, a1, a2, v1, v2, comb, shift, result, start;
1872
1873   /* Choose a mode that will fit BITSIZE.  */
1874   mode = smallest_mode_for_size (bitsize, MODE_INT);
1875   m_size = GET_MODE_SIZE (mode);
1876   m_bitsize = GET_MODE_BITSIZE (mode);
1877
1878   /* Choose a mode twice as wide.  Fail if no such mode exists.  */
1879   dmode = mode_for_size (m_bitsize * 2, MODE_INT, false);
1880   if (dmode == BLKmode)
1881     return NULL;
1882
1883   do_pending_stack_adjust ();
1884   start = get_last_insn ();
1885
1886   /* At the end, we'll need an additional shift to deal with sign/zero
1887      extension.  By default this will be a left+right shift of the
1888      appropriate size.  But we may be able to eliminate one of them.  */
1889   sign_shift_up = sign_shift_dn = m_bitsize - bitsize;
1890
1891   if (STRICT_ALIGNMENT)
1892     {
1893       base = plus_constant (XEXP (op0, 0), bitpos / BITS_PER_UNIT);
1894       bitpos %= BITS_PER_UNIT;
1895
1896       /* We load two values to be concatenate.  There's an edge condition
1897          that bears notice -- an aligned value at the end of a page can
1898          only load one value lest we segfault.  So the two values we load
1899          are at "base & -size" and "(base + size - 1) & -size".  If base
1900          is unaligned, the addresses will be aligned and sequential; if
1901          base is aligned, the addresses will both be equal to base.  */
1902
1903       a1 = expand_simple_binop (Pmode, AND, force_operand (base, NULL),
1904                                 GEN_INT (-(HOST_WIDE_INT)m_size),
1905                                 NULL, true, OPTAB_LIB_WIDEN);
1906       mark_reg_pointer (a1, m_bitsize);
1907       v1 = gen_rtx_MEM (mode, a1);
1908       set_mem_align (v1, m_bitsize);
1909       v1 = force_reg (mode, validize_mem (v1));
1910
1911       a2 = plus_constant (base, GET_MODE_SIZE (mode) - 1);
1912       a2 = expand_simple_binop (Pmode, AND, force_operand (a2, NULL),
1913                                 GEN_INT (-(HOST_WIDE_INT)m_size),
1914                                 NULL, true, OPTAB_LIB_WIDEN);
1915       v2 = gen_rtx_MEM (mode, a2);
1916       set_mem_align (v2, m_bitsize);
1917       v2 = force_reg (mode, validize_mem (v2));
1918
1919       /* Combine these two values into a double-word value.  */
1920       if (m_bitsize == BITS_PER_WORD)
1921         {
1922           comb = gen_reg_rtx (dmode);
1923           emit_insn (gen_rtx_CLOBBER (VOIDmode, comb));
1924           emit_move_insn (gen_rtx_SUBREG (mode, comb, 0), v1);
1925           emit_move_insn (gen_rtx_SUBREG (mode, comb, m_size), v2);
1926         }
1927       else
1928         {
1929           if (BYTES_BIG_ENDIAN)
1930             comb = v1, v1 = v2, v2 = comb;
1931           v1 = convert_modes (dmode, mode, v1, true);
1932           if (v1 == NULL)
1933             goto fail;
1934           v2 = convert_modes (dmode, mode, v2, true);
1935           v2 = expand_simple_binop (dmode, ASHIFT, v2, GEN_INT (m_bitsize),
1936                                     NULL, true, OPTAB_LIB_WIDEN);
1937           if (v2 == NULL)
1938             goto fail;
1939           comb = expand_simple_binop (dmode, IOR, v1, v2, NULL,
1940                                       true, OPTAB_LIB_WIDEN);
1941           if (comb == NULL)
1942             goto fail;
1943         }
1944
1945       shift = expand_simple_binop (Pmode, AND, base, GEN_INT (m_size - 1),
1946                                    NULL, true, OPTAB_LIB_WIDEN);
1947       shift = expand_mult (Pmode, shift, GEN_INT (BITS_PER_UNIT), NULL, 1);
1948
1949       if (bitpos != 0)
1950         {
1951           if (sign_shift_up <= bitpos)
1952             bitpos -= sign_shift_up, sign_shift_up = 0;
1953           shift = expand_simple_binop (Pmode, PLUS, shift, GEN_INT (bitpos),
1954                                        NULL, true, OPTAB_LIB_WIDEN);
1955         }
1956     }
1957   else
1958     {
1959       unsigned HOST_WIDE_INT offset = bitpos / BITS_PER_UNIT;
1960       bitpos %= BITS_PER_UNIT;
1961
1962       /* When strict alignment is not required, we can just load directly
1963          from memory without masking.  If the remaining BITPOS offset is
1964          small enough, we may be able to do all operations in MODE as
1965          opposed to DMODE.  */
1966       if (bitpos + bitsize <= m_bitsize)
1967         dmode = mode;
1968       comb = adjust_address (op0, dmode, offset);
1969
1970       if (sign_shift_up <= bitpos)
1971         bitpos -= sign_shift_up, sign_shift_up = 0;
1972       shift = GEN_INT (bitpos);
1973     }
1974
1975   /* Shift down the double-word such that the requested value is at bit 0.  */
1976   if (shift != const0_rtx)
1977     comb = expand_simple_binop (dmode, unsignedp ? LSHIFTRT : ASHIFTRT,
1978                                 comb, shift, NULL, unsignedp, OPTAB_LIB_WIDEN);
1979   if (comb == NULL)
1980     goto fail;
1981
1982   /* If the field exactly matches MODE, then all we need to do is return the
1983      lowpart.  Otherwise, shift to get the sign bits set properly.  */
1984   result = force_reg (mode, gen_lowpart (mode, comb));
1985
1986   if (sign_shift_up)
1987     result = expand_simple_binop (mode, ASHIFT, result,
1988                                   GEN_INT (sign_shift_up),
1989                                   NULL_RTX, 0, OPTAB_LIB_WIDEN);
1990   if (sign_shift_dn)
1991     result = expand_simple_binop (mode, unsignedp ? LSHIFTRT : ASHIFTRT,
1992                                   result, GEN_INT (sign_shift_dn),
1993                                   NULL_RTX, 0, OPTAB_LIB_WIDEN);
1994
1995   return result;
1996
1997  fail:
1998   delete_insns_since (start);
1999   return NULL;
2000 }
2001
2002 /* Extract a bit field that is split across two words
2003    and return an RTX for the result.
2004
2005    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2006    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2007    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
2008
2009 static rtx
2010 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2011                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
2012 {
2013   unsigned int unit;
2014   unsigned int bitsdone = 0;
2015   rtx result = NULL_RTX;
2016   int first = 1;
2017
2018   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2019      much at a time.  */
2020   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2021     unit = BITS_PER_WORD;
2022   else
2023     {
2024       unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2025       if (0 && bitsize / unit > 2)
2026         {
2027           rtx tmp = extract_force_align_mem_bit_field (op0, bitsize, bitpos,
2028                                                        unsignedp);
2029           if (tmp)
2030             return tmp;
2031         }
2032     }
2033
2034   while (bitsdone < bitsize)
2035     {
2036       unsigned HOST_WIDE_INT thissize;
2037       rtx part, word;
2038       unsigned HOST_WIDE_INT thispos;
2039       unsigned HOST_WIDE_INT offset;
2040
2041       offset = (bitpos + bitsdone) / unit;
2042       thispos = (bitpos + bitsdone) % unit;
2043
2044       /* THISSIZE must not overrun a word boundary.  Otherwise,
2045          extract_fixed_bit_field will call us again, and we will mutually
2046          recurse forever.  */
2047       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2048       thissize = MIN (thissize, unit - thispos);
2049
2050       /* If OP0 is a register, then handle OFFSET here.
2051
2052          When handling multiword bitfields, extract_bit_field may pass
2053          down a word_mode SUBREG of a larger REG for a bitfield that actually
2054          crosses a word boundary.  Thus, for a SUBREG, we must find
2055          the current word starting from the base register.  */
2056       if (GET_CODE (op0) == SUBREG)
2057         {
2058           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2059           word = operand_subword_force (SUBREG_REG (op0), word_offset,
2060                                         GET_MODE (SUBREG_REG (op0)));
2061           offset = 0;
2062         }
2063       else if (REG_P (op0))
2064         {
2065           word = operand_subword_force (op0, offset, GET_MODE (op0));
2066           offset = 0;
2067         }
2068       else
2069         word = op0;
2070
2071       /* Extract the parts in bit-counting order,
2072          whose meaning is determined by BYTES_PER_UNIT.
2073          OFFSET is in UNITs, and UNIT is in bits.
2074          extract_fixed_bit_field wants offset in bytes.  */
2075       part = extract_fixed_bit_field (word_mode, word,
2076                                       offset * unit / BITS_PER_UNIT,
2077                                       thissize, thispos, 0, 1);
2078       bitsdone += thissize;
2079
2080       /* Shift this part into place for the result.  */
2081       if (BYTES_BIG_ENDIAN)
2082         {
2083           if (bitsize != bitsdone)
2084             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2085                                  build_int_cst (NULL_TREE, bitsize - bitsdone),
2086                                  0, 1);
2087         }
2088       else
2089         {
2090           if (bitsdone != thissize)
2091             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2092                                  build_int_cst (NULL_TREE,
2093                                                 bitsdone - thissize), 0, 1);
2094         }
2095
2096       if (first)
2097         result = part;
2098       else
2099         /* Combine the parts with bitwise or.  This works
2100            because we extracted each part as an unsigned bit field.  */
2101         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2102                                OPTAB_LIB_WIDEN);
2103
2104       first = 0;
2105     }
2106
2107   /* Unsigned bit field: we are done.  */
2108   if (unsignedp)
2109     return result;
2110   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2111   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2112                          build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2113                          NULL_RTX, 0);
2114   return expand_shift (RSHIFT_EXPR, word_mode, result,
2115                        build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2116                        NULL_RTX, 0);
2117 }
2118 \f
2119 /* Add INC into TARGET.  */
2120
2121 void
2122 expand_inc (rtx target, rtx inc)
2123 {
2124   rtx value = expand_binop (GET_MODE (target), add_optab,
2125                             target, inc,
2126                             target, 0, OPTAB_LIB_WIDEN);
2127   if (value != target)
2128     emit_move_insn (target, value);
2129 }
2130
2131 /* Subtract DEC from TARGET.  */
2132
2133 void
2134 expand_dec (rtx target, rtx dec)
2135 {
2136   rtx value = expand_binop (GET_MODE (target), sub_optab,
2137                             target, dec,
2138                             target, 0, OPTAB_LIB_WIDEN);
2139   if (value != target)
2140     emit_move_insn (target, value);
2141 }
2142 \f
2143 /* Output a shift instruction for expression code CODE,
2144    with SHIFTED being the rtx for the value to shift,
2145    and AMOUNT the tree for the amount to shift by.
2146    Store the result in the rtx TARGET, if that is convenient.
2147    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2148    Return the rtx for where the value is.  */
2149
2150 rtx
2151 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2152               tree amount, rtx target, int unsignedp)
2153 {
2154   rtx op1, temp = 0;
2155   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2156   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2157   int try;
2158
2159   /* Previously detected shift-counts computed by NEGATE_EXPR
2160      and shifted in the other direction; but that does not work
2161      on all machines.  */
2162
2163   op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
2164
2165   if (SHIFT_COUNT_TRUNCATED)
2166     {
2167       if (GET_CODE (op1) == CONST_INT
2168           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2169               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2170         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2171                        % GET_MODE_BITSIZE (mode));
2172       else if (GET_CODE (op1) == SUBREG
2173                && subreg_lowpart_p (op1))
2174         op1 = SUBREG_REG (op1);
2175     }
2176
2177   if (op1 == const0_rtx)
2178     return shifted;
2179
2180   /* Check whether its cheaper to implement a left shift by a constant
2181      bit count by a sequence of additions.  */
2182   if (code == LSHIFT_EXPR
2183       && GET_CODE (op1) == CONST_INT
2184       && INTVAL (op1) > 0
2185       && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2186       && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode])
2187     {
2188       int i;
2189       for (i = 0; i < INTVAL (op1); i++)
2190         {
2191           temp = force_reg (mode, shifted);
2192           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2193                                   unsignedp, OPTAB_LIB_WIDEN);
2194         }
2195       return shifted;
2196     }
2197
2198   for (try = 0; temp == 0 && try < 3; try++)
2199     {
2200       enum optab_methods methods;
2201
2202       if (try == 0)
2203         methods = OPTAB_DIRECT;
2204       else if (try == 1)
2205         methods = OPTAB_WIDEN;
2206       else
2207         methods = OPTAB_LIB_WIDEN;
2208
2209       if (rotate)
2210         {
2211           /* Widening does not work for rotation.  */
2212           if (methods == OPTAB_WIDEN)
2213             continue;
2214           else if (methods == OPTAB_LIB_WIDEN)
2215             {
2216               /* If we have been unable to open-code this by a rotation,
2217                  do it as the IOR of two shifts.  I.e., to rotate A
2218                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2219                  where C is the bitsize of A.
2220
2221                  It is theoretically possible that the target machine might
2222                  not be able to perform either shift and hence we would
2223                  be making two libcalls rather than just the one for the
2224                  shift (similarly if IOR could not be done).  We will allow
2225                  this extremely unlikely lossage to avoid complicating the
2226                  code below.  */
2227
2228               rtx subtarget = target == shifted ? 0 : target;
2229               rtx temp1;
2230               tree type = TREE_TYPE (amount);
2231               tree new_amount = make_tree (type, op1);
2232               tree other_amount
2233                 = fold (build2 (MINUS_EXPR, type, convert
2234                                 (type, build_int_cst
2235                                  (NULL_TREE, GET_MODE_BITSIZE (mode))),
2236                                 amount));
2237
2238               shifted = force_reg (mode, shifted);
2239
2240               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2241                                    mode, shifted, new_amount, subtarget, 1);
2242               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2243                                     mode, shifted, other_amount, 0, 1);
2244               return expand_binop (mode, ior_optab, temp, temp1, target,
2245                                    unsignedp, methods);
2246             }
2247
2248           temp = expand_binop (mode,
2249                                left ? rotl_optab : rotr_optab,
2250                                shifted, op1, target, unsignedp, methods);
2251
2252           /* If we don't have the rotate, but we are rotating by a constant
2253              that is in range, try a rotate in the opposite direction.  */
2254
2255           if (temp == 0 && GET_CODE (op1) == CONST_INT
2256               && INTVAL (op1) > 0
2257               && (unsigned int) INTVAL (op1) < GET_MODE_BITSIZE (mode))
2258             temp = expand_binop (mode,
2259                                  left ? rotr_optab : rotl_optab,
2260                                  shifted,
2261                                  GEN_INT (GET_MODE_BITSIZE (mode)
2262                                           - INTVAL (op1)),
2263                                  target, unsignedp, methods);
2264         }
2265       else if (unsignedp)
2266         temp = expand_binop (mode,
2267                              left ? ashl_optab : lshr_optab,
2268                              shifted, op1, target, unsignedp, methods);
2269
2270       /* Do arithmetic shifts.
2271          Also, if we are going to widen the operand, we can just as well
2272          use an arithmetic right-shift instead of a logical one.  */
2273       if (temp == 0 && ! rotate
2274           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2275         {
2276           enum optab_methods methods1 = methods;
2277
2278           /* If trying to widen a log shift to an arithmetic shift,
2279              don't accept an arithmetic shift of the same size.  */
2280           if (unsignedp)
2281             methods1 = OPTAB_MUST_WIDEN;
2282
2283           /* Arithmetic shift */
2284
2285           temp = expand_binop (mode,
2286                                left ? ashl_optab : ashr_optab,
2287                                shifted, op1, target, unsignedp, methods1);
2288         }
2289
2290       /* We used to try extzv here for logical right shifts, but that was
2291          only useful for one machine, the VAX, and caused poor code
2292          generation there for lshrdi3, so the code was deleted and a
2293          define_expand for lshrsi3 was added to vax.md.  */
2294     }
2295
2296   gcc_assert (temp);
2297   return temp;
2298 }
2299 \f
2300 enum alg_code { alg_unknown, alg_zero, alg_m, alg_shift,
2301                   alg_add_t_m2, alg_sub_t_m2,
2302                   alg_add_factor, alg_sub_factor,
2303                   alg_add_t2_m, alg_sub_t2_m };
2304
2305 /* This structure holds the "cost" of a multiply sequence.  The
2306    "cost" field holds the total rtx_cost of every operator in the
2307    synthetic multiplication sequence, hence cost(a op b) is defined
2308    as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2309    The "latency" field holds the minimum possible latency of the
2310    synthetic multiply, on a hypothetical infinitely parallel CPU.
2311    This is the critical path, or the maximum height, of the expression
2312    tree which is the sum of rtx_costs on the most expensive path from
2313    any leaf to the root.  Hence latency(a op b) is defined as zero for
2314    leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise.  */
2315
2316 struct mult_cost {
2317   short cost;     /* Total rtx_cost of the multiplication sequence.  */
2318   short latency;  /* The latency of the multiplication sequence.  */
2319 };
2320
2321 /* This macro is used to compare a pointer to a mult_cost against an
2322    single integer "rtx_cost" value.  This is equivalent to the macro
2323    CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}.  */
2324 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y)    \
2325                              || ((X)->cost == (Y) && (X)->latency < (Y)))
2326
2327 /* This macro is used to compare two pointers to mult_costs against
2328    each other.  The macro returns true if X is cheaper than Y.
2329    Currently, the cheaper of two mult_costs is the one with the
2330    lower "cost".  If "cost"s are tied, the lower latency is cheaper.  */
2331 #define CHEAPER_MULT_COST(X,Y)  ((X)->cost < (Y)->cost          \
2332                                  || ((X)->cost == (Y)->cost     \
2333                                      && (X)->latency < (Y)->latency))
2334
2335 /* This structure records a sequence of operations.
2336    `ops' is the number of operations recorded.
2337    `cost' is their total cost.
2338    The operations are stored in `op' and the corresponding
2339    logarithms of the integer coefficients in `log'.
2340
2341    These are the operations:
2342    alg_zero             total := 0;
2343    alg_m                total := multiplicand;
2344    alg_shift            total := total * coeff
2345    alg_add_t_m2         total := total + multiplicand * coeff;
2346    alg_sub_t_m2         total := total - multiplicand * coeff;
2347    alg_add_factor       total := total * coeff + total;
2348    alg_sub_factor       total := total * coeff - total;
2349    alg_add_t2_m         total := total * coeff + multiplicand;
2350    alg_sub_t2_m         total := total * coeff - multiplicand;
2351
2352    The first operand must be either alg_zero or alg_m.  */
2353
2354 struct algorithm
2355 {
2356   struct mult_cost cost;
2357   short ops;
2358   /* The size of the OP and LOG fields are not directly related to the
2359      word size, but the worst-case algorithms will be if we have few
2360      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2361      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2362      in total wordsize operations.  */
2363   enum alg_code op[MAX_BITS_PER_WORD];
2364   char log[MAX_BITS_PER_WORD];
2365 };
2366
2367 /* The entry for our multiplication cache/hash table.  */
2368 struct alg_hash_entry {
2369   /* The number we are multiplying by.  */
2370   unsigned int t;
2371
2372   /* The mode in which we are multiplying something by T.  */
2373   enum machine_mode mode;
2374
2375   /* The best multiplication algorithm for t.  */
2376   enum alg_code alg;
2377 };
2378
2379 /* The number of cache/hash entries.  */
2380 #define NUM_ALG_HASH_ENTRIES 307
2381
2382 /* Each entry of ALG_HASH caches alg_code for some integer.  This is
2383    actually a hash table.  If we have a collision, that the older
2384    entry is kicked out.  */
2385 static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2386
2387 /* Indicates the type of fixup needed after a constant multiplication.
2388    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2389    the result should be negated, and ADD_VARIANT means that the
2390    multiplicand should be added to the result.  */
2391 enum mult_variant {basic_variant, negate_variant, add_variant};
2392
2393 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2394                         const struct mult_cost *, enum machine_mode mode);
2395 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2396                                  struct algorithm *, enum mult_variant *, int);
2397 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2398                               const struct algorithm *, enum mult_variant);
2399 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2400                                                  int, unsigned HOST_WIDE_INT *,
2401                                                  int *, int *);
2402 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2403 static rtx extract_high_half (enum machine_mode, rtx);
2404 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2405                                        int, int);
2406 /* Compute and return the best algorithm for multiplying by T.
2407    The algorithm must cost less than cost_limit
2408    If retval.cost >= COST_LIMIT, no algorithm was found and all
2409    other field of the returned struct are undefined.
2410    MODE is the machine mode of the multiplication.  */
2411
2412 static void
2413 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2414             const struct mult_cost *cost_limit, enum machine_mode mode)
2415 {
2416   int m;
2417   struct algorithm *alg_in, *best_alg;
2418   struct mult_cost best_cost;
2419   struct mult_cost new_limit;
2420   int op_cost, op_latency;
2421   unsigned HOST_WIDE_INT q;
2422   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2423   int hash_index;
2424   bool cache_hit = false;
2425   enum alg_code cache_alg = alg_zero;
2426
2427   /* Indicate that no algorithm is yet found.  If no algorithm
2428      is found, this value will be returned and indicate failure.  */
2429   alg_out->cost.cost = cost_limit->cost + 1;
2430   alg_out->cost.latency = cost_limit->latency + 1;
2431
2432   if (cost_limit->cost < 0
2433       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2434     return;
2435
2436   /* Restrict the bits of "t" to the multiplication's mode.  */
2437   t &= GET_MODE_MASK (mode);
2438
2439   /* t == 1 can be done in zero cost.  */
2440   if (t == 1)
2441     {
2442       alg_out->ops = 1;
2443       alg_out->cost.cost = 0;
2444       alg_out->cost.latency = 0;
2445       alg_out->op[0] = alg_m;
2446       return;
2447     }
2448
2449   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2450      fail now.  */
2451   if (t == 0)
2452     {
2453       if (MULT_COST_LESS (cost_limit, zero_cost))
2454         return;
2455       else
2456         {
2457           alg_out->ops = 1;
2458           alg_out->cost.cost = zero_cost;
2459           alg_out->cost.latency = zero_cost;
2460           alg_out->op[0] = alg_zero;
2461           return;
2462         }
2463     }
2464
2465   /* We'll be needing a couple extra algorithm structures now.  */
2466
2467   alg_in = alloca (sizeof (struct algorithm));
2468   best_alg = alloca (sizeof (struct algorithm));
2469   best_cost = *cost_limit;
2470
2471   /* Compute the hash index.  */
2472   hash_index = (t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES;
2473
2474   /* See if we already know what to do for T.  */
2475   if (alg_hash[hash_index].t == t
2476       && alg_hash[hash_index].mode == mode
2477       && alg_hash[hash_index].alg != alg_unknown)
2478     {
2479       cache_hit = true;
2480       cache_alg = alg_hash[hash_index].alg;
2481       switch (cache_alg)
2482         {
2483         case alg_shift:
2484           goto do_alg_shift;
2485
2486         case alg_add_t_m2:
2487         case alg_sub_t_m2:
2488           goto do_alg_addsub_t_m2;
2489
2490         case alg_add_factor:
2491         case alg_sub_factor:
2492           goto do_alg_addsub_factor;
2493
2494         case alg_add_t2_m:
2495           goto do_alg_add_t2_m;
2496
2497         case alg_sub_t2_m:
2498           goto do_alg_sub_t2_m;
2499
2500         default:
2501           gcc_unreachable ();
2502         }
2503     }
2504
2505   /* If we have a group of zero bits at the low-order part of T, try
2506      multiplying by the remaining bits and then doing a shift.  */
2507
2508   if ((t & 1) == 0)
2509     {
2510     do_alg_shift:
2511       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2512       if (m < maxm)
2513         {
2514           q = t >> m;
2515           /* The function expand_shift will choose between a shift and
2516              a sequence of additions, so the observed cost is given as
2517              MIN (m * add_cost[mode], shift_cost[mode][m]).  */
2518           op_cost = m * add_cost[mode];
2519           if (shift_cost[mode][m] < op_cost)
2520             op_cost = shift_cost[mode][m];
2521           new_limit.cost = best_cost.cost - op_cost;
2522           new_limit.latency = best_cost.latency - op_cost;
2523           synth_mult (alg_in, q, &new_limit, mode);
2524
2525           alg_in->cost.cost += op_cost;
2526           alg_in->cost.latency += op_cost;
2527           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2528             {
2529               struct algorithm *x;
2530               best_cost = alg_in->cost;
2531               x = alg_in, alg_in = best_alg, best_alg = x;
2532               best_alg->log[best_alg->ops] = m;
2533               best_alg->op[best_alg->ops] = alg_shift;
2534             }
2535         }
2536       if (cache_hit)
2537         goto done;
2538     }
2539
2540   /* If we have an odd number, add or subtract one.  */
2541   if ((t & 1) != 0)
2542     {
2543       unsigned HOST_WIDE_INT w;
2544
2545     do_alg_addsub_t_m2:
2546       for (w = 1; (w & t) != 0; w <<= 1)
2547         ;
2548       /* If T was -1, then W will be zero after the loop.  This is another
2549          case where T ends with ...111.  Handling this with (T + 1) and
2550          subtract 1 produces slightly better code and results in algorithm
2551          selection much faster than treating it like the ...0111 case
2552          below.  */
2553       if (w == 0
2554           || (w > 2
2555               /* Reject the case where t is 3.
2556                  Thus we prefer addition in that case.  */
2557               && t != 3))
2558         {
2559           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2560
2561           op_cost = add_cost[mode];
2562           new_limit.cost = best_cost.cost - op_cost;
2563           new_limit.latency = best_cost.latency - op_cost;
2564           synth_mult (alg_in, t + 1, &new_limit, mode);
2565
2566           alg_in->cost.cost += op_cost;
2567           alg_in->cost.latency += op_cost;
2568           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2569             {
2570               struct algorithm *x;
2571               best_cost = alg_in->cost;
2572               x = alg_in, alg_in = best_alg, best_alg = x;
2573               best_alg->log[best_alg->ops] = 0;
2574               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2575             }
2576         }
2577       else
2578         {
2579           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2580
2581           op_cost = add_cost[mode];
2582           new_limit.cost = best_cost.cost - op_cost;
2583           new_limit.latency = best_cost.latency - op_cost;
2584           synth_mult (alg_in, t - 1, &new_limit, mode);
2585
2586           alg_in->cost.cost += op_cost;
2587           alg_in->cost.latency += op_cost;
2588           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2589             {
2590               struct algorithm *x;
2591               best_cost = alg_in->cost;
2592               x = alg_in, alg_in = best_alg, best_alg = x;
2593               best_alg->log[best_alg->ops] = 0;
2594               best_alg->op[best_alg->ops] = alg_add_t_m2;
2595             }
2596         }
2597       if (cache_hit)
2598         goto done;
2599     }
2600
2601   /* Look for factors of t of the form
2602      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2603      If we find such a factor, we can multiply by t using an algorithm that
2604      multiplies by q, shift the result by m and add/subtract it to itself.
2605
2606      We search for large factors first and loop down, even if large factors
2607      are less probable than small; if we find a large factor we will find a
2608      good sequence quickly, and therefore be able to prune (by decreasing
2609      COST_LIMIT) the search.  */
2610
2611  do_alg_addsub_factor:
2612   for (m = floor_log2 (t - 1); m >= 2; m--)
2613     {
2614       unsigned HOST_WIDE_INT d;
2615
2616       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2617       if (t % d == 0 && t > d && m < maxm
2618           && (!cache_hit || cache_alg == alg_add_factor))
2619         {
2620           /* If the target has a cheap shift-and-add instruction use
2621              that in preference to a shift insn followed by an add insn.
2622              Assume that the shift-and-add is "atomic" with a latency
2623              equal to its cost, otherwise assume that on superscalar
2624              hardware the shift may be executed concurrently with the
2625              earlier steps in the algorithm.  */
2626           op_cost = add_cost[mode] + shift_cost[mode][m];
2627           if (shiftadd_cost[mode][m] < op_cost)
2628             {
2629               op_cost = shiftadd_cost[mode][m];
2630               op_latency = op_cost;
2631             }
2632           else
2633             op_latency = add_cost[mode];
2634
2635           new_limit.cost = best_cost.cost - op_cost;
2636           new_limit.latency = best_cost.latency - op_latency;
2637           synth_mult (alg_in, t / d, &new_limit, mode);
2638
2639           alg_in->cost.cost += op_cost;
2640           alg_in->cost.latency += op_latency;
2641           if (alg_in->cost.latency < op_cost)
2642             alg_in->cost.latency = op_cost;
2643           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2644             {
2645               struct algorithm *x;
2646               best_cost = alg_in->cost;
2647               x = alg_in, alg_in = best_alg, best_alg = x;
2648               best_alg->log[best_alg->ops] = m;
2649               best_alg->op[best_alg->ops] = alg_add_factor;
2650             }
2651           /* Other factors will have been taken care of in the recursion.  */
2652           break;
2653         }
2654
2655       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2656       if (t % d == 0 && t > d && m < maxm
2657           && (!cache_hit || cache_alg == alg_sub_factor))
2658         {
2659           /* If the target has a cheap shift-and-subtract insn use
2660              that in preference to a shift insn followed by a sub insn.
2661              Assume that the shift-and-sub is "atomic" with a latency
2662              equal to it's cost, otherwise assume that on superscalar
2663              hardware the shift may be executed concurrently with the
2664              earlier steps in the algorithm.  */
2665           op_cost = add_cost[mode] + shift_cost[mode][m];
2666           if (shiftsub_cost[mode][m] < op_cost)
2667             {
2668               op_cost = shiftsub_cost[mode][m];
2669               op_latency = op_cost;
2670             }
2671           else
2672             op_latency = add_cost[mode];
2673
2674           new_limit.cost = best_cost.cost - op_cost;
2675           new_limit.cost = best_cost.cost - op_latency;
2676           synth_mult (alg_in, t / d, &new_limit, mode);
2677
2678           alg_in->cost.cost += op_cost;
2679           alg_in->cost.latency += op_latency;
2680           if (alg_in->cost.latency < op_cost)
2681             alg_in->cost.latency = op_cost;
2682           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2683             {
2684               struct algorithm *x;
2685               best_cost = alg_in->cost;
2686               x = alg_in, alg_in = best_alg, best_alg = x;
2687               best_alg->log[best_alg->ops] = m;
2688               best_alg->op[best_alg->ops] = alg_sub_factor;
2689             }
2690           break;
2691         }
2692     }
2693   if (cache_hit)
2694     goto done;
2695
2696   /* Try shift-and-add (load effective address) instructions,
2697      i.e. do a*3, a*5, a*9.  */
2698   if ((t & 1) != 0)
2699     {
2700     do_alg_add_t2_m:
2701       q = t - 1;
2702       q = q & -q;
2703       m = exact_log2 (q);
2704       if (m >= 0 && m < maxm)
2705         {
2706           op_cost = shiftadd_cost[mode][m];
2707           new_limit.cost = best_cost.cost - op_cost;
2708           new_limit.latency = best_cost.latency - op_cost;
2709           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2710
2711           alg_in->cost.cost += op_cost;
2712           alg_in->cost.latency += op_cost;
2713           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2714             {
2715               struct algorithm *x;
2716               best_cost = alg_in->cost;
2717               x = alg_in, alg_in = best_alg, best_alg = x;
2718               best_alg->log[best_alg->ops] = m;
2719               best_alg->op[best_alg->ops] = alg_add_t2_m;
2720             }
2721         }
2722       if (cache_hit)
2723         goto done;
2724
2725     do_alg_sub_t2_m:
2726       q = t + 1;
2727       q = q & -q;
2728       m = exact_log2 (q);
2729       if (m >= 0 && m < maxm)
2730         {
2731           op_cost = shiftsub_cost[mode][m];
2732           new_limit.cost = best_cost.cost - op_cost;
2733           new_limit.latency = best_cost.latency - op_cost;
2734           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2735
2736           alg_in->cost.cost += op_cost;
2737           alg_in->cost.latency += op_cost;
2738           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2739             {
2740               struct algorithm *x;
2741               best_cost = alg_in->cost;
2742               x = alg_in, alg_in = best_alg, best_alg = x;
2743               best_alg->log[best_alg->ops] = m;
2744               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2745             }
2746         }
2747       if (cache_hit)
2748         goto done;
2749     }
2750
2751  done:
2752   /* If best_cost has not decreased, we have not found any algorithm.  */
2753   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2754     return;
2755
2756   /* Cache the result.  */
2757   if (!cache_hit)
2758     {
2759       alg_hash[hash_index].t = t;
2760       alg_hash[hash_index].mode = mode;
2761       alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2762     }
2763
2764   /* If we are getting a too long sequence for `struct algorithm'
2765      to record, make this search fail.  */
2766   if (best_alg->ops == MAX_BITS_PER_WORD)
2767     return;
2768
2769   /* Copy the algorithm from temporary space to the space at alg_out.
2770      We avoid using structure assignment because the majority of
2771      best_alg is normally undefined, and this is a critical function.  */
2772   alg_out->ops = best_alg->ops + 1;
2773   alg_out->cost = best_cost;
2774   memcpy (alg_out->op, best_alg->op,
2775           alg_out->ops * sizeof *alg_out->op);
2776   memcpy (alg_out->log, best_alg->log,
2777           alg_out->ops * sizeof *alg_out->log);
2778 }
2779 \f
2780 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2781    Try three variations:
2782
2783        - a shift/add sequence based on VAL itself
2784        - a shift/add sequence based on -VAL, followed by a negation
2785        - a shift/add sequence based on VAL - 1, followed by an addition.
2786
2787    Return true if the cheapest of these cost less than MULT_COST,
2788    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2789
2790 static bool
2791 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2792                      struct algorithm *alg, enum mult_variant *variant,
2793                      int mult_cost)
2794 {
2795   struct algorithm alg2;
2796   struct mult_cost limit;
2797   int op_cost;
2798
2799   *variant = basic_variant;
2800   limit.cost = mult_cost;
2801   limit.latency = mult_cost;
2802   synth_mult (alg, val, &limit, mode);
2803
2804   /* This works only if the inverted value actually fits in an
2805      `unsigned int' */
2806   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2807     {
2808       op_cost = neg_cost[mode];
2809       if (MULT_COST_LESS (&alg->cost, mult_cost))
2810         {
2811           limit.cost = alg->cost.cost - op_cost;
2812           limit.latency = alg->cost.latency - op_cost;
2813         }
2814       else
2815         {
2816           limit.cost = mult_cost - op_cost;
2817           limit.latency = mult_cost - op_cost;
2818         }
2819
2820       synth_mult (&alg2, -val, &limit, mode);
2821       alg2.cost.cost += op_cost;
2822       alg2.cost.latency += op_cost;
2823       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2824         *alg = alg2, *variant = negate_variant;
2825     }
2826
2827   /* This proves very useful for division-by-constant.  */
2828   op_cost = add_cost[mode];
2829   if (MULT_COST_LESS (&alg->cost, mult_cost))
2830     {
2831       limit.cost = alg->cost.cost - op_cost;
2832       limit.latency = alg->cost.latency - op_cost;
2833     }
2834   else
2835     {
2836       limit.cost = mult_cost - op_cost;
2837       limit.latency = mult_cost - op_cost;
2838     }
2839
2840   synth_mult (&alg2, val - 1, &limit, mode);
2841   alg2.cost.cost += op_cost;
2842   alg2.cost.latency += op_cost;
2843   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2844     *alg = alg2, *variant = add_variant;
2845
2846   return MULT_COST_LESS (&alg->cost, mult_cost);
2847 }
2848
2849 /* A subroutine of expand_mult, used for constant multiplications.
2850    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2851    convenient.  Use the shift/add sequence described by ALG and apply
2852    the final fixup specified by VARIANT.  */
2853
2854 static rtx
2855 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2856                    rtx target, const struct algorithm *alg,
2857                    enum mult_variant variant)
2858 {
2859   HOST_WIDE_INT val_so_far;
2860   rtx insn, accum, tem;
2861   int opno;
2862   enum machine_mode nmode;
2863
2864   /* Avoid referencing memory over and over.
2865      For speed, but also for correctness when mem is volatile.  */
2866   if (MEM_P (op0))
2867     op0 = force_reg (mode, op0);
2868
2869   /* ACCUM starts out either as OP0 or as a zero, depending on
2870      the first operation.  */
2871
2872   if (alg->op[0] == alg_zero)
2873     {
2874       accum = copy_to_mode_reg (mode, const0_rtx);
2875       val_so_far = 0;
2876     }
2877   else if (alg->op[0] == alg_m)
2878     {
2879       accum = copy_to_mode_reg (mode, op0);
2880       val_so_far = 1;
2881     }
2882   else
2883     gcc_unreachable ();
2884
2885   for (opno = 1; opno < alg->ops; opno++)
2886     {
2887       int log = alg->log[opno];
2888       rtx shift_subtarget = optimize ? 0 : accum;
2889       rtx add_target
2890         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2891            && !optimize)
2892           ? target : 0;
2893       rtx accum_target = optimize ? 0 : accum;
2894
2895       switch (alg->op[opno])
2896         {
2897         case alg_shift:
2898           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2899                                 build_int_cst (NULL_TREE, log),
2900                                 NULL_RTX, 0);
2901           val_so_far <<= log;
2902           break;
2903
2904         case alg_add_t_m2:
2905           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2906                               build_int_cst (NULL_TREE, log),
2907                               NULL_RTX, 0);
2908           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2909                                  add_target ? add_target : accum_target);
2910           val_so_far += (HOST_WIDE_INT) 1 << log;
2911           break;
2912
2913         case alg_sub_t_m2:
2914           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2915                               build_int_cst (NULL_TREE, log),
2916                               NULL_RTX, 0);
2917           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2918                                  add_target ? add_target : accum_target);
2919           val_so_far -= (HOST_WIDE_INT) 1 << log;
2920           break;
2921
2922         case alg_add_t2_m:
2923           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2924                                 build_int_cst (NULL_TREE, log),
2925                                 shift_subtarget,
2926                                 0);
2927           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2928                                  add_target ? add_target : accum_target);
2929           val_so_far = (val_so_far << log) + 1;
2930           break;
2931
2932         case alg_sub_t2_m:
2933           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2934                                 build_int_cst (NULL_TREE, log),
2935                                 shift_subtarget, 0);
2936           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2937                                  add_target ? add_target : accum_target);
2938           val_so_far = (val_so_far << log) - 1;
2939           break;
2940
2941         case alg_add_factor:
2942           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2943                               build_int_cst (NULL_TREE, log),
2944                               NULL_RTX, 0);
2945           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2946                                  add_target ? add_target : accum_target);
2947           val_so_far += val_so_far << log;
2948           break;
2949
2950         case alg_sub_factor:
2951           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2952                               build_int_cst (NULL_TREE, log),
2953                               NULL_RTX, 0);
2954           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2955                                  (add_target
2956                                   ? add_target : (optimize ? 0 : tem)));
2957           val_so_far = (val_so_far << log) - val_so_far;
2958           break;
2959
2960         default:
2961           gcc_unreachable ();
2962         }
2963
2964       /* Write a REG_EQUAL note on the last insn so that we can cse
2965          multiplication sequences.  Note that if ACCUM is a SUBREG,
2966          we've set the inner register and must properly indicate
2967          that.  */
2968
2969       tem = op0, nmode = mode;
2970       if (GET_CODE (accum) == SUBREG)
2971         {
2972           nmode = GET_MODE (SUBREG_REG (accum));
2973           tem = gen_lowpart (nmode, op0);
2974         }
2975
2976       insn = get_last_insn ();
2977       set_unique_reg_note (insn, REG_EQUAL,
2978                            gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
2979     }
2980
2981   if (variant == negate_variant)
2982     {
2983       val_so_far = -val_so_far;
2984       accum = expand_unop (mode, neg_optab, accum, target, 0);
2985     }
2986   else if (variant == add_variant)
2987     {
2988       val_so_far = val_so_far + 1;
2989       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2990     }
2991
2992   /* Compare only the bits of val and val_so_far that are significant
2993      in the result mode, to avoid sign-/zero-extension confusion.  */
2994   val &= GET_MODE_MASK (mode);
2995   val_so_far &= GET_MODE_MASK (mode);
2996   gcc_assert (val == val_so_far);
2997
2998   return accum;
2999 }
3000
3001 /* Perform a multiplication and return an rtx for the result.
3002    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3003    TARGET is a suggestion for where to store the result (an rtx).
3004
3005    We check specially for a constant integer as OP1.
3006    If you want this check for OP0 as well, then before calling
3007    you should swap the two operands if OP0 would be constant.  */
3008
3009 rtx
3010 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3011              int unsignedp)
3012 {
3013   rtx const_op1 = op1;
3014   enum mult_variant variant;
3015   struct algorithm algorithm;
3016
3017   /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3018      less than or equal in size to `unsigned int' this doesn't matter.
3019      If the mode is larger than `unsigned int', then synth_mult works only
3020      if the constant value exactly fits in an `unsigned int' without any
3021      truncation.  This means that multiplying by negative values does
3022      not work; results are off by 2^32 on a 32 bit machine.  */
3023
3024   /* If we are multiplying in DImode, it may still be a win
3025      to try to work with shifts and adds.  */
3026   if (GET_CODE (op1) == CONST_DOUBLE
3027       && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
3028       && HOST_BITS_PER_INT >= BITS_PER_WORD
3029       && CONST_DOUBLE_HIGH (op1) == 0)
3030     const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
3031   else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
3032            && GET_CODE (op1) == CONST_INT
3033            && INTVAL (op1) < 0)
3034     const_op1 = 0;
3035
3036   /* We used to test optimize here, on the grounds that it's better to
3037      produce a smaller program when -O is not used.
3038      But this causes such a terrible slowdown sometimes
3039      that it seems better to use synth_mult always.  */
3040
3041   if (const_op1 && GET_CODE (const_op1) == CONST_INT
3042       && (unsignedp || !flag_trapv))
3043     {
3044       int mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
3045
3046       if (choose_mult_variant (mode, INTVAL (const_op1), &algorithm, &variant,
3047                                mult_cost))
3048         return expand_mult_const (mode, op0, INTVAL (const_op1), target,
3049                                   &algorithm, variant);
3050     }
3051
3052   if (GET_CODE (op0) == CONST_DOUBLE)
3053     {
3054       rtx temp = op0;
3055       op0 = op1;
3056       op1 = temp;
3057     }
3058
3059   /* Expand x*2.0 as x+x.  */
3060   if (GET_CODE (op1) == CONST_DOUBLE
3061       && GET_MODE_CLASS (mode) == MODE_FLOAT)
3062     {
3063       REAL_VALUE_TYPE d;
3064       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3065
3066       if (REAL_VALUES_EQUAL (d, dconst2))
3067         {
3068           op0 = force_reg (GET_MODE (op0), op0);
3069           return expand_binop (mode, add_optab, op0, op0,
3070                                target, unsignedp, OPTAB_LIB_WIDEN);
3071         }
3072     }
3073
3074   /* This used to use umul_optab if unsigned, but for non-widening multiply
3075      there is no difference between signed and unsigned.  */
3076   op0 = expand_binop (mode,
3077                       ! unsignedp
3078                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3079                       ? smulv_optab : smul_optab,
3080                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3081   gcc_assert (op0);
3082   return op0;
3083 }
3084 \f
3085 /* Return the smallest n such that 2**n >= X.  */
3086
3087 int
3088 ceil_log2 (unsigned HOST_WIDE_INT x)
3089 {
3090   return floor_log2 (x - 1) + 1;
3091 }
3092
3093 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3094    replace division by D, and put the least significant N bits of the result
3095    in *MULTIPLIER_PTR and return the most significant bit.
3096
3097    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3098    needed precision is in PRECISION (should be <= N).
3099
3100    PRECISION should be as small as possible so this function can choose
3101    multiplier more freely.
3102
3103    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3104    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3105
3106    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3107    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3108
3109 static
3110 unsigned HOST_WIDE_INT
3111 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3112                    unsigned HOST_WIDE_INT *multiplier_ptr,
3113                    int *post_shift_ptr, int *lgup_ptr)
3114 {
3115   HOST_WIDE_INT mhigh_hi, mlow_hi;
3116   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3117   int lgup, post_shift;
3118   int pow, pow2;
3119   unsigned HOST_WIDE_INT nl, dummy1;
3120   HOST_WIDE_INT nh, dummy2;
3121
3122   /* lgup = ceil(log2(divisor)); */
3123   lgup = ceil_log2 (d);
3124
3125   gcc_assert (lgup <= n);
3126
3127   pow = n + lgup;
3128   pow2 = n + lgup - precision;
3129
3130   /* We could handle this with some effort, but this case is much
3131      better handled directly with a scc insn, so rely on caller using
3132      that.  */
3133   gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3134
3135   /* mlow = 2^(N + lgup)/d */
3136  if (pow >= HOST_BITS_PER_WIDE_INT)
3137     {
3138       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3139       nl = 0;
3140     }
3141   else
3142     {
3143       nh = 0;
3144       nl = (unsigned HOST_WIDE_INT) 1 << pow;
3145     }
3146   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3147                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3148
3149   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
3150   if (pow2 >= HOST_BITS_PER_WIDE_INT)
3151     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3152   else
3153     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3154   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3155                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3156
3157   gcc_assert (!mhigh_hi || nh - d < d);
3158   gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3159   /* Assert that mlow < mhigh.  */
3160   gcc_assert (mlow_hi < mhigh_hi
3161               || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3162
3163   /* If precision == N, then mlow, mhigh exceed 2^N
3164      (but they do not exceed 2^(N+1)).  */
3165
3166   /* Reduce to lowest terms.  */
3167   for (post_shift = lgup; post_shift > 0; post_shift--)
3168     {
3169       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3170       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3171       if (ml_lo >= mh_lo)
3172         break;
3173
3174       mlow_hi = 0;
3175       mlow_lo = ml_lo;
3176       mhigh_hi = 0;
3177       mhigh_lo = mh_lo;
3178     }
3179
3180   *post_shift_ptr = post_shift;
3181   *lgup_ptr = lgup;
3182   if (n < HOST_BITS_PER_WIDE_INT)
3183     {
3184       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3185       *multiplier_ptr = mhigh_lo & mask;
3186       return mhigh_lo >= mask;
3187     }
3188   else
3189     {
3190       *multiplier_ptr = mhigh_lo;
3191       return mhigh_hi;
3192     }
3193 }
3194
3195 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3196    congruent to 1 (mod 2**N).  */
3197
3198 static unsigned HOST_WIDE_INT
3199 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3200 {
3201   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3202
3203   /* The algorithm notes that the choice y = x satisfies
3204      x*y == 1 mod 2^3, since x is assumed odd.
3205      Each iteration doubles the number of bits of significance in y.  */
3206
3207   unsigned HOST_WIDE_INT mask;
3208   unsigned HOST_WIDE_INT y = x;
3209   int nbit = 3;
3210
3211   mask = (n == HOST_BITS_PER_WIDE_INT
3212           ? ~(unsigned HOST_WIDE_INT) 0
3213           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3214
3215   while (nbit < n)
3216     {
3217       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3218       nbit *= 2;
3219     }
3220   return y;
3221 }
3222
3223 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3224    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3225    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3226    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3227    become signed.
3228
3229    The result is put in TARGET if that is convenient.
3230
3231    MODE is the mode of operation.  */
3232
3233 rtx
3234 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3235                              rtx op1, rtx target, int unsignedp)
3236 {
3237   rtx tem;
3238   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3239
3240   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3241                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3242                       NULL_RTX, 0);
3243   tem = expand_and (mode, tem, op1, NULL_RTX);
3244   adj_operand
3245     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3246                      adj_operand);
3247
3248   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3249                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3250                       NULL_RTX, 0);
3251   tem = expand_and (mode, tem, op0, NULL_RTX);
3252   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3253                           target);
3254
3255   return target;
3256 }
3257
3258 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
3259
3260 static rtx
3261 extract_high_half (enum machine_mode mode, rtx op)
3262 {
3263   enum machine_mode wider_mode;
3264
3265   if (mode == word_mode)
3266     return gen_highpart (mode, op);
3267
3268   wider_mode = GET_MODE_WIDER_MODE (mode);
3269   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3270                      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3271   return convert_modes (mode, wider_mode, op, 0);
3272 }
3273
3274 /* Like expand_mult_highpart, but only consider using a multiplication
3275    optab.  OP1 is an rtx for the constant operand.  */
3276
3277 static rtx
3278 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3279                             rtx target, int unsignedp, int max_cost)
3280 {
3281   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3282   enum machine_mode wider_mode;
3283   optab moptab;
3284   rtx tem;
3285   int size;
3286
3287   wider_mode = GET_MODE_WIDER_MODE (mode);
3288   size = GET_MODE_BITSIZE (mode);
3289
3290   /* Firstly, try using a multiplication insn that only generates the needed
3291      high part of the product, and in the sign flavor of unsignedp.  */
3292   if (mul_highpart_cost[mode] < max_cost)
3293     {
3294       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3295       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3296                           unsignedp, OPTAB_DIRECT);
3297       if (tem)
3298         return tem;
3299     }
3300
3301   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3302      Need to adjust the result after the multiplication.  */
3303   if (size - 1 < BITS_PER_WORD
3304       && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
3305           + 4 * add_cost[mode] < max_cost))
3306     {
3307       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3308       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3309                           unsignedp, OPTAB_DIRECT);
3310       if (tem)
3311         /* We used the wrong signedness.  Adjust the result.  */
3312         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3313                                             tem, unsignedp);
3314     }
3315
3316   /* Try widening multiplication.  */
3317   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3318   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3319       && mul_widen_cost[wider_mode] < max_cost)
3320     {
3321       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3322                           unsignedp, OPTAB_WIDEN);
3323       if (tem)
3324         return extract_high_half (mode, tem);
3325     }
3326
3327   /* Try widening the mode and perform a non-widening multiplication.  */
3328   moptab = smul_optab;
3329   if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3330       && size - 1 < BITS_PER_WORD
3331       && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
3332     {
3333       tem = expand_binop (wider_mode, moptab, op0, op1, 0,
3334                           unsignedp, OPTAB_WIDEN);
3335       if (tem)
3336         return extract_high_half (mode, tem);
3337     }
3338
3339   /* Try widening multiplication of opposite signedness, and adjust.  */
3340   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3341   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3342       && size - 1 < BITS_PER_WORD
3343       && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
3344           + 4 * add_cost[mode] < max_cost))
3345     {
3346       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3347                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3348       if (tem != 0)
3349         {
3350           tem = extract_high_half (mode, tem);
3351           /* We used the wrong signedness.  Adjust the result.  */
3352           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3353                                               target, unsignedp);
3354         }
3355     }
3356
3357   return 0;
3358 }
3359
3360 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
3361    in TARGET if that is convenient, and return where the result is.  If the
3362    operation can not be performed, 0 is returned.
3363
3364    MODE is the mode of operation and result.
3365
3366    UNSIGNEDP nonzero means unsigned multiply.
3367
3368    MAX_COST is the total allowed cost for the expanded RTL.  */
3369
3370 rtx
3371 expand_mult_highpart (enum machine_mode mode, rtx op0,
3372                       unsigned HOST_WIDE_INT cnst1, rtx target,
3373                       int unsignedp, int max_cost)
3374 {
3375   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3376   int extra_cost;
3377   bool sign_adjust = false;
3378   enum mult_variant variant;
3379   struct algorithm alg;
3380   rtx op1, tem;
3381
3382   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3383   gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3384
3385   op1 = gen_int_mode (cnst1, wider_mode);
3386   cnst1 &= GET_MODE_MASK (mode);
3387
3388   /* We can't optimize modes wider than BITS_PER_WORD.
3389      ??? We might be able to perform double-word arithmetic if
3390      mode == word_mode, however all the cost calculations in
3391      synth_mult etc. assume single-word operations.  */
3392   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3393     return expand_mult_highpart_optab (mode, op0, op1, target,
3394                                        unsignedp, max_cost);
3395
3396   extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3397
3398   /* Check whether we try to multiply by a negative constant.  */
3399   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3400     {
3401       sign_adjust = true;
3402       extra_cost += add_cost[mode];
3403     }
3404
3405   /* See whether shift/add multiplication is cheap enough.  */
3406   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3407                            max_cost - extra_cost))
3408     {
3409       /* See whether the specialized multiplication optabs are
3410          cheaper than the shift/add version.  */
3411       tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3412                                         alg.cost.cost + extra_cost);
3413       if (tem)
3414         return tem;
3415
3416       tem = convert_to_mode (wider_mode, op0, unsignedp);
3417       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3418       tem = extract_high_half (mode, tem);
3419
3420       /* Adjust result for signedness.  */
3421       if (sign_adjust)
3422         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3423
3424       return tem;
3425     }
3426   return expand_mult_highpart_optab (mode, op0, op1, target,
3427                                      unsignedp, max_cost);
3428 }
3429
3430
3431 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3432
3433 static rtx
3434 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3435 {
3436   unsigned HOST_WIDE_INT masklow, maskhigh;
3437   rtx result, temp, shift, label;
3438   int logd;
3439
3440   logd = floor_log2 (d);
3441   result = gen_reg_rtx (mode);
3442
3443   /* Avoid conditional branches when they're expensive.  */
3444   if (BRANCH_COST >= 2
3445       && !optimize_size)
3446     {
3447       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3448                                       mode, 0, -1);
3449       if (signmask)
3450         {
3451           signmask = force_reg (mode, signmask);
3452           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3453           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3454
3455           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3456              which instruction sequence to use.  If logical right shifts
3457              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3458              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3459
3460           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3461           if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing
3462               || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3463             {
3464               temp = expand_binop (mode, xor_optab, op0, signmask,
3465                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3466               temp = expand_binop (mode, sub_optab, temp, signmask,
3467                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3468               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3469                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3470               temp = expand_binop (mode, xor_optab, temp, signmask,
3471                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3472               temp = expand_binop (mode, sub_optab, temp, signmask,
3473                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3474             }
3475           else
3476             {
3477               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3478                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3479               signmask = force_reg (mode, signmask);
3480
3481               temp = expand_binop (mode, add_optab, op0, signmask,
3482                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3483               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3484                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3485               temp = expand_binop (mode, sub_optab, temp, signmask,
3486                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3487             }
3488           return temp;
3489         }
3490     }
3491
3492   /* Mask contains the mode's signbit and the significant bits of the
3493      modulus.  By including the signbit in the operation, many targets
3494      can avoid an explicit compare operation in the following comparison
3495      against zero.  */
3496
3497   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3498   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3499     {
3500       masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3501       maskhigh = -1;
3502     }
3503   else
3504     maskhigh = (HOST_WIDE_INT) -1
3505                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3506
3507   temp = expand_binop (mode, and_optab, op0,
3508                        immed_double_const (masklow, maskhigh, mode),
3509                        result, 1, OPTAB_LIB_WIDEN);
3510   if (temp != result)
3511     emit_move_insn (result, temp);
3512
3513   label = gen_label_rtx ();
3514   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3515
3516   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3517                        0, OPTAB_LIB_WIDEN);
3518   masklow = (HOST_WIDE_INT) -1 << logd;
3519   maskhigh = -1;
3520   temp = expand_binop (mode, ior_optab, temp,
3521                        immed_double_const (masklow, maskhigh, mode),
3522                        result, 1, OPTAB_LIB_WIDEN);
3523   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3524                        0, OPTAB_LIB_WIDEN);
3525   if (temp != result)
3526     emit_move_insn (result, temp);
3527   emit_label (label);
3528   return result;
3529 }
3530
3531 /* Expand signed division of OP0 by a power of two D in mode MODE.
3532    This routine is only called for positive values of D.  */
3533
3534 static rtx
3535 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3536 {
3537   rtx temp, label;
3538   tree shift;
3539   int logd;
3540
3541   logd = floor_log2 (d);
3542   shift = build_int_cst (NULL_TREE, logd);
3543
3544   if (d == 2 && BRANCH_COST >= 1)
3545     {
3546       temp = gen_reg_rtx (mode);
3547       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3548       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3549                            0, OPTAB_LIB_WIDEN);
3550       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3551     }
3552
3553 #ifdef HAVE_conditional_move
3554   if (BRANCH_COST >= 2)
3555     {
3556       rtx temp2;
3557
3558       /* ??? emit_conditional_move forces a stack adjustment via
3559          compare_from_rtx so, if the sequence is discarded, it will
3560          be lost.  Do it now instead.  */
3561       do_pending_stack_adjust ();
3562
3563       start_sequence ();
3564       temp2 = copy_to_mode_reg (mode, op0);
3565       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3566                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3567       temp = force_reg (mode, temp);
3568
3569       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3570       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3571                                      mode, temp, temp2, mode, 0);
3572       if (temp2)
3573         {
3574           rtx seq = get_insns ();
3575           end_sequence ();
3576           emit_insn (seq);
3577           return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3578         }
3579       end_sequence ();
3580     }
3581 #endif
3582
3583   if (BRANCH_COST >= 2)
3584     {
3585       int ushift = GET_MODE_BITSIZE (mode) - logd;
3586
3587       temp = gen_reg_rtx (mode);
3588       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3589       if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
3590         temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3591                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3592       else
3593         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3594                              build_int_cst (NULL_TREE, ushift),
3595                              NULL_RTX, 1);
3596       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3597                            0, OPTAB_LIB_WIDEN);
3598       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3599     }
3600
3601   label = gen_label_rtx ();
3602   temp = copy_to_mode_reg (mode, op0);
3603   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3604   expand_inc (temp, GEN_INT (d - 1));
3605   emit_label (label);
3606   return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3607 }
3608 \f
3609 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3610    if that is convenient, and returning where the result is.
3611    You may request either the quotient or the remainder as the result;
3612    specify REM_FLAG nonzero to get the remainder.
3613
3614    CODE is the expression code for which kind of division this is;
3615    it controls how rounding is done.  MODE is the machine mode to use.
3616    UNSIGNEDP nonzero means do unsigned division.  */
3617
3618 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3619    and then correct it by or'ing in missing high bits
3620    if result of ANDI is nonzero.
3621    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3622    This could optimize to a bfexts instruction.
3623    But C doesn't use these operations, so their optimizations are
3624    left for later.  */
3625 /* ??? For modulo, we don't actually need the highpart of the first product,
3626    the low part will do nicely.  And for small divisors, the second multiply
3627    can also be a low-part only multiply or even be completely left out.
3628    E.g. to calculate the remainder of a division by 3 with a 32 bit
3629    multiply, multiply with 0x55555556 and extract the upper two bits;
3630    the result is exact for inputs up to 0x1fffffff.
3631    The input range can be reduced by using cross-sum rules.
3632    For odd divisors >= 3, the following table gives right shift counts
3633    so that if a number is shifted by an integer multiple of the given
3634    amount, the remainder stays the same:
3635    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3636    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3637    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3638    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3639    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3640
3641    Cross-sum rules for even numbers can be derived by leaving as many bits
3642    to the right alone as the divisor has zeros to the right.
3643    E.g. if x is an unsigned 32 bit number:
3644    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3645    */
3646
3647 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
3648
3649 rtx
3650 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3651                rtx op0, rtx op1, rtx target, int unsignedp)
3652 {
3653   enum machine_mode compute_mode;
3654   rtx tquotient;
3655   rtx quotient = 0, remainder = 0;
3656   rtx last;
3657   int size;
3658   rtx insn, set;
3659   optab optab1, optab2;
3660   int op1_is_constant, op1_is_pow2 = 0;
3661   int max_cost, extra_cost;
3662   static HOST_WIDE_INT last_div_const = 0;
3663   static HOST_WIDE_INT ext_op1;
3664
3665   op1_is_constant = GET_CODE (op1) == CONST_INT;
3666   if (op1_is_constant)
3667     {
3668       ext_op1 = INTVAL (op1);
3669       if (unsignedp)
3670         ext_op1 &= GET_MODE_MASK (mode);
3671       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3672                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3673     }
3674
3675   /*
3676      This is the structure of expand_divmod:
3677
3678      First comes code to fix up the operands so we can perform the operations
3679      correctly and efficiently.
3680
3681      Second comes a switch statement with code specific for each rounding mode.
3682      For some special operands this code emits all RTL for the desired
3683      operation, for other cases, it generates only a quotient and stores it in
3684      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3685      to indicate that it has not done anything.
3686
3687      Last comes code that finishes the operation.  If QUOTIENT is set and
3688      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3689      QUOTIENT is not set, it is computed using trunc rounding.
3690
3691      We try to generate special code for division and remainder when OP1 is a
3692      constant.  If |OP1| = 2**n we can use shifts and some other fast
3693      operations.  For other values of OP1, we compute a carefully selected
3694      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3695      by m.
3696
3697      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3698      half of the product.  Different strategies for generating the product are
3699      implemented in expand_mult_highpart.
3700
3701      If what we actually want is the remainder, we generate that by another
3702      by-constant multiplication and a subtraction.  */
3703
3704   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3705      code below will malfunction if we are, so check here and handle
3706      the special case if so.  */
3707   if (op1 == const1_rtx)
3708     return rem_flag ? const0_rtx : op0;
3709
3710     /* When dividing by -1, we could get an overflow.
3711      negv_optab can handle overflows.  */
3712   if (! unsignedp && op1 == constm1_rtx)
3713     {
3714       if (rem_flag)
3715         return const0_rtx;
3716       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3717                           ? negv_optab : neg_optab, op0, target, 0);
3718     }
3719
3720   if (target
3721       /* Don't use the function value register as a target
3722          since we have to read it as well as write it,
3723          and function-inlining gets confused by this.  */
3724       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3725           /* Don't clobber an operand while doing a multi-step calculation.  */
3726           || ((rem_flag || op1_is_constant)
3727               && (reg_mentioned_p (target, op0)
3728                   || (MEM_P (op0) && MEM_P (target))))
3729           || reg_mentioned_p (target, op1)
3730           || (MEM_P (op1) && MEM_P (target))))
3731     target = 0;
3732
3733   /* Get the mode in which to perform this computation.  Normally it will
3734      be MODE, but sometimes we can't do the desired operation in MODE.
3735      If so, pick a wider mode in which we can do the operation.  Convert
3736      to that mode at the start to avoid repeated conversions.
3737
3738      First see what operations we need.  These depend on the expression
3739      we are evaluating.  (We assume that divxx3 insns exist under the
3740      same conditions that modxx3 insns and that these insns don't normally
3741      fail.  If these assumptions are not correct, we may generate less
3742      efficient code in some cases.)
3743
3744      Then see if we find a mode in which we can open-code that operation
3745      (either a division, modulus, or shift).  Finally, check for the smallest
3746      mode for which we can do the operation with a library call.  */
3747
3748   /* We might want to refine this now that we have division-by-constant
3749      optimization.  Since expand_mult_highpart tries so many variants, it is
3750      not straightforward to generalize this.  Maybe we should make an array
3751      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3752
3753   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3754             ? (unsignedp ? lshr_optab : ashr_optab)
3755             : (unsignedp ? udiv_optab : sdiv_optab));
3756   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3757             ? optab1
3758             : (unsignedp ? udivmod_optab : sdivmod_optab));
3759
3760   for (compute_mode = mode; compute_mode != VOIDmode;
3761        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3762     if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
3763         || optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
3764       break;
3765
3766   if (compute_mode == VOIDmode)
3767     for (compute_mode = mode; compute_mode != VOIDmode;
3768          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3769       if (optab1->handlers[compute_mode].libfunc
3770           || optab2->handlers[compute_mode].libfunc)
3771         break;
3772
3773   /* If we still couldn't find a mode, use MODE, but we'll probably abort
3774      in expand_binop.  */
3775   if (compute_mode == VOIDmode)
3776     compute_mode = mode;
3777
3778   if (target && GET_MODE (target) == compute_mode)
3779     tquotient = target;
3780   else
3781     tquotient = gen_reg_rtx (compute_mode);
3782
3783   size = GET_MODE_BITSIZE (compute_mode);
3784 #if 0
3785   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3786      (mode), and thereby get better code when OP1 is a constant.  Do that
3787      later.  It will require going over all usages of SIZE below.  */
3788   size = GET_MODE_BITSIZE (mode);
3789 #endif
3790
3791   /* Only deduct something for a REM if the last divide done was
3792      for a different constant.   Then set the constant of the last
3793      divide.  */
3794   max_cost = div_cost[compute_mode]
3795     - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3796                       && INTVAL (op1) == last_div_const)
3797        ? mul_cost[compute_mode] + add_cost[compute_mode]
3798        : 0);
3799
3800   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3801
3802   /* Now convert to the best mode to use.  */
3803   if (compute_mode != mode)
3804     {
3805       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3806       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3807
3808       /* convert_modes may have placed op1 into a register, so we
3809          must recompute the following.  */
3810       op1_is_constant = GET_CODE (op1) == CONST_INT;
3811       op1_is_pow2 = (op1_is_constant
3812                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3813                           || (! unsignedp
3814                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3815     }
3816
3817   /* If one of the operands is a volatile MEM, copy it into a register.  */
3818
3819   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3820     op0 = force_reg (compute_mode, op0);
3821   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3822     op1 = force_reg (compute_mode, op1);
3823
3824   /* If we need the remainder or if OP1 is constant, we need to
3825      put OP0 in a register in case it has any queued subexpressions.  */
3826   if (rem_flag || op1_is_constant)
3827     op0 = force_reg (compute_mode, op0);
3828
3829   last = get_last_insn ();
3830
3831   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3832   if (unsignedp)
3833     {
3834       if (code == FLOOR_DIV_EXPR)
3835         code = TRUNC_DIV_EXPR;
3836       if (code == FLOOR_MOD_EXPR)
3837         code = TRUNC_MOD_EXPR;
3838       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3839         code = TRUNC_DIV_EXPR;
3840     }
3841
3842   if (op1 != const0_rtx)
3843     switch (code)
3844       {
3845       case TRUNC_MOD_EXPR:
3846       case TRUNC_DIV_EXPR:
3847         if (op1_is_constant)
3848           {
3849             if (unsignedp)
3850               {
3851                 unsigned HOST_WIDE_INT mh, ml;
3852                 int pre_shift, post_shift;
3853                 int dummy;
3854                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3855                                             & GET_MODE_MASK (compute_mode));
3856
3857                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3858                   {
3859                     pre_shift = floor_log2 (d);
3860                     if (rem_flag)
3861                       {
3862                         remainder
3863                           = expand_binop (compute_mode, and_optab, op0,
3864                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3865                                           remainder, 1,
3866                                           OPTAB_LIB_WIDEN);
3867                         if (remainder)
3868                           return gen_lowpart (mode, remainder);
3869                       }
3870                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3871                                              build_int_cst (NULL_TREE,
3872                                                             pre_shift),
3873                                              tquotient, 1);
3874                   }
3875                 else if (size <= HOST_BITS_PER_WIDE_INT)
3876                   {
3877                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3878                       {
3879                         /* Most significant bit of divisor is set; emit an scc
3880                            insn.  */
3881                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3882                                                     compute_mode, 1, 1);
3883                         if (quotient == 0)
3884                           goto fail1;
3885                       }
3886                     else
3887                       {
3888                         /* Find a suitable multiplier and right shift count
3889                            instead of multiplying with D.  */
3890
3891                         mh = choose_multiplier (d, size, size,
3892                                                 &ml, &post_shift, &dummy);
3893
3894                         /* If the suggested multiplier is more than SIZE bits,
3895                            we can do better for even divisors, using an
3896                            initial right shift.  */
3897                         if (mh != 0 && (d & 1) == 0)
3898                           {
3899                             pre_shift = floor_log2 (d & -d);
3900                             mh = choose_multiplier (d >> pre_shift, size,
3901                                                     size - pre_shift,
3902                                                     &ml, &post_shift, &dummy);
3903                             gcc_assert (!mh);
3904                           }
3905                         else
3906                           pre_shift = 0;
3907
3908                         if (mh != 0)
3909                           {
3910                             rtx t1, t2, t3, t4;
3911
3912                             if (post_shift - 1 >= BITS_PER_WORD)
3913                               goto fail1;
3914
3915                             extra_cost
3916                               = (shift_cost[compute_mode][post_shift - 1]
3917                                  + shift_cost[compute_mode][1]
3918                                  + 2 * add_cost[compute_mode]);
3919                             t1 = expand_mult_highpart (compute_mode, op0, ml,
3920                                                        NULL_RTX, 1,
3921                                                        max_cost - extra_cost);
3922                             if (t1 == 0)
3923                               goto fail1;
3924                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
3925                                                                op0, t1),
3926                                                 NULL_RTX);
3927                             t3 = expand_shift
3928                               (RSHIFT_EXPR, compute_mode, t2,
3929                                build_int_cst (NULL_TREE, 1),
3930                                NULL_RTX,1);
3931                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
3932                                                               t1, t3),
3933                                                 NULL_RTX);
3934                             quotient = expand_shift
3935                               (RSHIFT_EXPR, compute_mode, t4,
3936                                build_int_cst (NULL_TREE, post_shift - 1),
3937                                tquotient, 1);
3938                           }
3939                         else
3940                           {
3941                             rtx t1, t2;
3942
3943                             if (pre_shift >= BITS_PER_WORD
3944                                 || post_shift >= BITS_PER_WORD)
3945                               goto fail1;
3946
3947                             t1 = expand_shift
3948                               (RSHIFT_EXPR, compute_mode, op0,
3949                                build_int_cst (NULL_TREE, pre_shift),
3950                                NULL_RTX, 1);
3951                             extra_cost
3952                               = (shift_cost[compute_mode][pre_shift]
3953                                  + shift_cost[compute_mode][post_shift]);
3954                             t2 = expand_mult_highpart (compute_mode, t1, ml,
3955                                                        NULL_RTX, 1,
3956                                                        max_cost - extra_cost);
3957                             if (t2 == 0)
3958                               goto fail1;
3959                             quotient = expand_shift
3960                               (RSHIFT_EXPR, compute_mode, t2,
3961                                build_int_cst (NULL_TREE, post_shift),
3962                                tquotient, 1);
3963                           }
3964                       }
3965                   }
3966                 else            /* Too wide mode to use tricky code */
3967                   break;
3968
3969                 insn = get_last_insn ();
3970                 if (insn != last
3971                     && (set = single_set (insn)) != 0
3972                     && SET_DEST (set) == quotient)
3973                   set_unique_reg_note (insn,
3974                                        REG_EQUAL,
3975                                        gen_rtx_UDIV (compute_mode, op0, op1));
3976               }
3977             else                /* TRUNC_DIV, signed */
3978               {
3979                 unsigned HOST_WIDE_INT ml;
3980                 int lgup, post_shift;
3981                 HOST_WIDE_INT d = INTVAL (op1);
3982                 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3983
3984                 /* n rem d = n rem -d */
3985                 if (rem_flag && d < 0)
3986                   {
3987                     d = abs_d;
3988                     op1 = gen_int_mode (abs_d, compute_mode);
3989                   }
3990
3991                 if (d == 1)
3992                   quotient = op0;
3993                 else if (d == -1)
3994                   quotient = expand_unop (compute_mode, neg_optab, op0,
3995                                           tquotient, 0);
3996                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3997                   {
3998                     /* This case is not handled correctly below.  */
3999                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4000                                                 compute_mode, 1, 1);
4001                     if (quotient == 0)
4002                       goto fail1;
4003                   }
4004                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4005                          && (rem_flag ? smod_pow2_cheap[compute_mode]
4006                                       : sdiv_pow2_cheap[compute_mode])
4007                          /* We assume that cheap metric is true if the
4008                             optab has an expander for this mode.  */
4009                          && (((rem_flag ? smod_optab : sdiv_optab)
4010                               ->handlers[compute_mode].insn_code
4011                               != CODE_FOR_nothing)
4012                              || (sdivmod_optab->handlers[compute_mode]
4013                                  .insn_code != CODE_FOR_nothing)))
4014                   ;
4015                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4016                   {
4017                     if (rem_flag)
4018                       {
4019                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4020                         if (remainder)
4021                           return gen_lowpart (mode, remainder);
4022                       }
4023
4024                     if (sdiv_pow2_cheap[compute_mode]
4025                         && ((sdiv_optab->handlers[compute_mode].insn_code
4026                              != CODE_FOR_nothing)
4027                             || (sdivmod_optab->handlers[compute_mode].insn_code
4028                                 != CODE_FOR_nothing)))
4029                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4030                                                 compute_mode, op0,
4031                                                 gen_int_mode (abs_d,
4032                                                               compute_mode),
4033                                                 NULL_RTX, 0);
4034                     else
4035                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4036
4037                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4038                        negate the quotient.  */
4039                     if (d < 0)
4040                       {
4041                         insn = get_last_insn ();
4042                         if (insn != last
4043                             && (set = single_set (insn)) != 0
4044                             && SET_DEST (set) == quotient
4045                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4046                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4047                           set_unique_reg_note (insn,
4048                                                REG_EQUAL,
4049                                                gen_rtx_DIV (compute_mode,
4050                                                             op0,
4051                                                             GEN_INT
4052                                                             (trunc_int_for_mode
4053                                                              (abs_d,
4054                                                               compute_mode))));
4055
4056                         quotient = expand_unop (compute_mode, neg_optab,
4057                                                 quotient, quotient, 0);
4058                       }
4059                   }
4060                 else if (size <= HOST_BITS_PER_WIDE_INT)
4061                   {
4062                     choose_multiplier (abs_d, size, size - 1,
4063                                        &ml, &post_shift, &lgup);
4064                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4065                       {
4066                         rtx t1, t2, t3;
4067
4068                         if (post_shift >= BITS_PER_WORD
4069                             || size - 1 >= BITS_PER_WORD)
4070                           goto fail1;
4071
4072                         extra_cost = (shift_cost[compute_mode][post_shift]
4073                                       + shift_cost[compute_mode][size - 1]
4074                                       + add_cost[compute_mode]);
4075                         t1 = expand_mult_highpart (compute_mode, op0, ml,
4076                                                    NULL_RTX, 0,
4077                                                    max_cost - extra_cost);
4078                         if (t1 == 0)
4079                           goto fail1;
4080                         t2 = expand_shift
4081                           (RSHIFT_EXPR, compute_mode, t1,
4082                            build_int_cst (NULL_TREE, post_shift),
4083                            NULL_RTX, 0);
4084                         t3 = expand_shift
4085                           (RSHIFT_EXPR, compute_mode, op0,
4086                            build_int_cst (NULL_TREE, size - 1),
4087                            NULL_RTX, 0);
4088                         if (d < 0)
4089                           quotient
4090                             = force_operand (gen_rtx_MINUS (compute_mode,
4091                                                             t3, t2),
4092                                              tquotient);
4093                         else
4094                           quotient
4095                             = force_operand (gen_rtx_MINUS (compute_mode,
4096                                                             t2, t3),
4097                                              tquotient);
4098                       }
4099                     else
4100                       {
4101                         rtx t1, t2, t3, t4;
4102
4103                         if (post_shift >= BITS_PER_WORD
4104                             || size - 1 >= BITS_PER_WORD)
4105                           goto fail1;
4106
4107                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4108                         extra_cost = (shift_cost[compute_mode][post_shift]
4109                                       + shift_cost[compute_mode][size - 1]
4110                                       + 2 * add_cost[compute_mode]);
4111                         t1 = expand_mult_highpart (compute_mode, op0, ml,
4112                                                    NULL_RTX, 0,
4113                                                    max_cost - extra_cost);
4114                         if (t1 == 0)
4115                           goto fail1;
4116                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4117                                                           t1, op0),
4118                                             NULL_RTX);
4119                         t3 = expand_shift
4120                           (RSHIFT_EXPR, compute_mode, t2,
4121                            build_int_cst (NULL_TREE, post_shift),
4122                            NULL_RTX, 0);
4123                         t4 = expand_shift
4124                           (RSHIFT_EXPR, compute_mode, op0,
4125                            build_int_cst (NULL_TREE, size - 1),
4126                            NULL_RTX, 0);
4127                         if (d < 0)
4128                           quotient
4129                             = force_operand (gen_rtx_MINUS (compute_mode,
4130                                                             t4, t3),
4131                                              tquotient);
4132                         else
4133                           quotient
4134                             = force_operand (gen_rtx_MINUS (compute_mode,
4135                                                             t3, t4),
4136                                              tquotient);
4137                       }
4138                   }
4139                 else            /* Too wide mode to use tricky code */
4140                   break;
4141
4142                 insn = get_last_insn ();
4143                 if (insn != last
4144                     && (set = single_set (insn)) != 0
4145                     && SET_DEST (set) == quotient)
4146                   set_unique_reg_note (insn,
4147                                        REG_EQUAL,
4148                                        gen_rtx_DIV (compute_mode, op0, op1));
4149               }
4150             break;
4151           }
4152       fail1:
4153         delete_insns_since (last);
4154         break;
4155
4156       case FLOOR_DIV_EXPR:
4157       case FLOOR_MOD_EXPR:
4158       /* We will come here only for signed operations.  */
4159         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4160           {
4161             unsigned HOST_WIDE_INT mh, ml;
4162             int pre_shift, lgup, post_shift;
4163             HOST_WIDE_INT d = INTVAL (op1);
4164
4165             if (d > 0)
4166               {
4167                 /* We could just as easily deal with negative constants here,
4168                    but it does not seem worth the trouble for GCC 2.6.  */
4169                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4170                   {
4171                     pre_shift = floor_log2 (d);
4172                     if (rem_flag)
4173                       {
4174                         remainder = expand_binop (compute_mode, and_optab, op0,
4175                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4176                                                   remainder, 0, OPTAB_LIB_WIDEN);
4177                         if (remainder)
4178                           return gen_lowpart (mode, remainder);
4179                       }
4180                     quotient = expand_shift
4181                       (RSHIFT_EXPR, compute_mode, op0,
4182                        build_int_cst (NULL_TREE, pre_shift),
4183                        tquotient, 0);
4184                   }
4185                 else
4186                   {
4187                     rtx t1, t2, t3, t4;
4188
4189                     mh = choose_multiplier (d, size, size - 1,
4190                                             &ml, &post_shift, &lgup);
4191                     gcc_assert (!mh);
4192
4193                     if (post_shift < BITS_PER_WORD
4194                         && size - 1 < BITS_PER_WORD)
4195                       {
4196                         t1 = expand_shift
4197                           (RSHIFT_EXPR, compute_mode, op0,
4198                            build_int_cst (NULL_TREE, size - 1),
4199                            NULL_RTX, 0);
4200                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4201                                            NULL_RTX, 0, OPTAB_WIDEN);
4202                         extra_cost = (shift_cost[compute_mode][post_shift]
4203                                       + shift_cost[compute_mode][size - 1]
4204                                       + 2 * add_cost[compute_mode]);
4205                         t3 = expand_mult_highpart (compute_mode, t2, ml,
4206                                                    NULL_RTX, 1,
4207                                                    max_cost - extra_cost);
4208                         if (t3 != 0)
4209                           {
4210                             t4 = expand_shift
4211                               (RSHIFT_EXPR, compute_mode, t3,
4212                                build_int_cst (NULL_TREE, post_shift),
4213                                NULL_RTX, 1);
4214                             quotient = expand_binop (compute_mode, xor_optab,
4215                                                      t4, t1, tquotient, 0,
4216                                                      OPTAB_WIDEN);
4217                           }
4218                       }
4219                   }
4220               }
4221             else
4222               {
4223                 rtx nsign, t1, t2, t3, t4;
4224                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4225                                                   op0, constm1_rtx), NULL_RTX);
4226                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4227                                    0, OPTAB_WIDEN);
4228                 nsign = expand_shift
4229                   (RSHIFT_EXPR, compute_mode, t2,
4230                    build_int_cst (NULL_TREE, size - 1),
4231                    NULL_RTX, 0);
4232                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4233                                     NULL_RTX);
4234                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4235                                     NULL_RTX, 0);
4236                 if (t4)
4237                   {
4238                     rtx t5;
4239                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4240                                       NULL_RTX, 0);
4241                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4242                                                             t4, t5),
4243                                               tquotient);
4244                   }
4245               }
4246           }
4247
4248         if (quotient != 0)
4249           break;
4250         delete_insns_since (last);
4251
4252         /* Try using an instruction that produces both the quotient and
4253            remainder, using truncation.  We can easily compensate the quotient
4254            or remainder to get floor rounding, once we have the remainder.
4255            Notice that we compute also the final remainder value here,
4256            and return the result right away.  */
4257         if (target == 0 || GET_MODE (target) != compute_mode)
4258           target = gen_reg_rtx (compute_mode);
4259
4260         if (rem_flag)
4261           {
4262             remainder
4263               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4264             quotient = gen_reg_rtx (compute_mode);
4265           }
4266         else
4267           {
4268             quotient
4269               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4270             remainder = gen_reg_rtx (compute_mode);
4271           }
4272
4273         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4274                                  quotient, remainder, 0))
4275           {
4276             /* This could be computed with a branch-less sequence.
4277                Save that for later.  */
4278             rtx tem;
4279             rtx label = gen_label_rtx ();
4280             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4281             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4282                                 NULL_RTX, 0, OPTAB_WIDEN);
4283             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4284             expand_dec (quotient, const1_rtx);
4285             expand_inc (remainder, op1);
4286             emit_label (label);
4287             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4288           }
4289
4290         /* No luck with division elimination or divmod.  Have to do it
4291            by conditionally adjusting op0 *and* the result.  */
4292         {
4293           rtx label1, label2, label3, label4, label5;
4294           rtx adjusted_op0;
4295           rtx tem;
4296
4297           quotient = gen_reg_rtx (compute_mode);
4298           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4299           label1 = gen_label_rtx ();
4300           label2 = gen_label_rtx ();
4301           label3 = gen_label_rtx ();
4302           label4 = gen_label_rtx ();
4303           label5 = gen_label_rtx ();
4304           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4305           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4306           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4307                               quotient, 0, OPTAB_LIB_WIDEN);
4308           if (tem != quotient)
4309             emit_move_insn (quotient, tem);
4310           emit_jump_insn (gen_jump (label5));
4311           emit_barrier ();
4312           emit_label (label1);
4313           expand_inc (adjusted_op0, const1_rtx);
4314           emit_jump_insn (gen_jump (label4));
4315           emit_barrier ();
4316           emit_label (label2);
4317           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4318           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4319                               quotient, 0, OPTAB_LIB_WIDEN);
4320           if (tem != quotient)
4321             emit_move_insn (quotient, tem);
4322           emit_jump_insn (gen_jump (label5));
4323           emit_barrier ();
4324           emit_label (label3);
4325           expand_dec (adjusted_op0, const1_rtx);
4326           emit_label (label4);
4327           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4328                               quotient, 0, OPTAB_LIB_WIDEN);
4329           if (tem != quotient)
4330             emit_move_insn (quotient, tem);
4331           expand_dec (quotient, const1_rtx);
4332           emit_label (label5);
4333         }
4334         break;
4335
4336       case CEIL_DIV_EXPR:
4337       case CEIL_MOD_EXPR:
4338         if (unsignedp)
4339           {
4340             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4341               {
4342                 rtx t1, t2, t3;
4343                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4344                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4345                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4346                                    tquotient, 1);
4347                 t2 = expand_binop (compute_mode, and_optab, op0,
4348                                    GEN_INT (d - 1),
4349                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4350                 t3 = gen_reg_rtx (compute_mode);
4351                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4352                                       compute_mode, 1, 1);
4353                 if (t3 == 0)
4354                   {
4355                     rtx lab;
4356                     lab = gen_label_rtx ();
4357                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4358                     expand_inc (t1, const1_rtx);
4359                     emit_label (lab);
4360                     quotient = t1;
4361                   }
4362                 else
4363                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4364                                                           t1, t3),
4365                                             tquotient);
4366                 break;
4367               }
4368
4369             /* Try using an instruction that produces both the quotient and
4370                remainder, using truncation.  We can easily compensate the
4371                quotient or remainder to get ceiling rounding, once we have the
4372                remainder.  Notice that we compute also the final remainder
4373                value here, and return the result right away.  */
4374             if (target == 0 || GET_MODE (target) != compute_mode)
4375               target = gen_reg_rtx (compute_mode);
4376
4377             if (rem_flag)
4378               {
4379                 remainder = (REG_P (target)
4380                              ? target : gen_reg_rtx (compute_mode));
4381                 quotient = gen_reg_rtx (compute_mode);
4382               }
4383             else
4384               {
4385                 quotient = (REG_P (target)
4386                             ? target : gen_reg_rtx (compute_mode));
4387                 remainder = gen_reg_rtx (compute_mode);
4388               }
4389
4390             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4391                                      remainder, 1))
4392               {
4393                 /* This could be computed with a branch-less sequence.
4394                    Save that for later.  */
4395                 rtx label = gen_label_rtx ();
4396                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4397                                  compute_mode, label);
4398                 expand_inc (quotient, const1_rtx);
4399                 expand_dec (remainder, op1);
4400                 emit_label (label);
4401                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4402               }
4403
4404             /* No luck with division elimination or divmod.  Have to do it
4405                by conditionally adjusting op0 *and* the result.  */
4406             {
4407               rtx label1, label2;
4408               rtx adjusted_op0, tem;
4409
4410               quotient = gen_reg_rtx (compute_mode);
4411               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4412               label1 = gen_label_rtx ();
4413               label2 = gen_label_rtx ();
4414               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4415                                compute_mode, label1);
4416               emit_move_insn  (quotient, const0_rtx);
4417               emit_jump_insn (gen_jump (label2));
4418               emit_barrier ();
4419               emit_label (label1);
4420               expand_dec (adjusted_op0, const1_rtx);
4421               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4422                                   quotient, 1, OPTAB_LIB_WIDEN);
4423               if (tem != quotient)
4424                 emit_move_insn (quotient, tem);
4425               expand_inc (quotient, const1_rtx);
4426               emit_label (label2);
4427             }
4428           }
4429         else /* signed */
4430           {
4431             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4432                 && INTVAL (op1) >= 0)
4433               {
4434                 /* This is extremely similar to the code for the unsigned case
4435                    above.  For 2.7 we should merge these variants, but for
4436                    2.6.1 I don't want to touch the code for unsigned since that
4437                    get used in C.  The signed case will only be used by other
4438                    languages (Ada).  */
4439
4440                 rtx t1, t2, t3;
4441                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4442                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4443                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4444                                    tquotient, 0);
4445                 t2 = expand_binop (compute_mode, and_optab, op0,
4446                                    GEN_INT (d - 1),
4447                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4448                 t3 = gen_reg_rtx (compute_mode);
4449                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4450                                       compute_mode, 1, 1);
4451                 if (t3 == 0)
4452                   {
4453                     rtx lab;
4454                     lab = gen_label_rtx ();
4455                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4456                     expand_inc (t1, const1_rtx);
4457                     emit_label (lab);
4458                     quotient = t1;
4459                   }
4460                 else
4461                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4462                                                           t1, t3),
4463                                             tquotient);
4464                 break;
4465               }
4466
4467             /* Try using an instruction that produces both the quotient and
4468                remainder, using truncation.  We can easily compensate the
4469                quotient or remainder to get ceiling rounding, once we have the
4470                remainder.  Notice that we compute also the final remainder
4471                value here, and return the result right away.  */
4472             if (target == 0 || GET_MODE (target) != compute_mode)
4473               target = gen_reg_rtx (compute_mode);
4474             if (rem_flag)
4475               {
4476                 remainder= (REG_P (target)
4477                             ? target : gen_reg_rtx (compute_mode));
4478                 quotient = gen_reg_rtx (compute_mode);
4479               }
4480             else
4481               {
4482                 quotient = (REG_P (target)
4483                             ? target : gen_reg_rtx (compute_mode));
4484                 remainder = gen_reg_rtx (compute_mode);
4485               }
4486
4487             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4488                                      remainder, 0))
4489               {
4490                 /* This could be computed with a branch-less sequence.
4491                    Save that for later.  */
4492                 rtx tem;
4493                 rtx label = gen_label_rtx ();
4494                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4495                                  compute_mode, label);
4496                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4497                                     NULL_RTX, 0, OPTAB_WIDEN);
4498                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4499                 expand_inc (quotient, const1_rtx);
4500                 expand_dec (remainder, op1);
4501                 emit_label (label);
4502                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4503               }
4504
4505             /* No luck with division elimination or divmod.  Have to do it
4506                by conditionally adjusting op0 *and* the result.  */
4507             {
4508               rtx label1, label2, label3, label4, label5;
4509               rtx adjusted_op0;
4510               rtx tem;
4511
4512               quotient = gen_reg_rtx (compute_mode);
4513               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4514               label1 = gen_label_rtx ();
4515               label2 = gen_label_rtx ();
4516               label3 = gen_label_rtx ();
4517               label4 = gen_label_rtx ();
4518               label5 = gen_label_rtx ();
4519               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4520               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4521                                compute_mode, label1);
4522               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4523                                   quotient, 0, OPTAB_LIB_WIDEN);
4524               if (tem != quotient)
4525                 emit_move_insn (quotient, tem);
4526               emit_jump_insn (gen_jump (label5));
4527               emit_barrier ();
4528               emit_label (label1);
4529               expand_dec (adjusted_op0, const1_rtx);
4530               emit_jump_insn (gen_jump (label4));
4531               emit_barrier ();
4532               emit_label (label2);
4533               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4534                                compute_mode, label3);
4535               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4536                                   quotient, 0, OPTAB_LIB_WIDEN);
4537               if (tem != quotient)
4538                 emit_move_insn (quotient, tem);
4539               emit_jump_insn (gen_jump (label5));
4540               emit_barrier ();
4541               emit_label (label3);
4542               expand_inc (adjusted_op0, const1_rtx);
4543               emit_label (label4);
4544               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4545                                   quotient, 0, OPTAB_LIB_WIDEN);
4546               if (tem != quotient)
4547                 emit_move_insn (quotient, tem);
4548               expand_inc (quotient, const1_rtx);
4549               emit_label (label5);
4550             }
4551           }
4552         break;
4553
4554       case EXACT_DIV_EXPR:
4555         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4556           {
4557             HOST_WIDE_INT d = INTVAL (op1);
4558             unsigned HOST_WIDE_INT ml;
4559             int pre_shift;
4560             rtx t1;
4561
4562             pre_shift = floor_log2 (d & -d);
4563             ml = invert_mod2n (d >> pre_shift, size);
4564             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4565                                build_int_cst (NULL_TREE, pre_shift),
4566                                NULL_RTX, unsignedp);
4567             quotient = expand_mult (compute_mode, t1,
4568                                     gen_int_mode (ml, compute_mode),
4569                                     NULL_RTX, 1);
4570
4571             insn = get_last_insn ();
4572             set_unique_reg_note (insn,
4573                                  REG_EQUAL,
4574                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4575                                                  compute_mode,
4576                                                  op0, op1));
4577           }
4578         break;
4579
4580       case ROUND_DIV_EXPR:
4581       case ROUND_MOD_EXPR:
4582         if (unsignedp)
4583           {
4584             rtx tem;
4585             rtx label;
4586             label = gen_label_rtx ();
4587             quotient = gen_reg_rtx (compute_mode);
4588             remainder = gen_reg_rtx (compute_mode);
4589             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4590               {
4591                 rtx tem;
4592                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4593                                          quotient, 1, OPTAB_LIB_WIDEN);
4594                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4595                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4596                                           remainder, 1, OPTAB_LIB_WIDEN);
4597               }
4598             tem = plus_constant (op1, -1);
4599             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4600                                 build_int_cst (NULL_TREE, 1),
4601                                 NULL_RTX, 1);
4602             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4603             expand_inc (quotient, const1_rtx);
4604             expand_dec (remainder, op1);
4605             emit_label (label);
4606           }
4607         else
4608           {
4609             rtx abs_rem, abs_op1, tem, mask;
4610             rtx label;
4611             label = gen_label_rtx ();
4612             quotient = gen_reg_rtx (compute_mode);
4613             remainder = gen_reg_rtx (compute_mode);
4614             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4615               {
4616                 rtx tem;
4617                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4618                                          quotient, 0, OPTAB_LIB_WIDEN);
4619                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4620                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4621                                           remainder, 0, OPTAB_LIB_WIDEN);
4622               }
4623             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4624             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4625             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4626                                 build_int_cst (NULL_TREE, 1),
4627                                 NULL_RTX, 1);
4628             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4629             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4630                                 NULL_RTX, 0, OPTAB_WIDEN);
4631             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4632                                  build_int_cst (NULL_TREE, size - 1),
4633                                  NULL_RTX, 0);
4634             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4635                                 NULL_RTX, 0, OPTAB_WIDEN);
4636             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4637                                 NULL_RTX, 0, OPTAB_WIDEN);
4638             expand_inc (quotient, tem);
4639             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4640                                 NULL_RTX, 0, OPTAB_WIDEN);
4641             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4642                                 NULL_RTX, 0, OPTAB_WIDEN);
4643             expand_dec (remainder, tem);
4644             emit_label (label);
4645           }
4646         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4647
4648       default:
4649         gcc_unreachable ();
4650       }
4651
4652   if (quotient == 0)
4653     {
4654       if (target && GET_MODE (target) != compute_mode)
4655         target = 0;
4656
4657       if (rem_flag)
4658         {
4659           /* Try to produce the remainder without producing the quotient.
4660              If we seem to have a divmod pattern that does not require widening,
4661              don't try widening here.  We should really have a WIDEN argument
4662              to expand_twoval_binop, since what we'd really like to do here is
4663              1) try a mod insn in compute_mode
4664              2) try a divmod insn in compute_mode
4665              3) try a div insn in compute_mode and multiply-subtract to get
4666                 remainder
4667              4) try the same things with widening allowed.  */
4668           remainder
4669             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4670                                  op0, op1, target,
4671                                  unsignedp,
4672                                  ((optab2->handlers[compute_mode].insn_code
4673                                    != CODE_FOR_nothing)
4674                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4675           if (remainder == 0)
4676             {
4677               /* No luck there.  Can we do remainder and divide at once
4678                  without a library call?  */
4679               remainder = gen_reg_rtx (compute_mode);
4680               if (! expand_twoval_binop ((unsignedp
4681                                           ? udivmod_optab
4682                                           : sdivmod_optab),
4683                                          op0, op1,
4684                                          NULL_RTX, remainder, unsignedp))
4685                 remainder = 0;
4686             }
4687
4688           if (remainder)
4689             return gen_lowpart (mode, remainder);
4690         }
4691
4692       /* Produce the quotient.  Try a quotient insn, but not a library call.
4693          If we have a divmod in this mode, use it in preference to widening
4694          the div (for this test we assume it will not fail). Note that optab2
4695          is set to the one of the two optabs that the call below will use.  */
4696       quotient
4697         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4698                              op0, op1, rem_flag ? NULL_RTX : target,
4699                              unsignedp,
4700                              ((optab2->handlers[compute_mode].insn_code
4701                                != CODE_FOR_nothing)
4702                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4703
4704       if (quotient == 0)
4705         {
4706           /* No luck there.  Try a quotient-and-remainder insn,
4707              keeping the quotient alone.  */
4708           quotient = gen_reg_rtx (compute_mode);
4709           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4710                                      op0, op1,
4711                                      quotient, NULL_RTX, unsignedp))
4712             {
4713               quotient = 0;
4714               if (! rem_flag)
4715                 /* Still no luck.  If we are not computing the remainder,
4716                    use a library call for the quotient.  */
4717                 quotient = sign_expand_binop (compute_mode,
4718                                               udiv_optab, sdiv_optab,
4719                                               op0, op1, target,
4720                                               unsignedp, OPTAB_LIB_WIDEN);
4721             }
4722         }
4723     }
4724
4725   if (rem_flag)
4726     {
4727       if (target && GET_MODE (target) != compute_mode)
4728         target = 0;
4729
4730       if (quotient == 0)
4731         {
4732           /* No divide instruction either.  Use library for remainder.  */
4733           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4734                                          op0, op1, target,
4735                                          unsignedp, OPTAB_LIB_WIDEN);
4736           /* No remainder function.  Try a quotient-and-remainder
4737              function, keeping the remainder.  */
4738           if (!remainder)
4739             {
4740               remainder = gen_reg_rtx (compute_mode);
4741               if (!expand_twoval_binop_libfunc
4742                   (unsignedp ? udivmod_optab : sdivmod_optab,
4743                    op0, op1,
4744                    NULL_RTX, remainder,
4745                    unsignedp ? UMOD : MOD))
4746                 remainder = NULL_RTX;
4747             }
4748         }
4749       else
4750         {
4751           /* We divided.  Now finish doing X - Y * (X / Y).  */
4752           remainder = expand_mult (compute_mode, quotient, op1,
4753                                    NULL_RTX, unsignedp);
4754           remainder = expand_binop (compute_mode, sub_optab, op0,
4755                                     remainder, target, unsignedp,
4756                                     OPTAB_LIB_WIDEN);
4757         }
4758     }
4759
4760   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4761 }
4762 \f
4763 /* Return a tree node with data type TYPE, describing the value of X.
4764    Usually this is an VAR_DECL, if there is no obvious better choice.
4765    X may be an expression, however we only support those expressions
4766    generated by loop.c.  */
4767
4768 tree
4769 make_tree (tree type, rtx x)
4770 {
4771   tree t;
4772
4773   switch (GET_CODE (x))
4774     {
4775     case CONST_INT:
4776       {
4777         HOST_WIDE_INT hi = 0;
4778
4779         if (INTVAL (x) < 0
4780             && !(TYPE_UNSIGNED (type)
4781                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
4782                      < HOST_BITS_PER_WIDE_INT)))
4783           hi = -1;
4784
4785         t = build_int_cst_wide (type, INTVAL (x), hi);
4786
4787         return t;
4788       }
4789
4790     case CONST_DOUBLE:
4791       if (GET_MODE (x) == VOIDmode)
4792         t = build_int_cst_wide (type,
4793                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4794       else
4795         {
4796           REAL_VALUE_TYPE d;
4797
4798           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4799           t = build_real (type, d);
4800         }
4801
4802       return t;
4803
4804     case CONST_VECTOR:
4805       {
4806         int i, units;
4807         rtx elt;
4808         tree t = NULL_TREE;
4809
4810         units = CONST_VECTOR_NUNITS (x);
4811
4812         /* Build a tree with vector elements.  */
4813         for (i = units - 1; i >= 0; --i)
4814           {
4815             elt = CONST_VECTOR_ELT (x, i);
4816             t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4817           }
4818
4819         return build_vector (type, t);
4820       }
4821
4822     case PLUS:
4823       return fold (build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4824                            make_tree (type, XEXP (x, 1))));
4825
4826     case MINUS:
4827       return fold (build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4828                            make_tree (type, XEXP (x, 1))));
4829
4830     case NEG:
4831       return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
4832
4833     case MULT:
4834       return fold (build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4835                            make_tree (type, XEXP (x, 1))));
4836
4837     case ASHIFT:
4838       return fold (build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4839                            make_tree (type, XEXP (x, 1))));
4840
4841     case LSHIFTRT:
4842       t = lang_hooks.types.unsigned_type (type);
4843       return fold (convert (type,
4844                             build2 (RSHIFT_EXPR, t,
4845                                     make_tree (t, XEXP (x, 0)),
4846                                     make_tree (type, XEXP (x, 1)))));
4847
4848     case ASHIFTRT:
4849       t = lang_hooks.types.signed_type (type);
4850       return fold (convert (type,
4851                             build2 (RSHIFT_EXPR, t,
4852                                     make_tree (t, XEXP (x, 0)),
4853                                     make_tree (type, XEXP (x, 1)))));
4854
4855     case DIV:
4856       if (TREE_CODE (type) != REAL_TYPE)
4857         t = lang_hooks.types.signed_type (type);
4858       else
4859         t = type;
4860
4861       return fold (convert (type,
4862                             build2 (TRUNC_DIV_EXPR, t,
4863                                     make_tree (t, XEXP (x, 0)),
4864                                     make_tree (t, XEXP (x, 1)))));
4865     case UDIV:
4866       t = lang_hooks.types.unsigned_type (type);
4867       return fold (convert (type,
4868                             build2 (TRUNC_DIV_EXPR, t,
4869                                     make_tree (t, XEXP (x, 0)),
4870                                     make_tree (t, XEXP (x, 1)))));
4871
4872     case SIGN_EXTEND:
4873     case ZERO_EXTEND:
4874       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4875                                           GET_CODE (x) == ZERO_EXTEND);
4876       return fold (convert (type, make_tree (t, XEXP (x, 0))));
4877
4878     default:
4879       t = build_decl (VAR_DECL, NULL_TREE, type);
4880
4881       /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4882          ptr_mode.  So convert.  */
4883       if (POINTER_TYPE_P (type))
4884         x = convert_memory_address (TYPE_MODE (type), x);
4885
4886       /* Note that we do *not* use SET_DECL_RTL here, because we do not
4887          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
4888       t->decl.rtl = x;
4889
4890       return t;
4891     }
4892 }
4893
4894 /* Check whether the multiplication X * MULT + ADD overflows.
4895    X, MULT and ADD must be CONST_*.
4896    MODE is the machine mode for the computation.
4897    X and MULT must have mode MODE.  ADD may have a different mode.
4898    So can X (defaults to same as MODE).
4899    UNSIGNEDP is nonzero to do unsigned multiplication.  */
4900
4901 bool
4902 const_mult_add_overflow_p (rtx x, rtx mult, rtx add,
4903                            enum machine_mode mode, int unsignedp)
4904 {
4905   tree type, mult_type, add_type, result;
4906
4907   type = lang_hooks.types.type_for_mode (mode, unsignedp);
4908
4909   /* In order to get a proper overflow indication from an unsigned
4910      type, we have to pretend that it's a sizetype.  */
4911   mult_type = type;
4912   if (unsignedp)
4913     {
4914       /* FIXME:It would be nice if we could step directly from this
4915          type to its sizetype equivalent.  */
4916       mult_type = build_distinct_type_copy (type);
4917       TYPE_IS_SIZETYPE (mult_type) = 1;
4918     }
4919
4920   add_type = (GET_MODE (add) == VOIDmode ? mult_type
4921               : lang_hooks.types.type_for_mode (GET_MODE (add), unsignedp));
4922
4923   result = fold (build2 (PLUS_EXPR, mult_type,
4924                          fold (build2 (MULT_EXPR, mult_type,
4925                                        make_tree (mult_type, x),
4926                                        make_tree (mult_type, mult))),
4927                          make_tree (add_type, add)));
4928
4929   return TREE_CONSTANT_OVERFLOW (result);
4930 }
4931
4932 /* Return an rtx representing the value of X * MULT + ADD.
4933    TARGET is a suggestion for where to store the result (an rtx).
4934    MODE is the machine mode for the computation.
4935    X and MULT must have mode MODE.  ADD may have a different mode.
4936    So can X (defaults to same as MODE).
4937    UNSIGNEDP is nonzero to do unsigned multiplication.
4938    This may emit insns.  */
4939
4940 rtx
4941 expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
4942                  int unsignedp)
4943 {
4944   tree type = lang_hooks.types.type_for_mode (mode, unsignedp);
4945   tree add_type = (GET_MODE (add) == VOIDmode
4946                    ? type: lang_hooks.types.type_for_mode (GET_MODE (add),
4947                                                            unsignedp));
4948   tree result =  fold (build2 (PLUS_EXPR, type,
4949                                fold (build2 (MULT_EXPR, type,
4950                                              make_tree (type, x),
4951                                              make_tree (type, mult))),
4952                                make_tree (add_type, add)));
4953
4954   return expand_expr (result, target, VOIDmode, 0);
4955 }
4956 \f
4957 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4958    and returning TARGET.
4959
4960    If TARGET is 0, a pseudo-register or constant is returned.  */
4961
4962 rtx
4963 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
4964 {
4965   rtx tem = 0;
4966
4967   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4968     tem = simplify_binary_operation (AND, mode, op0, op1);
4969   if (tem == 0)
4970     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4971
4972   if (target == 0)
4973     target = tem;
4974   else if (tem != target)
4975     emit_move_insn (target, tem);
4976   return target;
4977 }
4978 \f
4979 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
4980    and storing in TARGET.  Normally return TARGET.
4981    Return 0 if that cannot be done.
4982
4983    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
4984    it is VOIDmode, they cannot both be CONST_INT.
4985
4986    UNSIGNEDP is for the case where we have to widen the operands
4987    to perform the operation.  It says to use zero-extension.
4988
4989    NORMALIZEP is 1 if we should convert the result to be either zero
4990    or one.  Normalize is -1 if we should convert the result to be
4991    either zero or -1.  If NORMALIZEP is zero, the result will be left
4992    "raw" out of the scc insn.  */
4993
4994 rtx
4995 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
4996                  enum machine_mode mode, int unsignedp, int normalizep)
4997 {
4998   rtx subtarget;
4999   enum insn_code icode;
5000   enum machine_mode compare_mode;
5001   enum machine_mode target_mode = GET_MODE (target);
5002   rtx tem;
5003   rtx last = get_last_insn ();
5004   rtx pattern, comparison;
5005
5006   if (unsignedp)
5007     code = unsigned_condition (code);
5008
5009   /* If one operand is constant, make it the second one.  Only do this
5010      if the other operand is not constant as well.  */
5011
5012   if (swap_commutative_operands_p (op0, op1))
5013     {
5014       tem = op0;
5015       op0 = op1;
5016       op1 = tem;
5017       code = swap_condition (code);
5018     }
5019
5020   if (mode == VOIDmode)
5021     mode = GET_MODE (op0);
5022
5023   /* For some comparisons with 1 and -1, we can convert this to
5024      comparisons with zero.  This will often produce more opportunities for
5025      store-flag insns.  */
5026
5027   switch (code)
5028     {
5029     case LT:
5030       if (op1 == const1_rtx)
5031         op1 = const0_rtx, code = LE;
5032       break;
5033     case LE:
5034       if (op1 == constm1_rtx)
5035         op1 = const0_rtx, code = LT;
5036       break;
5037     case GE:
5038       if (op1 == const1_rtx)
5039         op1 = const0_rtx, code = GT;
5040       break;
5041     case GT:
5042       if (op1 == constm1_rtx)
5043         op1 = const0_rtx, code = GE;
5044       break;
5045     case GEU:
5046       if (op1 == const1_rtx)
5047         op1 = const0_rtx, code = NE;
5048       break;
5049     case LTU:
5050       if (op1 == const1_rtx)
5051         op1 = const0_rtx, code = EQ;
5052       break;
5053     default:
5054       break;
5055     }
5056
5057   /* If we are comparing a double-word integer with zero or -1, we can
5058      convert the comparison into one involving a single word.  */
5059   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5060       && GET_MODE_CLASS (mode) == MODE_INT
5061       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5062     {
5063       if ((code == EQ || code == NE)
5064           && (op1 == const0_rtx || op1 == constm1_rtx))
5065         {
5066           rtx op00, op01, op0both;
5067
5068           /* Do a logical OR or AND of the two words and compare the result.  */
5069           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5070           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5071           op0both = expand_binop (word_mode,
5072                                   op1 == const0_rtx ? ior_optab : and_optab,
5073                                   op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
5074
5075           if (op0both != 0)
5076             return emit_store_flag (target, code, op0both, op1, word_mode,
5077                                     unsignedp, normalizep);
5078         }
5079       else if ((code == LT || code == GE) && op1 == const0_rtx)
5080         {
5081           rtx op0h;
5082
5083           /* If testing the sign bit, can just test on high word.  */
5084           op0h = simplify_gen_subreg (word_mode, op0, mode,
5085                                       subreg_highpart_offset (word_mode, mode));
5086           return emit_store_flag (target, code, op0h, op1, word_mode,
5087                                   unsignedp, normalizep);
5088         }
5089     }
5090
5091   /* From now on, we won't change CODE, so set ICODE now.  */
5092   icode = setcc_gen_code[(int) code];
5093
5094   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5095      complement of A (for GE) and shifting the sign bit to the low bit.  */
5096   if (op1 == const0_rtx && (code == LT || code == GE)
5097       && GET_MODE_CLASS (mode) == MODE_INT
5098       && (normalizep || STORE_FLAG_VALUE == 1
5099           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5100               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5101                   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
5102     {
5103       subtarget = target;
5104
5105       /* If the result is to be wider than OP0, it is best to convert it
5106          first.  If it is to be narrower, it is *incorrect* to convert it
5107          first.  */
5108       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5109         {
5110           op0 = convert_modes (target_mode, mode, op0, 0);
5111           mode = target_mode;
5112         }
5113
5114       if (target_mode != mode)
5115         subtarget = 0;
5116
5117       if (code == GE)
5118         op0 = expand_unop (mode, one_cmpl_optab, op0,
5119                            ((STORE_FLAG_VALUE == 1 || normalizep)
5120                             ? 0 : subtarget), 0);
5121
5122       if (STORE_FLAG_VALUE == 1 || normalizep)
5123         /* If we are supposed to produce a 0/1 value, we want to do
5124            a logical shift from the sign bit to the low-order bit; for
5125            a -1/0 value, we do an arithmetic shift.  */
5126         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5127                             size_int (GET_MODE_BITSIZE (mode) - 1),
5128                             subtarget, normalizep != -1);
5129
5130       if (mode != target_mode)
5131         op0 = convert_modes (target_mode, mode, op0, 0);
5132
5133       return op0;
5134     }
5135
5136   if (icode != CODE_FOR_nothing)
5137     {
5138       insn_operand_predicate_fn pred;
5139
5140       /* We think we may be able to do this with a scc insn.  Emit the
5141          comparison and then the scc insn.  */
5142
5143       do_pending_stack_adjust ();
5144       last = get_last_insn ();
5145
5146       comparison
5147         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
5148       if (CONSTANT_P (comparison))
5149         {
5150           switch (GET_CODE (comparison))
5151             {
5152             case CONST_INT:
5153               if (comparison == const0_rtx)
5154                 return const0_rtx;
5155               break;
5156
5157 #ifdef FLOAT_STORE_FLAG_VALUE
5158             case CONST_DOUBLE:
5159               if (comparison == CONST0_RTX (GET_MODE (comparison)))
5160                 return const0_rtx;
5161               break;
5162 #endif
5163             default:
5164               gcc_unreachable ();
5165             }
5166
5167           if (normalizep == 1)
5168             return const1_rtx;
5169           if (normalizep == -1)
5170             return constm1_rtx;
5171           return const_true_rtx;
5172         }
5173
5174       /* The code of COMPARISON may not match CODE if compare_from_rtx
5175          decided to swap its operands and reverse the original code.
5176
5177          We know that compare_from_rtx returns either a CONST_INT or
5178          a new comparison code, so it is safe to just extract the
5179          code from COMPARISON.  */
5180       code = GET_CODE (comparison);
5181
5182       /* Get a reference to the target in the proper mode for this insn.  */
5183       compare_mode = insn_data[(int) icode].operand[0].mode;
5184       subtarget = target;
5185       pred = insn_data[(int) icode].operand[0].predicate;
5186       if (optimize || ! (*pred) (subtarget, compare_mode))
5187         subtarget = gen_reg_rtx (compare_mode);
5188
5189       pattern = GEN_FCN (icode) (subtarget);
5190       if (pattern)
5191         {
5192           emit_insn (pattern);
5193
5194           /* If we are converting to a wider mode, first convert to
5195              TARGET_MODE, then normalize.  This produces better combining
5196              opportunities on machines that have a SIGN_EXTRACT when we are
5197              testing a single bit.  This mostly benefits the 68k.
5198
5199              If STORE_FLAG_VALUE does not have the sign bit set when
5200              interpreted in COMPARE_MODE, we can do this conversion as
5201              unsigned, which is usually more efficient.  */
5202           if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
5203             {
5204               convert_move (target, subtarget,
5205                             (GET_MODE_BITSIZE (compare_mode)
5206                              <= HOST_BITS_PER_WIDE_INT)
5207                             && 0 == (STORE_FLAG_VALUE
5208                                      & ((HOST_WIDE_INT) 1
5209                                         << (GET_MODE_BITSIZE (compare_mode) -1))));
5210               op0 = target;
5211               compare_mode = target_mode;
5212             }
5213           else
5214             op0 = subtarget;
5215
5216           /* If we want to keep subexpressions around, don't reuse our
5217              last target.  */
5218
5219           if (optimize)
5220             subtarget = 0;
5221
5222           /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
5223              we don't have to do anything.  */
5224           if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5225             ;
5226           /* STORE_FLAG_VALUE might be the most negative number, so write
5227              the comparison this way to avoid a compiler-time warning.  */
5228           else if (- normalizep == STORE_FLAG_VALUE)
5229             op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
5230
5231           /* We don't want to use STORE_FLAG_VALUE < 0 below since this
5232              makes it hard to use a value of just the sign bit due to
5233              ANSI integer constant typing rules.  */
5234           else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
5235                    && (STORE_FLAG_VALUE
5236                        & ((HOST_WIDE_INT) 1
5237                           << (GET_MODE_BITSIZE (compare_mode) - 1))))
5238             op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
5239                                 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
5240                                 subtarget, normalizep == 1);
5241           else
5242             {
5243               gcc_assert (STORE_FLAG_VALUE & 1);
5244
5245               op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
5246               if (normalizep == -1)
5247                 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
5248             }
5249
5250           /* If we were converting to a smaller mode, do the
5251              conversion now.  */
5252           if (target_mode != compare_mode)
5253             {
5254               convert_move (target, op0, 0);
5255               return target;
5256             }
5257           else
5258             return op0;
5259         }
5260     }
5261
5262   delete_insns_since (last);
5263
5264   /* If optimizing, use different pseudo registers for each insn, instead
5265      of reusing the same pseudo.  This leads to better CSE, but slows
5266      down the compiler, since there are more pseudos */
5267   subtarget = (!optimize
5268                && (target_mode == mode)) ? target : NULL_RTX;
5269
5270   /* If we reached here, we can't do this with a scc insn.  However, there
5271      are some comparisons that can be done directly.  For example, if
5272      this is an equality comparison of integers, we can try to exclusive-or
5273      (or subtract) the two operands and use a recursive call to try the
5274      comparison with zero.  Don't do any of these cases if branches are
5275      very cheap.  */
5276
5277   if (BRANCH_COST > 0
5278       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5279       && op1 != const0_rtx)
5280     {
5281       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5282                           OPTAB_WIDEN);
5283
5284       if (tem == 0)
5285         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5286                             OPTAB_WIDEN);
5287       if (tem != 0)
5288         tem = emit_store_flag (target, code, tem, const0_rtx,
5289                                mode, unsignedp, normalizep);
5290       if (tem == 0)
5291         delete_insns_since (last);
5292       return tem;
5293     }
5294
5295   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5296      the constant zero.  Reject all other comparisons at this point.  Only
5297      do LE and GT if branches are expensive since they are expensive on
5298      2-operand machines.  */
5299
5300   if (BRANCH_COST == 0
5301       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5302       || (code != EQ && code != NE
5303           && (BRANCH_COST <= 1 || (code != LE && code != GT))))
5304     return 0;
5305
5306   /* See what we need to return.  We can only return a 1, -1, or the
5307      sign bit.  */
5308
5309   if (normalizep == 0)
5310     {
5311       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5312         normalizep = STORE_FLAG_VALUE;
5313
5314       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5315                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5316                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5317         ;
5318       else
5319         return 0;
5320     }
5321
5322   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5323      do the necessary operation below.  */
5324
5325   tem = 0;
5326
5327   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5328      the sign bit set.  */
5329
5330   if (code == LE)
5331     {
5332       /* This is destructive, so SUBTARGET can't be OP0.  */
5333       if (rtx_equal_p (subtarget, op0))
5334         subtarget = 0;
5335
5336       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5337                           OPTAB_WIDEN);
5338       if (tem)
5339         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5340                             OPTAB_WIDEN);
5341     }
5342
5343   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5344      number of bits in the mode of OP0, minus one.  */
5345
5346   if (code == GT)
5347     {
5348       if (rtx_equal_p (subtarget, op0))
5349         subtarget = 0;
5350
5351       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5352                           size_int (GET_MODE_BITSIZE (mode) - 1),
5353                           subtarget, 0);
5354       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5355                           OPTAB_WIDEN);
5356     }
5357
5358   if (code == EQ || code == NE)
5359     {
5360       /* For EQ or NE, one way to do the comparison is to apply an operation
5361          that converts the operand into a positive number if it is nonzero
5362          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5363          for NE we negate.  This puts the result in the sign bit.  Then we
5364          normalize with a shift, if needed.
5365
5366          Two operations that can do the above actions are ABS and FFS, so try
5367          them.  If that doesn't work, and MODE is smaller than a full word,
5368          we can use zero-extension to the wider mode (an unsigned conversion)
5369          as the operation.  */
5370
5371       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5372          that is compensated by the subsequent overflow when subtracting
5373          one / negating.  */
5374
5375       if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5376         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5377       else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5378         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5379       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5380         {
5381           tem = convert_modes (word_mode, mode, op0, 1);
5382           mode = word_mode;
5383         }
5384
5385       if (tem != 0)
5386         {
5387           if (code == EQ)
5388             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5389                                 0, OPTAB_WIDEN);
5390           else
5391             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5392         }
5393
5394       /* If we couldn't do it that way, for NE we can "or" the two's complement
5395          of the value with itself.  For EQ, we take the one's complement of
5396          that "or", which is an extra insn, so we only handle EQ if branches
5397          are expensive.  */
5398
5399       if (tem == 0 && (code == NE || BRANCH_COST > 1))
5400         {
5401           if (rtx_equal_p (subtarget, op0))
5402             subtarget = 0;
5403
5404           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5405           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5406                               OPTAB_WIDEN);
5407
5408           if (tem && code == EQ)
5409             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5410         }
5411     }
5412
5413   if (tem && normalizep)
5414     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5415                         size_int (GET_MODE_BITSIZE (mode) - 1),
5416                         subtarget, normalizep == 1);
5417
5418   if (tem)
5419     {
5420       if (GET_MODE (tem) != target_mode)
5421         {
5422           convert_move (target, tem, 0);
5423           tem = target;
5424         }
5425       else if (!subtarget)
5426         {
5427           emit_move_insn (target, tem);
5428           tem = target;
5429         }
5430     }
5431   else
5432     delete_insns_since (last);
5433
5434   return tem;
5435 }
5436
5437 /* Like emit_store_flag, but always succeeds.  */
5438
5439 rtx
5440 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5441                        enum machine_mode mode, int unsignedp, int normalizep)
5442 {
5443   rtx tem, label;
5444
5445   /* First see if emit_store_flag can do the job.  */
5446   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5447   if (tem != 0)
5448     return tem;
5449
5450   if (normalizep == 0)
5451     normalizep = 1;
5452
5453   /* If this failed, we have to do this with set/compare/jump/set code.  */
5454
5455   if (!REG_P (target)
5456       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5457     target = gen_reg_rtx (GET_MODE (target));
5458
5459   emit_move_insn (target, const1_rtx);
5460   label = gen_label_rtx ();
5461   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5462                            NULL_RTX, label);
5463
5464   emit_move_insn (target, const0_rtx);
5465   emit_label (label);
5466
5467   return target;
5468 }
5469 \f
5470 /* Perform possibly multi-word comparison and conditional jump to LABEL
5471    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
5472
5473    The algorithm is based on the code in expr.c:do_jump.
5474
5475    Note that this does not perform a general comparison.  Only variants
5476    generated within expmed.c are correctly handled, others abort (but could
5477    be handled if needed).  */
5478
5479 static void
5480 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5481                  rtx label)
5482 {
5483   /* If this mode is an integer too wide to compare properly,
5484      compare word by word.  Rely on cse to optimize constant cases.  */
5485
5486   if (GET_MODE_CLASS (mode) == MODE_INT
5487       && ! can_compare_p (op, mode, ccp_jump))
5488     {
5489       rtx label2 = gen_label_rtx ();
5490
5491       switch (op)
5492         {
5493         case LTU:
5494           do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
5495           break;
5496
5497         case LEU:
5498           do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
5499           break;
5500
5501         case LT:
5502           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
5503           break;
5504
5505         case GT:
5506           do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
5507           break;
5508
5509         case GE:
5510           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
5511           break;
5512
5513           /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
5514              that's the only equality operations we do */
5515         case EQ:
5516           gcc_assert (arg2 == const0_rtx && mode == GET_MODE(arg1));
5517           do_jump_by_parts_equality_rtx (arg1, label2, label);
5518           break;
5519
5520         case NE:
5521           gcc_assert (arg2 == const0_rtx && mode == GET_MODE(arg1));
5522           do_jump_by_parts_equality_rtx (arg1, label, label2);
5523           break;
5524
5525         default:
5526           gcc_unreachable ();
5527         }
5528
5529       emit_label (label2);
5530     }
5531   else
5532     emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
5533 }