gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 2, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING.  If not, write to the Free
  20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  21 02111-1307, USA.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "tm.h"
  28 #include "toplev.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "tm_p.h"
  32 #include "flags.h"
  33 #include "insn-config.h"
  34 #include "expr.h"
  35 #include "optabs.h"
  36 #include "real.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39
  40 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  41                                    unsigned HOST_WIDE_INT,
  42                                    unsigned HOST_WIDE_INT, rtx);
  43 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  44                                    unsigned HOST_WIDE_INT, rtx);
  45 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  46                                     unsigned HOST_WIDE_INT,
  47                                     unsigned HOST_WIDE_INT,
  48                                     unsigned HOST_WIDE_INT, rtx, int);
  49 static rtx mask_rtx (enum machine_mode, int, int, int);
  50 static rtx lshift_value (enum machine_mode, rtx, int, int);
  51 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  52                                     unsigned HOST_WIDE_INT, int);
  53 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  54 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  55 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  56
   57 /* Test whether a value is zero or a power of two.  */
  58 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
  59
  60 /* Nonzero means divides or modulus operations are relatively cheap for
  61    powers of two, so don't use branches; emit the operation instead.
  62    Usually, this will mean that the MD file will emit non-branch
  63    sequences.  */
  64
  65 static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
  66 static bool smod_pow2_cheap[NUM_MACHINE_MODES];
  67
  68 #ifndef SLOW_UNALIGNED_ACCESS
  69 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  70 #endif
  71
  72 /* For compilers that support multiple targets with different word sizes,
  73    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  74    is the H8/300(H) compiler.  */
  75
  76 #ifndef MAX_BITS_PER_WORD
  77 #define MAX_BITS_PER_WORD BITS_PER_WORD
  78 #endif
  79
  80 /* Reduce conditional compilation elsewhere.  */
  81 #ifndef HAVE_insv
  82 #define HAVE_insv       0
  83 #define CODE_FOR_insv   CODE_FOR_nothing
  84 #define gen_insv(a,b,c,d) NULL_RTX
  85 #endif
  86 #ifndef HAVE_extv
  87 #define HAVE_extv       0
  88 #define CODE_FOR_extv   CODE_FOR_nothing
  89 #define gen_extv(a,b,c,d) NULL_RTX
  90 #endif
  91 #ifndef HAVE_extzv
  92 #define HAVE_extzv      0
  93 #define CODE_FOR_extzv  CODE_FOR_nothing
  94 #define gen_extzv(a,b,c,d) NULL_RTX
  95 #endif
  96
  97 /* Cost of various pieces of RTL.  Note that some of these are indexed by
  98    shift count and some by mode.  */
  99 static int zero_cost;
 100 static int add_cost[NUM_MACHINE_MODES];
 101 static int neg_cost[NUM_MACHINE_MODES];
 102 static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 103 static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 104 static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 105 static int mul_cost[NUM_MACHINE_MODES];
 106 static int div_cost[NUM_MACHINE_MODES];
 107 static int mul_widen_cost[NUM_MACHINE_MODES];
 108 static int mul_highpart_cost[NUM_MACHINE_MODES];
 109
 110 void
 111 init_expmed (void)
 112 {
 113   struct
 114   {
 115     struct rtx_def reg;         rtunion reg_fld[2];
 116     struct rtx_def plus;        rtunion plus_fld1;
 117     struct rtx_def neg;
 118     struct rtx_def udiv;        rtunion udiv_fld1;
 119     struct rtx_def mult;        rtunion mult_fld1;
 120     struct rtx_def div;         rtunion div_fld1;
 121     struct rtx_def mod;         rtunion mod_fld1;
 122     struct rtx_def zext;
 123     struct rtx_def wide_mult;   rtunion wide_mult_fld1;
 124     struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
 125     struct rtx_def wide_trunc;
 126     struct rtx_def shift;       rtunion shift_fld1;
 127     struct rtx_def shift_mult;  rtunion shift_mult_fld1;
 128     struct rtx_def shift_add;   rtunion shift_add_fld1;
 129     struct rtx_def shift_sub;   rtunion shift_sub_fld1;
 130   } all;
 131
 132   rtx pow2[MAX_BITS_PER_WORD];
 133   rtx cint[MAX_BITS_PER_WORD];
 134   int m, n;
 135   enum machine_mode mode, wider_mode;
 136
 137   zero_cost = rtx_cost (const0_rtx, 0);
 138
 139   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 140     {
 141       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 142       cint[m] = GEN_INT (m);
 143     }
 144
 145   memset (&all, 0, sizeof all);
 146
 147   PUT_CODE (&all.reg, REG);
 148   REGNO (&all.reg) = 10000;
 149
 150   PUT_CODE (&all.plus, PLUS);
 151   XEXP (&all.plus, 0) = &all.reg;
 152   XEXP (&all.plus, 1) = &all.reg;
 153
 154   PUT_CODE (&all.neg, NEG);
 155   XEXP (&all.neg, 0) = &all.reg;
 156
 157   PUT_CODE (&all.udiv, UDIV);
 158   XEXP (&all.udiv, 0) = &all.reg;
 159   XEXP (&all.udiv, 1) = &all.reg;
 160
 161   PUT_CODE (&all.mult, MULT);
 162   XEXP (&all.mult, 0) = &all.reg;
 163   XEXP (&all.mult, 1) = &all.reg;
 164
 165   PUT_CODE (&all.div, DIV);
 166   XEXP (&all.div, 0) = &all.reg;
 167   XEXP (&all.div, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
 168
 169   PUT_CODE (&all.mod, MOD);
 170   XEXP (&all.mod, 0) = &all.reg;
 171   XEXP (&all.mod, 1) = XEXP (&all.div, 1);
 172
 173   PUT_CODE (&all.zext, ZERO_EXTEND);
 174   XEXP (&all.zext, 0) = &all.reg;
 175
 176   PUT_CODE (&all.wide_mult, MULT);
 177   XEXP (&all.wide_mult, 0) = &all.zext;
 178   XEXP (&all.wide_mult, 1) = &all.zext;
 179
 180   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 181   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 182
 183   PUT_CODE (&all.wide_trunc, TRUNCATE);
 184   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 185
 186   PUT_CODE (&all.shift, ASHIFT);
 187   XEXP (&all.shift, 0) = &all.reg;
 188
 189   PUT_CODE (&all.shift_mult, MULT);
 190   XEXP (&all.shift_mult, 0) = &all.reg;
 191
 192   PUT_CODE (&all.shift_add, PLUS);
 193   XEXP (&all.shift_add, 0) = &all.shift_mult;
 194   XEXP (&all.shift_add, 1) = &all.reg;
 195
 196   PUT_CODE (&all.shift_sub, MINUS);
 197   XEXP (&all.shift_sub, 0) = &all.shift_mult;
 198   XEXP (&all.shift_sub, 1) = &all.reg;
 199
 200   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 201        mode != VOIDmode;
 202        mode = GET_MODE_WIDER_MODE (mode))
 203     {
 204       PUT_MODE (&all.reg, mode);
 205       PUT_MODE (&all.plus, mode);
 206       PUT_MODE (&all.neg, mode);
 207       PUT_MODE (&all.udiv, mode);
 208       PUT_MODE (&all.mult, mode);
 209       PUT_MODE (&all.div, mode);
 210       PUT_MODE (&all.mod, mode);
 211       PUT_MODE (&all.wide_trunc, mode);
 212       PUT_MODE (&all.shift, mode);
 213       PUT_MODE (&all.shift_mult, mode);
 214       PUT_MODE (&all.shift_add, mode);
 215       PUT_MODE (&all.shift_sub, mode);
 216
 217       add_cost[mode] = rtx_cost (&all.plus, SET);
 218       neg_cost[mode] = rtx_cost (&all.neg, SET);
 219       div_cost[mode] = rtx_cost (&all.udiv, SET);
 220       mul_cost[mode] = rtx_cost (&all.mult, SET);
 221
 222       sdiv_pow2_cheap[mode] = (rtx_cost (&all.div, SET) <= 2 * add_cost[mode]);
 223       smod_pow2_cheap[mode] = (rtx_cost (&all.mod, SET) <= 4 * add_cost[mode]);
 224
 225       wider_mode = GET_MODE_WIDER_MODE (mode);
 226       if (wider_mode != VOIDmode)
 227         {
 228           PUT_MODE (&all.zext, wider_mode);
 229           PUT_MODE (&all.wide_mult, wider_mode);
 230           PUT_MODE (&all.wide_lshr, wider_mode);
 231           XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
 232
 233           mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
 234           mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
 235         }
 236
 237       shift_cost[mode][0] = 0;
 238       shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
 239
 240       n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
 241       for (m = 1; m < n; m++)
 242         {
 243           XEXP (&all.shift, 1) = cint[m];
 244           XEXP (&all.shift_mult, 1) = pow2[m];
 245
 246           shift_cost[mode][m] = rtx_cost (&all.shift, SET);
 247           shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
 248           shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
 249         }
 250     }
 251 }
 252
 253 /* Return an rtx representing minus the value of X.
 254    MODE is the intended mode of the result,
 255    useful if X is a CONST_INT.  */
 256
 257 rtx
 258 negate_rtx (enum machine_mode mode, rtx x)
 259 {
 260   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 261
 262   if (result == 0)
 263     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 264
 265   return result;
 266 }
 267
 268 /* Report on the availability of insv/extv/extzv and the desired mode
 269    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 270    is false; else the mode of the specified operand.  If OPNO is -1,
 271    all the caller cares about is whether the insn is available.  */
 272 enum machine_mode
 273 mode_for_extraction (enum extraction_pattern pattern, int opno)
 274 {
 275   const struct insn_data *data;
 276
 277   switch (pattern)
 278     {
 279     case EP_insv:
 280       if (HAVE_insv)
 281         {
 282           data = &insn_data[CODE_FOR_insv];
 283           break;
 284         }
 285       return MAX_MACHINE_MODE;
 286
 287     case EP_extv:
 288       if (HAVE_extv)
 289         {
 290           data = &insn_data[CODE_FOR_extv];
 291           break;
 292         }
 293       return MAX_MACHINE_MODE;
 294
 295     case EP_extzv:
 296       if (HAVE_extzv)
 297         {
 298           data = &insn_data[CODE_FOR_extzv];
 299           break;
 300         }
 301       return MAX_MACHINE_MODE;
 302
 303     default:
 304       gcc_unreachable ();
 305     }
 306
 307   if (opno == -1)
 308     return VOIDmode;
 309
 310   /* Everyone who uses this function used to follow it with
 311      if (result == VOIDmode) result = word_mode; */
 312   if (data->operand[opno].mode == VOIDmode)
 313     return word_mode;
 314   return data->operand[opno].mode;
 315 }
 316
 317 \f
 318 /* Generate code to store value from rtx VALUE
 319    into a bit-field within structure STR_RTX
 320    containing BITSIZE bits starting at bit BITNUM.
 321    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
 322    ALIGN is the alignment that STR_RTX is known to have.
 323    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
 324
 325 /* ??? Note that there are two different ideas here for how
 326    to determine the size to count bits within, for a register.
 327    One is BITS_PER_WORD, and the other is the size of operand 3
 328    of the insv pattern.
 329
 330    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
 331    else, we use the mode of operand 3.  */
 332
 333 rtx
 334 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 335                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 336                  rtx value)
 337 {
 338   unsigned int unit
 339     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
 340   unsigned HOST_WIDE_INT offset, bitpos;
 341   rtx op0 = str_rtx;
 342   int byte_offset;
 343   rtx orig_value;
 344
 345   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 346
 347   while (GET_CODE (op0) == SUBREG)
 348     {
 349       /* The following line once was done only if WORDS_BIG_ENDIAN,
 350          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 351          meaningful at a much higher level; when structures are copied
 352          between memory and regs, the higher-numbered regs
 353          always get higher addresses.  */
 354       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
 355       op0 = SUBREG_REG (op0);
 356     }
 357
 358   /* No action is needed if the target is a register and if the field
 359      lies completely outside that register.  This can occur if the source
 360      code contains an out-of-bounds access to a small array.  */
 361   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 362     return value;
 363
 364   /* Use vec_set patterns for inserting parts of vectors whenever
 365      available.  */
 366   if (VECTOR_MODE_P (GET_MODE (op0))
 367       && !MEM_P (op0)
 368       && (vec_set_optab->handlers[GET_MODE (op0)].insn_code
 369           != CODE_FOR_nothing)
 370       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 371       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 372       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 373     {
 374       enum machine_mode outermode = GET_MODE (op0);
 375       enum machine_mode innermode = GET_MODE_INNER (outermode);
 376       int icode = (int) vec_set_optab->handlers[outermode].insn_code;
 377       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 378       rtx rtxpos = GEN_INT (pos);
 379       rtx src = value;
 380       rtx dest = op0;
 381       rtx pat, seq;
 382       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
 383       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
 384       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
 385
 386       start_sequence ();
 387
 388       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
 389         src = copy_to_mode_reg (mode1, src);
 390
 391       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 392         rtxpos = copy_to_mode_reg (mode1, rtxpos);
 393
 394       /* We could handle this, but we should always be called with a pseudo
 395          for our targets and all insns should take them as outputs.  */
 396       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
 397                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
 398                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
 399       pat = GEN_FCN (icode) (dest, src, rtxpos);
 400       seq = get_insns ();
 401       end_sequence ();
 402       if (pat)
 403         {
 404           emit_insn (seq);
 405           emit_insn (pat);
 406           return dest;
 407         }
 408     }
 409
 410   if (flag_force_mem)
 411     {
 412       int old_generating_concat_p = generating_concat_p;
 413       generating_concat_p = 0;
 414       value = force_not_mem (value);
 415       generating_concat_p = old_generating_concat_p;
 416     }
 417
 418   /* If the target is a register, overwriting the entire object, or storing
 419      a full-word or multi-word field can be done with just a SUBREG.
 420
 421      If the target is memory, storing any naturally aligned field can be
 422      done with a simple store.  For targets that support fast unaligned
 423      memory, any naturally sized, unit aligned field can be done directly.  */
 424
 425   offset = bitnum / unit;
 426   bitpos = bitnum % unit;
 427   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 428                 + (offset * UNITS_PER_WORD);
 429
 430   if (bitpos == 0
 431       && bitsize == GET_MODE_BITSIZE (fieldmode)
 432       && (!MEM_P (op0)
 433           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 434              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 435              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 436           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 437              || (offset * BITS_PER_UNIT % bitsize == 0
 438                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 439     {
 440       if (GET_MODE (op0) != fieldmode)
 441         {
 442           if (MEM_P (op0))
 443             op0 = adjust_address (op0, fieldmode, offset);
 444           else
 445             op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 446                                        byte_offset);
 447         }
 448       emit_move_insn (op0, value);
 449       return value;
 450     }
 451
 452   /* Make sure we are playing with integral modes.  Pun with subregs
 453      if we aren't.  This must come after the entire register case above,
 454      since that case is valid for any mode.  The following cases are only
 455      valid for integral modes.  */
 456   {
 457     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 458     if (imode != GET_MODE (op0))
 459       {
 460         if (MEM_P (op0))
 461           op0 = adjust_address (op0, imode, 0);
 462         else
 463           {
 464             gcc_assert (imode != BLKmode);
 465             op0 = gen_lowpart (imode, op0);
 466           }
 467       }
 468   }
 469
 470   /* We may be accessing data outside the field, which means
 471      we can alias adjacent data.  */
 472   if (MEM_P (op0))
 473     {
 474       op0 = shallow_copy_rtx (op0);
 475       set_mem_alias_set (op0, 0);
 476       set_mem_expr (op0, 0);
 477     }
 478
 479   /* If OP0 is a register, BITPOS must count within a word.
 480      But as we have it, it counts within whatever size OP0 now has.
 481      On a bigendian machine, these are not the same, so convert.  */
 482   if (BYTES_BIG_ENDIAN
 483       && !MEM_P (op0)
 484       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 485     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 486
 487   /* Storing an lsb-aligned field in a register
 488      can be done with a movestrict instruction.  */
 489
 490   if (!MEM_P (op0)
 491       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 492       && bitsize == GET_MODE_BITSIZE (fieldmode)
 493       && (movstrict_optab->handlers[fieldmode].insn_code
 494           != CODE_FOR_nothing))
 495     {
 496       int icode = movstrict_optab->handlers[fieldmode].insn_code;
 497
 498       /* Get appropriate low part of the value being stored.  */
 499       if (GET_CODE (value) == CONST_INT || REG_P (value))
 500         value = gen_lowpart (fieldmode, value);
 501       else if (!(GET_CODE (value) == SYMBOL_REF
 502                  || GET_CODE (value) == LABEL_REF
 503                  || GET_CODE (value) == CONST))
 504         value = convert_to_mode (fieldmode, value, 0);
 505
 506       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 507         value = copy_to_mode_reg (fieldmode, value);
 508
 509       if (GET_CODE (op0) == SUBREG)
 510         {
 511           /* Else we've got some float mode source being extracted into
 512              a different float mode destination -- this combination of
 513              subregs results in Severe Tire Damage.  */
 514           gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
 515                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 516                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 517           op0 = SUBREG_REG (op0);
 518         }
 519
 520       emit_insn (GEN_FCN (icode)
 521                  (gen_rtx_SUBREG (fieldmode, op0,
 522                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 523                                   + (offset * UNITS_PER_WORD)),
 524                                   value));
 525
 526       return value;
 527     }
 528
 529   /* Handle fields bigger than a word.  */
 530
 531   if (bitsize > BITS_PER_WORD)
 532     {
 533       /* Here we transfer the words of the field
 534          in the order least significant first.
 535          This is because the most significant word is the one which may
 536          be less than full.
 537          However, only do that if the value is not BLKmode.  */
 538
 539       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 540       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 541       unsigned int i;
 542
 543       /* This is the mode we must force value to, so that there will be enough
 544          subwords to extract.  Note that fieldmode will often (always?) be
 545          VOIDmode, because that is what store_field uses to indicate that this
 546          is a bit field, but passing VOIDmode to operand_subword_force will
 547          result in an abort.  */
 548       fieldmode = GET_MODE (value);
 549       if (fieldmode == VOIDmode)
 550         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 551
 552       for (i = 0; i < nwords; i++)
 553         {
 554           /* If I is 0, use the low-order word in both field and target;
 555              if I is 1, use the next to lowest word; and so on.  */
 556           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 557           unsigned int bit_offset = (backwards
 558                                      ? MAX ((int) bitsize - ((int) i + 1)
 559                                             * BITS_PER_WORD,
 560                                             0)
 561                                      : (int) i * BITS_PER_WORD);
 562
 563           store_bit_field (op0, MIN (BITS_PER_WORD,
 564                                      bitsize - i * BITS_PER_WORD),
 565                            bitnum + bit_offset, word_mode,
 566                            operand_subword_force (value, wordnum, fieldmode));
 567         }
 568       return value;
 569     }
 570
 571   /* From here on we can assume that the field to be stored in is
 572      a full-word (whatever type that is), since it is shorter than a word.  */
 573
 574   /* OFFSET is the number of words or bytes (UNIT says which)
 575      from STR_RTX to the first word or byte containing part of the field.  */
 576
 577   if (!MEM_P (op0))
 578     {
 579       if (offset != 0
 580           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 581         {
 582           if (!REG_P (op0))
 583             {
 584               /* Since this is a destination (lvalue), we can't copy it to a
 585                  pseudo.  We can trivially remove a SUBREG that does not
 586                  change the size of the operand.  Such a SUBREG may have been
 587                  added above.  Otherwise, abort.  */
 588               gcc_assert (GET_CODE (op0) == SUBREG
 589                           && (GET_MODE_SIZE (GET_MODE (op0))
 590                               == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
 591               op0 = SUBREG_REG (op0);
 592             }
 593           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 594                                 op0, (offset * UNITS_PER_WORD));
 595         }
 596       offset = 0;
 597     }
 598
 599   /* If VALUE has a floating-point or complex mode, access it as an
 600      integer of the corresponding size.  This can occur on a machine
 601      with 64 bit registers that uses SFmode for float.  It can also
 602      occur for unaligned float or complex fields.  */
 603   orig_value = value;
 604   if (GET_MODE (value) != VOIDmode
 605       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 606       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 607     {
 608       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 609       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 610     }
 611
 612   /* Now OFFSET is nonzero only if OP0 is memory
 613      and is therefore always measured in bytes.  */
 614
 615   if (HAVE_insv
 616       && GET_MODE (value) != BLKmode
 617       && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
 618       /* Ensure insv's size is wide enough for this field.  */
 619       && (GET_MODE_BITSIZE (op_mode) >= bitsize)
 620       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
 621             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
 622     {
 623       int xbitpos = bitpos;
 624       rtx value1;
 625       rtx xop0 = op0;
 626       rtx last = get_last_insn ();
 627       rtx pat;
 628       enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
 629       int save_volatile_ok = volatile_ok;
 630
 631       volatile_ok = 1;
 632
 633       /* If this machine's insv can only insert into a register, copy OP0
 634          into a register and save it back later.  */
 635       /* This used to check flag_force_mem, but that was a serious
 636          de-optimization now that flag_force_mem is enabled by -O2.  */
 637       if (MEM_P (op0)
 638           && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
 639                 (op0, VOIDmode)))
 640         {
 641           rtx tempreg;
 642           enum machine_mode bestmode;
 643
 644           /* Get the mode to use for inserting into this field.  If OP0 is
 645              BLKmode, get the smallest mode consistent with the alignment. If
 646              OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
 647              mode. Otherwise, use the smallest mode containing the field.  */
 648
 649           if (GET_MODE (op0) == BLKmode
 650               || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
 651             bestmode
 652               = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
 653                                MEM_VOLATILE_P (op0));
 654           else
 655             bestmode = GET_MODE (op0);
 656
 657           if (bestmode == VOIDmode
 658               || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 659                   && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 660             goto insv_loses;
 661
 662           /* Adjust address to point to the containing unit of that mode.
 663              Compute offset as multiple of this unit, counting in bytes.  */
 664           unit = GET_MODE_BITSIZE (bestmode);
 665           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 666           bitpos = bitnum % unit;
 667           op0 = adjust_address (op0, bestmode,  offset);
 668
 669           /* Fetch that unit, store the bitfield in it, then store
 670              the unit.  */
 671           tempreg = copy_to_reg (op0);
 672           store_bit_field (tempreg, bitsize, bitpos, fieldmode, orig_value);
 673           emit_move_insn (op0, tempreg);
 674           return value;
 675         }
 676       volatile_ok = save_volatile_ok;
 677
 678       /* Add OFFSET into OP0's address.  */
 679       if (MEM_P (xop0))
 680         xop0 = adjust_address (xop0, byte_mode, offset);
 681
 682       /* If xop0 is a register, we need it in MAXMODE
 683          to make it acceptable to the format of insv.  */
 684       if (GET_CODE (xop0) == SUBREG)
 685         /* We can't just change the mode, because this might clobber op0,
 686            and we will need the original value of op0 if insv fails.  */
 687         xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 688       if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
 689         xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
 690
 691       /* On big-endian machines, we count bits from the most significant.
 692          If the bit field insn does not, we must invert.  */
 693
 694       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 695         xbitpos = unit - bitsize - xbitpos;
 696
 697       /* We have been counting XBITPOS within UNIT.
 698          Count instead within the size of the register.  */
 699       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
 700         xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
 701
 702       unit = GET_MODE_BITSIZE (maxmode);
 703
 704       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
 705       value1 = value;
 706       if (GET_MODE (value) != maxmode)
 707         {
 708           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 709             {
 710               /* Optimization: Don't bother really extending VALUE
 711                  if it has all the bits we will actually use.  However,
 712                  if we must narrow it, be sure we do it correctly.  */
 713
 714               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
 715                 {
 716                   rtx tmp;
 717
 718                   tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
 719                   if (! tmp)
 720                     tmp = simplify_gen_subreg (maxmode,
 721                                                force_reg (GET_MODE (value),
 722                                                           value1),
 723                                                GET_MODE (value), 0);
 724                   value1 = tmp;
 725                 }
 726               else
 727                 value1 = gen_lowpart (maxmode, value1);
 728             }
 729           else if (GET_CODE (value) == CONST_INT)
 730             value1 = gen_int_mode (INTVAL (value), maxmode);
 731           else
 732             /* Parse phase is supposed to make VALUE's data type
 733                match that of the component reference, which is a type
 734                at least as wide as the field; so VALUE should have
 735                a mode that corresponds to that type.  */
 736             gcc_assert (CONSTANT_P (value));
 737         }
 738
 739       /* If this machine's insv insists on a register,
 740          get VALUE1 into a register.  */
 741       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 742              (value1, maxmode)))
 743         value1 = force_reg (maxmode, value1);
 744
 745       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 746       if (pat)
 747         emit_insn (pat);
 748       else
 749         {
 750           delete_insns_since (last);
 751           store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 752         }
 753     }
 754   else
 755     insv_loses:
 756     /* Insv is not available; store using shifts and boolean ops.  */
 757     store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 758   return value;
 759 }
 760 \f
 761 /* Use shifts and boolean operations to store VALUE
 762    into a bit field of width BITSIZE
 763    in a memory location specified by OP0 except offset by OFFSET bytes.
 764      (OFFSET must be 0 if OP0 is a register.)
 765    The field starts at position BITPOS within the byte.
 766     (If OP0 is a register, it may be a full word or a narrower mode,
 767      but BITPOS still counts within a full word,
 768      which is significant on bigendian machines.)

        Nothing is returned.  A field that does not fit in one word
        (REG or SUBREG OP0), or for which get_best_mode finds no usable mode
        (any other OP0), is handed to store_split_bit_field instead.
        Otherwise, with BITPOS converted to count from the lsb, the value
        left in OP0 is
          (OP0 & ~field_mask) | ((VALUE & low_mask) << BITPOS)
        where the AND is skipped when a constant VALUE is all ones within
        the field and the IOR is skipped when it is all zeros.  */
 769
 770 static void
 771 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 772                        unsigned HOST_WIDE_INT bitsize,
 773                        unsigned HOST_WIDE_INT bitpos, rtx value)
 774 {
 775   enum machine_mode mode;
       /* Bit width of the container the field lives in: a word for a
          register OP0, replaced below by the width of the chosen mode
          otherwise.  */
 776   unsigned int total_bits = BITS_PER_WORD;
 777   rtx subtarget, temp;
       /* Set for a CONST_INT VALUE whose low BITSIZE bits are all zeros or
          all ones; each lets one of the two logical operations at the end
          be skipped.  */
 778   int all_zero = 0;
 779   int all_one = 0;
 780
 781   /* There is a case not handled here:
 782      a structure with a known alignment of just a halfword
 783      and a field split across two aligned halfwords within the structure.
 784      Or likewise a structure with a known alignment of just a byte
 785      and a field split across two bytes.
 786      Such cases are not supposed to be able to occur.  */
 787
 788   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 789     {
           /* Registers are never addressed with a byte offset.  */
 790       gcc_assert (!offset);
 791       /* Special treatment for a bit field split across two registers.  */
 792       if (bitsize + bitpos > BITS_PER_WORD)
 793         {
 794           store_split_bit_field (op0, bitsize, bitpos, value);
 795           return;
 796         }
 797     }
 798   else
 799     {
 800       /* Get the proper mode to use for this field.  We want a mode that
 801          includes the entire field.  If such a mode would be larger than
 802          a word, we won't be doing the extraction the normal way.
 803          We don't want a mode bigger than the destination.  */
 804
 805       mode = GET_MODE (op0);
           /* Cap the search at word_mode, also when OP0's mode reports a
              zero bit size (presumably BLKmode or VOIDmode -- TODO
              confirm).  */
 806       if (GET_MODE_BITSIZE (mode) == 0
 807           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 808         mode = word_mode;
 809       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 810                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 811
 812       if (mode == VOIDmode)
 813         {
 814           /* The only way this should occur is if the field spans word
 815              boundaries.  */
               /* store_split_bit_field takes no byte offset, so OFFSET is
                  folded into the bit position here.  */
 816           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 817                                  value);
 818           return;
 819         }
 820
 821       total_bits = GET_MODE_BITSIZE (mode);
 822
 823       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 824          be in the range 0 to total_bits-1, and put any excess bytes in
 825          OFFSET.  */
 826       if (bitpos >= total_bits)
 827         {
 828           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 829           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 830                      * BITS_PER_UNIT);
 831         }
 832
 833       /* Get ref to an aligned byte, halfword, or word containing the field.
 834          Adjust BITPOS to be position within a word,
 835          and OFFSET to be the offset of that word.
 836          Then alter OP0 to refer to that word.  */
 837       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 838       offset -= (offset % (total_bits / BITS_PER_UNIT));
 839       op0 = adjust_address (op0, mode, offset);
 840     }
 841
       /* For a register OP0 this is the first assignment to MODE; otherwise
          it re-reads OP0's mode after adjust_address (presumably the mode
          chosen above -- TODO confirm).  */
 842   mode = GET_MODE (op0);
 843
 844   /* Now MODE is either some integral mode for a MEM as OP0,
 845      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 846      The bit field is contained entirely within OP0.
 847      BITPOS is the starting bit number within OP0.
 848      (OP0's mode may actually be narrower than MODE.)  */
 849
 850   if (BYTES_BIG_ENDIAN)
 851       /* BITPOS is the distance between our msb
 852          and that of the containing datum.
 853          Convert it to the distance from the lsb.  */
 854       bitpos = total_bits - bitsize - bitpos;
 855
 856   /* Now BITPOS is always the distance between our lsb
 857      and that of OP0.  */
 858
 859   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 860      we must first convert its mode to MODE.  */
 861
 862   if (GET_CODE (value) == CONST_INT)
 863     {
 864       HOST_WIDE_INT v = INTVAL (value);
 865
           /* Keep only the low BITSIZE bits so the tests below look at the
              field alone.  The mask is skipped when BITSIZE is the full
              host width, where the shift count would be out of range.  */
 866       if (bitsize < HOST_BITS_PER_WIDE_INT)
 867         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 868
 869       if (v == 0)
 870         all_zero = 1;
 871       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 872                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 873                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 874         all_one = 1;
 875
           /* Replace VALUE by the constant already moved into field
              position (lshift_value is defined elsewhere; presumably it
              truncates to BITSIZE bits and shifts by BITPOS -- TODO
              confirm).  */
 876       value = lshift_value (mode, value, bitpos, bitsize);
 877     }
 878   else
 879     {
           /* Decide whether VALUE needs masking to BITSIZE bits.  This is
              computed from VALUE's mode before the conversion below.  No
              mask is needed when that mode is exactly BITSIZE bits wide, or
              when the field ends at the top of MODE (the left shift then
              presumably pushes any excess bits out).  */
 880       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 881                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 882
 883       if (GET_MODE (value) != mode)
 884         {
               /* A wider register value is narrowed by taking its low part;
                  anything else goes through convert_to_mode with the
                  unsigned flag set.  */
 885           if ((REG_P (value) || GET_CODE (value) == SUBREG)
 886               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 887             value = gen_lowpart (mode, value);
 888           else
 889             value = convert_to_mode (mode, value, 1);
 890         }
 891
 892       if (must_and)
 893         value = expand_binop (mode, and_optab, value,
 894                               mask_rtx (mode, 0, bitsize, 0),
 895                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 896       if (bitpos > 0)
 897         value = expand_shift (LSHIFT_EXPR, mode, value,
 898                               build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
 899     }
 900
 901   /* Now clear the chosen bits in OP0,
 902      except that if VALUE is -1 we need not bother.  */
 903
       /* SUBTARGET is the target handed to expand_binop: OP0 itself when it
          is a register or flag_force_mem is clear, otherwise none.  */
 904   subtarget = (REG_P (op0) || ! flag_force_mem) ? op0 : 0;
 905
 906   if (! all_one)
 907     {
 908       temp = expand_binop (mode, and_optab, op0,
 909                            mask_rtx (mode, bitpos, bitsize, 1),
 910                            subtarget, 1, OPTAB_LIB_WIDEN);
           /* Let the IOR below reuse wherever the AND result landed.  */
 911       subtarget = temp;
 912     }
 913   else
 914     temp = op0;
 915
 916   /* Now logical-or VALUE into OP0, unless it is zero.  */
 917
 918   if (! all_zero)
 919     temp = expand_binop (mode, ior_optab, temp, value,
 920                          subtarget, 1, OPTAB_LIB_WIDEN);
       /* The result may have been produced somewhere other than OP0; copy
          it back in that case.  */
 921   if (op0 != temp)
 922     emit_move_insn (op0, temp);
 923 }
 924 \f
 925 /* Store a bit field that is split across multiple accessible memory objects.
 926
 927    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 928    BITSIZE is the field width; BITPOS the position of its first bit
 929    (within the word).
 930    VALUE is the value to store.
 931
 932    This does not yet handle fields wider than BITS_PER_WORD.

        The field is stored in pieces.  Each loop iteration takes the next
        THISSIZE bits of VALUE (from the most significant end when
        BYTES_BIG_ENDIAN, otherwise from the least significant end) and
        hands them to store_fixed_bit_field; THISSIZE is limited so that a
        piece never crosses a UNIT boundary.  Nothing is returned.  */
 933
 934 static void
 935 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 936                        unsigned HOST_WIDE_INT bitpos, rtx value)
 937 {
       /* Size in bits of the chunks OP0 is addressed in.  */
 938   unsigned int unit;
       /* Number of bits of the field stored so far.  */
 939   unsigned int bitsdone = 0;
 940
 941   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 942      much at a time.  */
 943   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 944     unit = BITS_PER_WORD;
 945   else
 946     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
 947
 948   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 949      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 950      that VALUE might be a floating-point constant.  */
 951   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
 952     {
 953       rtx word = gen_lowpart_common (word_mode, value);
 954
           /* Use the direct low part only if one was produced and it differs
              from VALUE; otherwise force VALUE into a register first (in
              word_mode when VALUE carries no mode of its own).  */
 955       if (word && (value != word))
 956         value = word;
 957       else
 958         value = gen_lowpart_common (word_mode,
 959                                     force_reg (GET_MODE (value) != VOIDmode
 960                                                ? GET_MODE (value)
 961                                                : word_mode, value));
 962     }
 963
 964   while (bitsdone < bitsize)
 965     {
 966       unsigned HOST_WIDE_INT thissize;
 967       rtx part, word;
 968       unsigned HOST_WIDE_INT thispos;
 969       unsigned HOST_WIDE_INT offset;
 970
           /* Locate the next unstored bit: OFFSET is the index of the UNIT
              holding it, THISPOS its bit position inside that unit.  */
 971       offset = (bitpos + bitsdone) / unit;
 972       thispos = (bitpos + bitsdone) % unit;
 973
 974       /* THISSIZE must not overrun a word boundary.  Otherwise,
 975          store_fixed_bit_field will call us again, and we will mutually
 976          recurse forever.  */
 977       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
 978       thissize = MIN (thissize, unit - thispos);
 979
 980       if (BYTES_BIG_ENDIAN)
 981         {
               /* Only consulted on the non-CONST_INT path below.  */
 982           int total_bits;
 983
 984           /* We must do an endian conversion exactly the same way as it is
 985              done in extract_bit_field, so that the two calls to
 986              extract_fixed_bit_field will have comparable arguments.  */
 987           if (!MEM_P (value) || GET_MODE (value) == BLKmode)
 988             total_bits = BITS_PER_WORD;
 989           else
 990             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
 991
 992           /* Fetch successively less significant portions.  */
               /* For a constant: shift away the bits that remain to be
                  stored after this piece, then keep the low THISSIZE
                  bits.  */
 993           if (GET_CODE (value) == CONST_INT)
 994             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 995                              >> (bitsize - bitsdone - thissize))
 996                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 997           else
 998             /* The args are chosen so that the last part includes the
 999                lsb.  Give extract_bit_field the value it needs (with
1000                endianness compensation) to fetch the piece we want.  */
1001             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1002                                             total_bits - bitsize + bitsdone,
1003                                             NULL_RTX, 1);
1004         }
1005       else
1006         {
1007           /* Fetch successively more significant portions.  */
               /* For a constant: drop the BITSDONE bits already stored and
                  keep the low THISSIZE bits.  */
1008           if (GET_CODE (value) == CONST_INT)
1009             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1010                              >> bitsdone)
1011                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1012           else
1013             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1014                                             bitsdone, NULL_RTX, 1);
1015         }
1016
1017       /* If OP0 is a register, then handle OFFSET here.
1018
1019          When handling multiword bitfields, extract_bit_field may pass
1020          down a word_mode SUBREG of a larger REG for a bitfield that actually
1021          crosses a word boundary.  Thus, for a SUBREG, we must find
1022          the current word starting from the base register.  */
1023       if (GET_CODE (op0) == SUBREG)
1024         {
               /* Word index counted from the start of the inner register:
                  the SUBREG's own offset in words plus OFFSET.  */
1025           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1026           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1027                                         GET_MODE (SUBREG_REG (op0)));
1028           offset = 0;
1029         }
1030       else if (REG_P (op0))
1031         {
1032           word = operand_subword_force (op0, offset, GET_MODE (op0));
1033           offset = 0;
1034         }
1035       else
             /* Neither SUBREG nor REG: keep OP0 and let store_fixed_bit_field
                apply the offset.  */
1036         word = op0;
1037
1038       /* OFFSET is in UNITs, and UNIT is in bits.
1039          store_fixed_bit_field wants offset in bytes.  */
1040       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1041                              thispos, part);
1042       bitsdone += thissize;
1043     }
1044 }
1045 \f
1046 /* Generate code to extract a byte-field from STR_RTX
1047    containing BITSIZE bits, starting at BITNUM,
1048    and put it in TARGET if possible (if TARGET is nonzero).
1049    Regardless of TARGET, we return the rtx for where the value is placed.
1050
1051    STR_RTX is the structure containing the byte (a REG or MEM).
1052    UNSIGNEDP is nonzero if this is an unsigned bit field.
1053    MODE is the natural mode of the field value once extracted.
1054    TMODE is the mode the caller would like the value to have;
1055    but the value may be returned with type MODE instead.
1056
1057    (TOTAL_SIZE, the size in bytes of the containing structure or -1 if
1058    varying, is not among the parameters of the definition below.)
1059
1060    If a TARGET is specified and we can store in it at no extra cost,
1061    we do so, and return TARGET.
1062    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1063    if they are equally easy.  */
1064
1065 rtx
1066 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1067                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1068                    enum machine_mode mode, enum machine_mode tmode)
1069 {
1070   unsigned int unit
1071     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1072   unsigned HOST_WIDE_INT offset, bitpos;
1073   rtx op0 = str_rtx;
1074   rtx spec_target = target;
1075   rtx spec_target_subreg = 0;
1076   enum machine_mode int_mode;
1077   enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1078   enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1079   enum machine_mode mode1;
1080   int byte_offset;
1081
1082   if (tmode == VOIDmode)
1083     tmode = mode;
1084
1085   while (GET_CODE (op0) == SUBREG)
1086     {
1087       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1088       op0 = SUBREG_REG (op0);
1089     }
1090
1091   /* If we have an out-of-bounds access to a register, just return an
1092      uninitialized register of the required mode.  This can occur if the
1093      source code contains an out-of-bounds access to a small array.  */
1094   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1095     return gen_reg_rtx (tmode);
1096
1097   if (REG_P (op0)
1098       && mode == GET_MODE (op0)
1099       && bitnum == 0
1100       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1101     {
1102       /* We're trying to extract a full register from itself.  */
1103       return op0;
1104     }
1105
1106   /* Use vec_extract patterns for extracting parts of vectors whenever
1107      available.  */
1108   if (VECTOR_MODE_P (GET_MODE (op0))
1109       && !MEM_P (op0)
1110       && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
1111           != CODE_FOR_nothing)
1112       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1113           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1114     {
1115       enum machine_mode outermode = GET_MODE (op0);
1116       enum machine_mode innermode = GET_MODE_INNER (outermode);
1117       int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
1118       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1119       rtx rtxpos = GEN_INT (pos);
1120       rtx src = op0;
1121       rtx dest = NULL, pat, seq;
1122       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1123       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1124       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1125
1126       if (innermode == tmode || innermode == mode)
1127         dest = target;
1128
1129       if (!dest)
1130         dest = gen_reg_rtx (innermode);
1131
1132       start_sequence ();
1133
1134       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1135         dest = copy_to_mode_reg (mode0, dest);
1136
1137       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1138         src = copy_to_mode_reg (mode1, src);
1139
1140       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1141         rtxpos = copy_to_mode_reg (mode1, rtxpos);
1142
1143       /* We could handle this, but we should always be called with a pseudo
1144          for our targets and all insns should take them as outputs.  */
1145       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1146                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
1147                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1148
1149       pat = GEN_FCN (icode) (dest, src, rtxpos);
1150       seq = get_insns ();
1151       end_sequence ();
1152       if (pat)
1153         {
1154           emit_insn (seq);
1155           emit_insn (pat);
1156           return dest;
1157         }
1158     }
1159
1160   /* Make sure we are playing with integral modes.  Pun with subregs
1161      if we aren't.  */
1162   {
1163     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1164     if (imode != GET_MODE (op0))
1165       {
1166         if (MEM_P (op0))
1167           op0 = adjust_address (op0, imode, 0);
1168         else
1169           {
1170             gcc_assert (imode != BLKmode);
1171             op0 = gen_lowpart (imode, op0);
1172
1173             /* If we got a SUBREG, force it into a register since we
1174                aren't going to be able to do another SUBREG on it.  */
1175             if (GET_CODE (op0) == SUBREG)
1176               op0 = force_reg (imode, op0);
1177           }
1178       }
1179   }
1180
1181   /* We may be accessing data outside the field, which means
1182      we can alias adjacent data.  */
1183   if (MEM_P (op0))
1184     {
1185       op0 = shallow_copy_rtx (op0);
1186       set_mem_alias_set (op0, 0);
1187       set_mem_expr (op0, 0);
1188     }
1189
1190   /* Extraction of a full-word or multi-word value from a structure
1191      in a register or aligned memory can be done with just a SUBREG.
1192      A subword value in the least significant part of a register
1193      can also be extracted with a SUBREG.  For this, we need the
1194      byte offset of the value in op0.  */
1195
1196   bitpos = bitnum % unit;
1197   offset = bitnum / unit;
1198   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1199
1200   /* If OP0 is a register, BITPOS must count within a word.
1201      But as we have it, it counts within whatever size OP0 now has.
1202      On a bigendian machine, these are not the same, so convert.  */
1203   if (BYTES_BIG_ENDIAN
1204       && !MEM_P (op0)
1205       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1206     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1207
1208   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1209      If that's wrong, the solution is to test for it and set TARGET to 0
1210      if needed.  */
1211
1212   /* Only scalar integer modes can be converted via subregs.  There is an
1213      additional problem for FP modes here in that they can have a precision
1214      which is different from the size.  mode_for_size uses precision, but
1215      we want a mode based on the size, so we must avoid calling it for FP
1216      modes.  */
1217   mode1  = (SCALAR_INT_MODE_P (tmode)
1218             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1219             : mode);
1220
1221   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1222         && bitpos % BITS_PER_WORD == 0)
1223        || (mode1 != BLKmode
1224            /* ??? The big endian test here is wrong.  This is correct
1225               if the value is in a register, and if mode_for_size is not
1226               the same mode as op0.  This causes us to get unnecessarily
1227               inefficient code from the Thumb port when -mbig-endian.  */
1228            && (BYTES_BIG_ENDIAN
1229                ? bitpos + bitsize == BITS_PER_WORD
1230                : bitpos == 0)))
1231       && ((!MEM_P (op0)
1232            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1233                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1234            && GET_MODE_SIZE (mode1) != 0
1235            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1236           || (MEM_P (op0)
1237               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1238                   || (offset * BITS_PER_UNIT % bitsize == 0
1239                       && MEM_ALIGN (op0) % bitsize == 0)))))
1240     {
1241       if (mode1 != GET_MODE (op0))
1242         {
1243           if (MEM_P (op0))
1244             op0 = adjust_address (op0, mode1, offset);
1245           else
1246             {
1247               rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1248                                              byte_offset);
1249               if (sub == NULL)
1250                 goto no_subreg_mode_swap;
1251               op0 = sub;
1252             }
1253         }
1254       if (mode1 != mode)
1255         return convert_to_mode (tmode, op0, unsignedp);
1256       return op0;
1257     }
1258  no_subreg_mode_swap:
1259
1260   /* Handle fields bigger than a word.  */
1261
1262   if (bitsize > BITS_PER_WORD)
1263     {
1264       /* Here we transfer the words of the field
1265          in the order least significant first.
1266          This is because the most significant word is the one which may
1267          be less than full.  */
1268
1269       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1270       unsigned int i;
1271
1272       if (target == 0 || !REG_P (target))
1273         target = gen_reg_rtx (mode);
1274
1275       /* Indicate for flow that the entire target reg is being set.  */
1276       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1277
1278       for (i = 0; i < nwords; i++)
1279         {
1280           /* If I is 0, use the low-order word in both field and target;
1281              if I is 1, use the next to lowest word; and so on.  */
1282           /* Word number in TARGET to use.  */
1283           unsigned int wordnum
1284             = (WORDS_BIG_ENDIAN
1285                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1286                : i);
1287           /* Offset from start of field in OP0.  */
1288           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1289                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1290                                                 * (int) BITS_PER_WORD))
1291                                      : (int) i * BITS_PER_WORD);
1292           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1293           rtx result_part
1294             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1295                                            bitsize - i * BITS_PER_WORD),
1296                                  bitnum + bit_offset, 1, target_part, mode,
1297                                  word_mode);
1298
1299           gcc_assert (target_part);
1300
1301           if (result_part != target_part)
1302             emit_move_insn (target_part, result_part);
1303         }
1304
1305       if (unsignedp)
1306         {
1307           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1308              need to be zero'd out.  */
1309           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1310             {
1311               unsigned int i, total_words;
1312
1313               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1314               for (i = nwords; i < total_words; i++)
1315                 emit_move_insn
1316                   (operand_subword (target,
1317                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1318                                     1, VOIDmode),
1319                    const0_rtx);
1320             }
1321           return target;
1322         }
1323
1324       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1325       target = expand_shift (LSHIFT_EXPR, mode, target,
1326                              build_int_cst (NULL_TREE,
1327                                             GET_MODE_BITSIZE (mode) - bitsize),
1328                              NULL_RTX, 0);
1329       return expand_shift (RSHIFT_EXPR, mode, target,
1330                            build_int_cst (NULL_TREE,
1331                                           GET_MODE_BITSIZE (mode) - bitsize),
1332                            NULL_RTX, 0);
1333     }
1334
1335   /* From here on we know the desired field is smaller than a word.  */
1336
1337   /* Check if there is a correspondingly-sized integer field, so we can
1338      safely extract it as one size of integer, if necessary; then
1339      truncate or extend to the size that is wanted; then use SUBREGs or
1340      convert_to_mode to get one of the modes we really wanted.  */
1341
1342   int_mode = int_mode_for_mode (tmode);
1343   if (int_mode == BLKmode)
1344     int_mode = int_mode_for_mode (mode);
1345   /* Should probably push op0 out to memory and then do a load.  */
1346   gcc_assert (int_mode != BLKmode);
1347
1348   /* OFFSET is the number of words or bytes (UNIT says which)
1349      from STR_RTX to the first word or byte containing part of the field.  */
1350   if (!MEM_P (op0))
1351     {
1352       if (offset != 0
1353           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1354         {
1355           if (!REG_P (op0))
1356             op0 = copy_to_reg (op0);
1357           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1358                                 op0, (offset * UNITS_PER_WORD));
1359         }
1360       offset = 0;
1361     }
1362
1363   /* Now OFFSET is nonzero only for memory operands.  */
1364
1365   if (unsignedp)
1366     {
1367       if (HAVE_extzv
1368           && (GET_MODE_BITSIZE (extzv_mode) >= bitsize)
1369           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1370                 && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1371         {
1372           unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1373           rtx bitsize_rtx, bitpos_rtx;
1374           rtx last = get_last_insn ();
1375           rtx xop0 = op0;
1376           rtx xtarget = target;
1377           rtx xspec_target = spec_target;
1378           rtx xspec_target_subreg = spec_target_subreg;
1379           rtx pat;
1380           enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1381
1382           if (MEM_P (xop0))
1383             {
1384               int save_volatile_ok = volatile_ok;
1385               volatile_ok = 1;
1386
1387               /* Is the memory operand acceptable?  */
1388               if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1389                      (xop0, GET_MODE (xop0))))
1390                 {
1391                   /* No, load into a reg and extract from there.  */
1392                   enum machine_mode bestmode;
1393
1394                   /* Get the mode to use for inserting into this field.  If
1395                      OP0 is BLKmode, get the smallest mode consistent with the
1396                      alignment. If OP0 is a non-BLKmode object that is no
1397                      wider than MAXMODE, use its mode. Otherwise, use the
1398                      smallest mode containing the field.  */
1399
1400                   if (GET_MODE (xop0) == BLKmode
1401                       || (GET_MODE_SIZE (GET_MODE (op0))
1402                           > GET_MODE_SIZE (maxmode)))
1403                     bestmode = get_best_mode (bitsize, bitnum,
1404                                               MEM_ALIGN (xop0), maxmode,
1405                                               MEM_VOLATILE_P (xop0));
1406                   else
1407                     bestmode = GET_MODE (xop0);
1408
1409                   if (bestmode == VOIDmode
1410                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1411                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1412                     goto extzv_loses;
1413
1414                   /* Compute offset as multiple of this unit,
1415                      counting in bytes.  */
1416                   unit = GET_MODE_BITSIZE (bestmode);
1417                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1418                   xbitpos = bitnum % unit;
1419                   xop0 = adjust_address (xop0, bestmode, xoffset);
1420
1421                   /* Fetch it to a register in that size.  */
1422                   xop0 = force_reg (bestmode, xop0);
1423
1424                   /* XBITPOS counts within UNIT, which is what is expected.  */
1425                 }
1426               else
1427                 /* Get ref to first byte containing part of the field.  */
1428                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1429
1430               volatile_ok = save_volatile_ok;
1431             }
1432
1433           /* If op0 is a register, we need it in MAXMODE (which is usually
1434              SImode). to make it acceptable to the format of extzv.  */
1435           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1436             goto extzv_loses;
1437           if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1438             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1439
1440           /* On big-endian machines, we count bits from the most significant.
1441              If the bit field insn does not, we must invert.  */
1442           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1443             xbitpos = unit - bitsize - xbitpos;
1444
1445           /* Now convert from counting within UNIT to counting in MAXMODE.  */
1446           if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1447             xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1448
1449           unit = GET_MODE_BITSIZE (maxmode);
1450
1451           if (xtarget == 0
1452               || (flag_force_mem && MEM_P (xtarget)))
1453             xtarget = xspec_target = gen_reg_rtx (tmode);
1454
1455           if (GET_MODE (xtarget) != maxmode)
1456             {
1457               if (REG_P (xtarget))
1458                 {
1459                   int wider = (GET_MODE_SIZE (maxmode)
1460                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1461                   xtarget = gen_lowpart (maxmode, xtarget);
1462                   if (wider)
1463                     xspec_target_subreg = xtarget;
1464                 }
1465               else
1466                 xtarget = gen_reg_rtx (maxmode);
1467             }
1468
1469           /* If this machine's extzv insists on a register target,
1470              make sure we have one.  */
1471           if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1472                  (xtarget, maxmode)))
1473             xtarget = gen_reg_rtx (maxmode);
1474
1475           bitsize_rtx = GEN_INT (bitsize);
1476           bitpos_rtx = GEN_INT (xbitpos);
1477
1478           pat = gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1479           if (pat)
1480             {
1481               emit_insn (pat);
1482               target = xtarget;
1483               spec_target = xspec_target;
1484               spec_target_subreg = xspec_target_subreg;
1485             }
1486           else
1487             {
1488               delete_insns_since (last);
1489               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1490                                                 bitpos, target, 1);
1491             }
1492         }
1493       else
1494       extzv_loses:
1495         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1496                                           bitpos, target, 1);
1497     }
1498   else
1499     {
1500       if (HAVE_extv
1501           && (GET_MODE_BITSIZE (extv_mode) >= bitsize)
1502           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1503                 && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1504         {
1505           int xbitpos = bitpos, xoffset = offset;
1506           rtx bitsize_rtx, bitpos_rtx;
1507           rtx last = get_last_insn ();
1508           rtx xop0 = op0, xtarget = target;
1509           rtx xspec_target = spec_target;
1510           rtx xspec_target_subreg = spec_target_subreg;
1511           rtx pat;
1512           enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1513
1514           if (MEM_P (xop0))
1515             {
1516               /* Is the memory operand acceptable?  */
1517               if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1518                      (xop0, GET_MODE (xop0))))
1519                 {
1520                   /* No, load into a reg and extract from there.  */
1521                   enum machine_mode bestmode;
1522
1523                   /* Get the mode to use for inserting into this field.  If
1524                      OP0 is BLKmode, get the smallest mode consistent with the
1525                      alignment. If OP0 is a non-BLKmode object that is no
1526                      wider than MAXMODE, use its mode. Otherwise, use the
1527                      smallest mode containing the field.  */
1528
1529                   if (GET_MODE (xop0) == BLKmode
1530                       || (GET_MODE_SIZE (GET_MODE (op0))
1531                           > GET_MODE_SIZE (maxmode)))
1532                     bestmode = get_best_mode (bitsize, bitnum,
1533                                               MEM_ALIGN (xop0), maxmode,
1534                                               MEM_VOLATILE_P (xop0));
1535                   else
1536                     bestmode = GET_MODE (xop0);
1537
1538                   if (bestmode == VOIDmode
1539                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1540                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1541                     goto extv_loses;
1542
1543                   /* Compute offset as multiple of this unit,
1544                      counting in bytes.  */
1545                   unit = GET_MODE_BITSIZE (bestmode);
1546                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1547                   xbitpos = bitnum % unit;
1548                   xop0 = adjust_address (xop0, bestmode, xoffset);
1549
1550                   /* Fetch it to a register in that size.  */
1551                   xop0 = force_reg (bestmode, xop0);
1552
1553                   /* XBITPOS counts within UNIT, which is what is expected.  */
1554                 }
1555               else
1556                 /* Get ref to first byte containing part of the field.  */
1557                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1558             }
1559
1560           /* If op0 is a register, we need it in MAXMODE (which is usually
1561              SImode) to make it acceptable to the format of extv.  */
1562           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1563             goto extv_loses;
1564           if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1565             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1566
1567           /* On big-endian machines, we count bits from the most significant.
1568              If the bit field insn does not, we must invert.  */
1569           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1570             xbitpos = unit - bitsize - xbitpos;
1571
1572           /* XBITPOS counts within a size of UNIT.
1573              Adjust to count within a size of MAXMODE.  */
1574           if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1575             xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1576
1577           unit = GET_MODE_BITSIZE (maxmode);
1578
1579           if (xtarget == 0
1580               || (flag_force_mem && MEM_P (xtarget)))
1581             xtarget = xspec_target = gen_reg_rtx (tmode);
1582
1583           if (GET_MODE (xtarget) != maxmode)
1584             {
1585               if (REG_P (xtarget))
1586                 {
1587                   int wider = (GET_MODE_SIZE (maxmode)
1588                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1589                   xtarget = gen_lowpart (maxmode, xtarget);
1590                   if (wider)
1591                     xspec_target_subreg = xtarget;
1592                 }
1593               else
1594                 xtarget = gen_reg_rtx (maxmode);
1595             }
1596
1597           /* If this machine's extv insists on a register target,
1598              make sure we have one.  */
1599           if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1600                  (xtarget, maxmode)))
1601             xtarget = gen_reg_rtx (maxmode);
1602
1603           bitsize_rtx = GEN_INT (bitsize);
1604           bitpos_rtx = GEN_INT (xbitpos);
1605
1606           pat = gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1607           if (pat)
1608             {
1609               emit_insn (pat);
1610               target = xtarget;
1611               spec_target = xspec_target;
1612               spec_target_subreg = xspec_target_subreg;
1613             }
1614           else
1615             {
1616               delete_insns_since (last);
1617               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1618                                                 bitpos, target, 0);
1619             }
1620         }
1621       else
1622       extv_loses:
1623         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1624                                           bitpos, target, 0);
1625     }
1626   if (target == spec_target)
1627     return target;
1628   if (target == spec_target_subreg)
1629     return spec_target;
1630   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1631     {
1632       /* If the target mode is not a scalar integral, first convert to the
1633          integer mode of that size and then access it as a floating-point
1634          value via a SUBREG.  */
1635       if (!SCALAR_INT_MODE_P (tmode))
1636         {
1637           enum machine_mode smode
1638             = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1639           target = convert_to_mode (smode, target, unsignedp);
1640           target = force_reg (smode, target);
1641           return gen_lowpart (tmode, target);
1642         }
1643
1644       return convert_to_mode (tmode, target, unsignedp);
1645     }
1646   return target;
1647 }
1648 \f
1649 /* Extract a bit field using shifts and boolean operations.
1650    Returns an rtx to represent the value.
1651    OP0 addresses a register (word) or memory (byte).
1652    BITPOS says which bit within the word or byte the bit field starts in.
1653    OFFSET says how many bytes farther the bit field starts;
1654     it is 0 if OP0 is a register.
1655    BITSIZE says how many bits long the bit field is.
1656     (If OP0 is a register, it may be narrower than a full word,
1657      but BITPOS still counts within a full word,
1658      which is significant on bigendian machines.)
1659
1660    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1661    If TARGET is nonzero, attempts to store the value there
1662    and return TARGET, but this is not guaranteed.
1663    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.
        Note that a signed field is returned in the narrowest integer mode
        that holds the field at its position within OP0, which need not be
        TMODE, and that a field handed to extract_split_bit_field comes
        back in word_mode.  */
1664
1665 static rtx
1666 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1667                          unsigned HOST_WIDE_INT offset,
1668                          unsigned HOST_WIDE_INT bitsize,
1669                          unsigned HOST_WIDE_INT bitpos, rtx target,
1670                          int unsignedp)
1671 {
       /* Width of the unit BITPOS counts within: a word for a register,
          replaced below by the width of the chosen mode for memory.  */
1672   unsigned int total_bits = BITS_PER_WORD;
1673   enum machine_mode mode;
1674
1675   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1676     {
1677       /* Special treatment for a bit field split across two registers.  */
1678       if (bitsize + bitpos > BITS_PER_WORD)
1679         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1680     }
1681   else
1682     {
1683       /* Get the proper mode to use for this field.  We want a mode that
1684          includes the entire field.  If such a mode would be larger than
1685          a word, we won't be doing the extraction the normal way.  */
1686
1687       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1688                             MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1689
1690       if (mode == VOIDmode)
1691         /* The only way this should occur is if the field spans word
1692            boundaries.  */
1693         return extract_split_bit_field (op0, bitsize,
1694                                         bitpos + offset * BITS_PER_UNIT,
1695                                         unsignedp);
1696
1697       total_bits = GET_MODE_BITSIZE (mode);
1698
1699       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1700          be in the range 0 to total_bits-1, and put any excess bytes in
1701          OFFSET.  */
1702       if (bitpos >= total_bits)
1703         {
1704           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1705           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1706                      * BITS_PER_UNIT);
1707         }
1708
1709       /* Get ref to an aligned byte, halfword, or word containing the field.
1710          Adjust BITPOS to be position within a word,
1711          and OFFSET to be the offset of that word.
1712          Then alter OP0 to refer to that word.  */
1713       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1714       offset -= (offset % (total_bits / BITS_PER_UNIT));
1715       op0 = adjust_address (op0, mode, offset);
1716     }
1717
       /* From here on MODE is the mode OP0 is accessed in (for memory, the
          mode just chosen; for a register, whatever mode it already has).  */
1718   mode = GET_MODE (op0);
1719
1720   if (BYTES_BIG_ENDIAN)
1721     /* BITPOS is the distance between our msb and that of OP0.
1722        Convert it to the distance from the lsb.  */
1723     bitpos = total_bits - bitsize - bitpos;
1724
1725   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1726      We have reduced the big-endian case to the little-endian case.  */
1727
1728   if (unsignedp)
1729     {
1730       if (bitpos)
1731         {
1732           /* If the field does not already start at the lsb,
1733              shift it so it does.  */
1734           tree amount = build_int_cst (NULL_TREE, bitpos);
1735           /* Maybe propagate the target for the shift.  */
1736           /* But not if we will return it--could confuse integrate.c.  */
1737           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
               /* The shift is done in MODE, so TARGET is only offered to it
                  when TMODE is that same mode.  */
1738           if (tmode != mode) subtarget = 0;
1739           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1740         }
1741       /* Convert the value to the desired mode.  */
1742       if (mode != tmode)
1743         op0 = convert_to_mode (tmode, op0, 1);
1744
1745       /* Unless the msb of the field used to be the msb when we shifted,
1746          mask out the upper bits.  */
1747
           /* The mask is BITSIZE ones starting at bit 0, built in the mode
              OP0 has after the conversion above.  */
1748       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1749         return expand_binop (GET_MODE (op0), and_optab, op0,
1750                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1751                              target, 1, OPTAB_LIB_WIDEN);
1752       return op0;
1753     }
1754
1755   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1756      then arithmetic-shift its lsb to the lsb of the word.  */
1757   op0 = force_reg (mode, op0);
       /* TARGET is dropped when its mode (TMODE) differs from the mode of
          OP0, since the shifts below are not done in TMODE then.  */
1758   if (mode != tmode)
1759     target = 0;
1760
1761   /* Find the narrowest integer mode that contains the field.  */
1762
1763   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1764        mode = GET_MODE_WIDER_MODE (mode))
1765     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1766       {
1767         op0 = convert_to_mode (mode, op0, 0);
1768         break;
1769       }
1770
       /* Move the field's msb up to the msb of MODE, unless it is already
          there.  */
1771   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1772     {
1773       tree amount
1774         = build_int_cst (NULL_TREE,
1775                          GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1776       /* Maybe propagate the target for the shift.  */
1777       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1778       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1779     }
1780
       /* The final shift is requested with UNSIGNEDP zero, i.e. arithmetic:
          it brings the field's lsb down to bit 0 while copying the sign bit
          into the vacated upper bits.  */
1781   return expand_shift (RSHIFT_EXPR, mode, op0,
1782                        build_int_cst (NULL_TREE,
1783                                       GET_MODE_BITSIZE (mode) - bitsize),
1784                        target, 0);
1785 }
1786 \f
1787 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1788    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1789    complement of that if COMPLEMENT.  The mask is truncated if
1790    necessary to the width of mode MODE.  The mask is zero-extended if
1791    BITSIZE+BITPOS is too small for MODE.  Both halves are computed as
1792    unsigned values, so for any field lying within two HOST_WIDE_INTs no
1793    negative value is shifted and no shift count reaches a full word.  */
1794
1795 static rtx
1796 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1797 {
1798   const unsigned HOST_WIDE_INT ones = ~(unsigned HOST_WIDE_INT) 0;
1799   unsigned HOST_WIDE_INT masklow = 0, maskhigh = 0;
1800   int end = bitpos + bitsize;
1801
1802   if (bitsize != 0)
1803     {
1804       /* Low word: ones from BITPOS up to END or the top of the word.  */
1805       if (bitpos < HOST_BITS_PER_WIDE_INT)
1806         {
1807           masklow = ones << bitpos;
1808           if (end < HOST_BITS_PER_WIDE_INT)
1809             masklow &= ones >> (HOST_BITS_PER_WIDE_INT - end);
1810         }
1811
1812       /* High word: only needed when the field reaches past the low word.  */
1813       if (end > HOST_BITS_PER_WIDE_INT)
1814         {
1815           maskhigh = ones;
1816           if (bitpos > HOST_BITS_PER_WIDE_INT)
1817             maskhigh <<= bitpos - HOST_BITS_PER_WIDE_INT;
1818           maskhigh &= ones >> (2 * HOST_BITS_PER_WIDE_INT - end);
1819         }
1820     }
1821
1822   if (complement)
1823     {
1824       maskhigh = ~maskhigh;
1825       masklow = ~masklow;
1826     }
1827
1828   return immed_double_const (masklow, maskhigh, mode);
1829 }
1830
1831 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1832    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.
1833    VALUE is read with INTVAL; all shifts are done on unsigned values.  */
1834
1835 static rtx
1836 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1837 {
1838   unsigned HOST_WIDE_INT v = INTVAL (value);
1839   unsigned HOST_WIDE_INT low = 0, high = 0;
1840
1841   /* Keep the low BITSIZE bits; a full-width field needs no masking.  */
1842   if (bitsize < HOST_BITS_PER_WIDE_INT)
1843     v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;
1844
1845   if (bitpos >= HOST_BITS_PER_WIDE_INT)
1846     high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1847   else
1848     {
1849       low = v << bitpos;
1850       /* Bits shifted out of LOW go to HIGH (none when BITPOS is 0).  */
1851       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1852     }
1853
1854   return immed_double_const (low, high, mode);
1855 }
1856 \f
1857 /* Extract a bit field from a memory by forcing the alignment of the
1858    memory.  This is efficient only if the field spans at least 4 boundaries.
1859
1860    OP0 is the MEM.
1861    BITSIZE is the field width; BITPOS is the position of the first bit.
1862    UNSIGNEDP is true if the result should be zero-extended.

        Returns the extracted value in MODE, the integer mode chosen below to
        fit BITSIZE.  Returns NULL if no integer mode twice as wide as MODE
        exists, or if one of the expansions checked below fails; in the
        latter case the insns emitted so far are deleted again.  */
1863
1864 static rtx
1865 extract_force_align_mem_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1866                                    unsigned HOST_WIDE_INT bitpos,
1867                                    int unsignedp)
1868 {
1869   enum machine_mode mode, dmode;
1870   unsigned int m_bitsize, m_size;
1871   unsigned int sign_shift_up, sign_shift_dn;
1872   rtx base, a1, a2, v1, v2, comb, shift, result, start;
1873
1874   /* Choose a mode that will fit BITSIZE.  */
1875   mode = smallest_mode_for_size (bitsize, MODE_INT);
1876   m_size = GET_MODE_SIZE (mode);
1877   m_bitsize = GET_MODE_BITSIZE (mode);
1878
1879   /* Choose a mode twice as wide.  Fail if no such mode exists.  */
1880   dmode = mode_for_size (m_bitsize * 2, MODE_INT, false);
1881   if (dmode == BLKmode)
1882     return NULL;
1883
       /* Remember the last insn emitted so far, so that the `fail' path can
          delete everything emitted from here on.  */
1884   do_pending_stack_adjust ();
1885   start = get_last_insn ();
1886
1887   /* At the end, we'll need an additional shift to deal with sign/zero
1888      extension.  By default this will be a left+right shift of the
1889      appropriate size.  But we may be able to eliminate one of them.  */
1890   sign_shift_up = sign_shift_dn = m_bitsize - bitsize;
1891
1892   if (STRICT_ALIGNMENT)
1893     {
           /* BASE is the address of the byte holding the first bit of the
              field; BITPOS becomes the bit position within that byte.  */
1894       base = plus_constant (XEXP (op0, 0), bitpos / BITS_PER_UNIT);
1895       bitpos %= BITS_PER_UNIT;
1896
1897       /* We load two values to be concatenated.  There's an edge condition
1898          that bears notice -- an aligned value at the end of a page can
1899          only load one value lest we segfault.  So the two values we load
1900          are at "base & -size" and "(base + size - 1) & -size".  If base
1901          is unaligned, the addresses will be aligned and sequential; if
1902          base is aligned, the addresses will both be equal to base.  */
1903
1904       a1 = expand_simple_binop (Pmode, AND, force_operand (base, NULL),
1905                                 GEN_INT (-(HOST_WIDE_INT)m_size),
1906                                 NULL, true, OPTAB_LIB_WIDEN);
1907       mark_reg_pointer (a1, m_bitsize);
1908       v1 = gen_rtx_MEM (mode, a1);
1909       set_mem_align (v1, m_bitsize);
1910       v1 = force_reg (mode, validize_mem (v1));
1911
1912       a2 = plus_constant (base, GET_MODE_SIZE (mode) - 1);
1913       a2 = expand_simple_binop (Pmode, AND, force_operand (a2, NULL),
1914                                 GEN_INT (-(HOST_WIDE_INT)m_size),
1915                                 NULL, true, OPTAB_LIB_WIDEN);
1916       v2 = gen_rtx_MEM (mode, a2);
1917       set_mem_align (v2, m_bitsize);
1918       v2 = force_reg (mode, validize_mem (v2));
1919
1920       /* Combine these two values into a double-word value.  */
1921       if (m_bitsize == BITS_PER_WORD)
1922         {
               /* Each value is exactly one word wide: write them into the
                  two MODE-sized SUBREGs of a fresh DMODE pseudo.  */
1923           comb = gen_reg_rtx (dmode);
1924           emit_insn (gen_rtx_CLOBBER (VOIDmode, comb));
1925           emit_move_insn (gen_rtx_SUBREG (mode, comb, 0), v1);
1926           emit_move_insn (gen_rtx_SUBREG (mode, comb, m_size), v2);
1927         }
1928       else
1929         {
               /* Otherwise widen both values to DMODE and OR them together
                  with V2 shifted into the upper half.  On big-endian targets
                  the two are exchanged first (COMB is only scratch here).
                  NOTE(review): the result of the second convert_modes is
                  not checked before it is shifted, unlike the first.  */
1930           if (BYTES_BIG_ENDIAN)
1931             comb = v1, v1 = v2, v2 = comb;
1932           v1 = convert_modes (dmode, mode, v1, true);
1933           if (v1 == NULL)
1934             goto fail;
1935           v2 = convert_modes (dmode, mode, v2, true);
1936           v2 = expand_simple_binop (dmode, ASHIFT, v2, GEN_INT (m_bitsize),
1937                                     NULL, true, OPTAB_LIB_WIDEN);
1938           if (v2 == NULL)
1939             goto fail;
1940           comb = expand_simple_binop (dmode, IOR, v1, v2, NULL,
1941                                       true, OPTAB_LIB_WIDEN);
1942           if (comb == NULL)
1943             goto fail;
1944         }
1945
           /* SHIFT = (BASE & (M_SIZE - 1)) * BITS_PER_UNIT, i.e. the distance
              in bits of BASE from the aligned address A1.  */
1946       shift = expand_simple_binop (Pmode, AND, base, GEN_INT (m_size - 1),
1947                                    NULL, true, OPTAB_LIB_WIDEN);
1948       shift = expand_mult (Pmode, shift, GEN_INT (BITS_PER_UNIT), NULL, 1);
1949
1950       if (bitpos != 0)
1951         {
               /* If the field starts at least SIGN_SHIFT_UP bits into its
                  byte, shift down by that much less: the field then ends up
                  at the top of MODE and the later up-shift can be dropped.  */
1952           if (sign_shift_up <= bitpos)
1953             bitpos -= sign_shift_up, sign_shift_up = 0;
1954           shift = expand_simple_binop (Pmode, PLUS, shift, GEN_INT (bitpos),
1955                                        NULL, true, OPTAB_LIB_WIDEN);
1956         }
1957     }
1958   else
1959     {
1960       unsigned HOST_WIDE_INT offset = bitpos / BITS_PER_UNIT;
1961       bitpos %= BITS_PER_UNIT;
1962
1963       /* When strict alignment is not required, we can just load directly
1964          from memory without masking.  If the remaining BITPOS offset is
1965          small enough, we may be able to do all operations in MODE as
1966          opposed to DMODE.  */
1967       if (bitpos + bitsize <= m_bitsize)
1968         dmode = mode;
1969       comb = adjust_address (op0, dmode, offset);
1970
           /* Same saving as in the strict-alignment case: shift down by less
              and skip the up-shift when BITPOS allows it.  */
1971       if (sign_shift_up <= bitpos)
1972         bitpos -= sign_shift_up, sign_shift_up = 0;
1973       shift = GEN_INT (bitpos);
1974     }
1975
1976   /* Shift down the double-word such that the requested value is at bit 0.  */
1977   if (shift != const0_rtx)
1978     comb = expand_simple_binop (dmode, unsignedp ? LSHIFTRT : ASHIFTRT,
1979                                 comb, shift, NULL, unsignedp, OPTAB_LIB_WIDEN);
1980   if (comb == NULL)
1981     goto fail;
1982
1983   /* If the field exactly matches MODE, then all we need to do is return the
1984      lowpart.  Otherwise, shift to get the sign bits set properly.  */
1985   result = force_reg (mode, gen_lowpart (mode, comb));
1986
       /* Up-shift (unless eliminated above) to put the field's msb at the msb
          of MODE, then down-shift, logically or arithmetically according to
          UNSIGNEDP, to extend it.  Both counts are zero when BITSIZE equals
          the width of MODE.  */
1987   if (sign_shift_up)
1988     result = expand_simple_binop (mode, ASHIFT, result,
1989                                   GEN_INT (sign_shift_up),
1990                                   NULL_RTX, 0, OPTAB_LIB_WIDEN);
1991   if (sign_shift_dn)
1992     result = expand_simple_binop (mode, unsignedp ? LSHIFTRT : ASHIFTRT,
1993                                   result, GEN_INT (sign_shift_dn),
1994                                   NULL_RTX, 0, OPTAB_LIB_WIDEN);
1995
1996   return result;
1997
1998  fail:
       /* Discard every insn emitted since START and report failure.  */
1999   delete_insns_since (start);
2000   return NULL;
2001 }
2002
2003 /* Extract a bit field that is split across two words
2004    and return an RTX for the result.
2005
2006    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2007    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2008    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.

        The field is fetched in pieces that each stay within one UNIT, every
        piece being extracted unsigned by extract_fixed_bit_field, shifted
        into place and ORed into the result.  The result is formed in
        word_mode.  */
2009
2010 static rtx
2011 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2012                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
2013 {
2014   unsigned int unit;
2015   unsigned int bitsdone = 0;
2016   rtx result = NULL_RTX;
2017   int first = 1;
2018
2019   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2020      much at a time.  */
2021   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2022     unit = BITS_PER_WORD;
2023   else
2024     {
2025       unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
           /* The leading constant 0 disables this branch, so
              extract_force_align_mem_bit_field is never called from here.  */
2026       if (0 && bitsize / unit > 2)
2027         {
2028           rtx tmp = extract_force_align_mem_bit_field (op0, bitsize, bitpos,
2029                                                        unsignedp);
2030           if (tmp)
2031             return tmp;
2032         }
2033     }
2034
2035   while (bitsdone < bitsize)
2036     {
2037       unsigned HOST_WIDE_INT thissize;
2038       rtx part, word;
2039       unsigned HOST_WIDE_INT thispos;
2040       unsigned HOST_WIDE_INT offset;
2041
           /* Locate the next unextracted bit: OFFSET is the index of the
              UNIT holding it, THISPOS its position inside that UNIT.  */
2042       offset = (bitpos + bitsdone) / unit;
2043       thispos = (bitpos + bitsdone) % unit;
2044
2045       /* THISSIZE must not overrun a word boundary.  Otherwise,
2046          extract_fixed_bit_field will call us again, and we will mutually
2047          recurse forever.  */
2048       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2049       thissize = MIN (thissize, unit - thispos);
2050
2051       /* If OP0 is a register, then handle OFFSET here.
2052
2053          When handling multiword bitfields, extract_bit_field may pass
2054          down a word_mode SUBREG of a larger REG for a bitfield that actually
2055          crosses a word boundary.  Thus, for a SUBREG, we must find
2056          the current word starting from the base register.  */
2057       if (GET_CODE (op0) == SUBREG)
2058         {
2059           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2060           word = operand_subword_force (SUBREG_REG (op0), word_offset,
2061                                         GET_MODE (SUBREG_REG (op0)));
2062           offset = 0;
2063         }
2064       else if (REG_P (op0))
2065         {
2066           word = operand_subword_force (op0, offset, GET_MODE (op0));
2067           offset = 0;
2068         }
2069       else
2070         word = op0;
2071
2072       /* Extract the parts in bit-counting order,
2073          whose meaning is determined by BYTES_BIG_ENDIAN.
2074          OFFSET is in UNITs, and UNIT is in bits.
2075          extract_fixed_bit_field wants offset in bytes.  */
2076       part = extract_fixed_bit_field (word_mode, word,
2077                                       offset * unit / BITS_PER_UNIT,
2078                                       thissize, thispos, 0, 1);
2079       bitsdone += thissize;
2080
2081       /* Shift this part into place for the result.  */
           /* On big-endian targets the part moves up by the number of bits
              still to be extracted; otherwise it moves up by the number of
              bits that were extracted before it.  */
2082       if (BYTES_BIG_ENDIAN)
2083         {
2084           if (bitsize != bitsdone)
2085             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2086                                  build_int_cst (NULL_TREE, bitsize - bitsdone),
2087                                  0, 1);
2088         }
2089       else
2090         {
2091           if (bitsdone != thissize)
2092             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2093                                  build_int_cst (NULL_TREE,
2094                                                 bitsdone - thissize), 0, 1);
2095         }
2096
2097       if (first)
2098         result = part;
2099       else
2100         /* Combine the parts with bitwise or.  This works
2101            because we extracted each part as an unsigned bit field.  */
2102         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2103                                OPTAB_LIB_WIDEN);
2104
2105       first = 0;
2106     }
2107
2108   /* Unsigned bit field: we are done.  */
2109   if (unsignedp)
2110     return result;
2111   /* Signed bit field: sign-extend with two arithmetic shifts.  */
       /* Both shifts are by BITS_PER_WORD - BITSIZE in word_mode: up to put
          the field's msb at the word's msb, then back down.  */
2112   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2113                          build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2114                          NULL_RTX, 0);
2115   return expand_shift (RSHIFT_EXPR, word_mode, result,
2116                        build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2117                        NULL_RTX, 0);
2118 }
2119 \f
2120 /* Add INC into TARGET; copy the sum back if it was computed elsewhere.  */
2121
2122 void
2123 expand_inc (rtx target, rtx inc)
2124 {
2125   enum machine_mode mode = GET_MODE (target);
2126   rtx sum = expand_binop (mode, add_optab, target, inc, target,
2127                           0, OPTAB_LIB_WIDEN);
2128   if (target != sum)
2129     emit_move_insn (target, sum);
2130 }
2131
2132 /* Subtract DEC from TARGET; copy the result back if it landed elsewhere.  */
2133
2134 void
2135 expand_dec (rtx target, rtx dec)
2136 {
2137   enum machine_mode mode = GET_MODE (target);
2138   rtx diff = expand_binop (mode, sub_optab, target, dec, target,
2139                            0, OPTAB_LIB_WIDEN);
2140   if (target != diff)
2141     emit_move_insn (target, diff);
2142 }
2143 \f
2144 /* Output a shift instruction for expression code CODE,
2145    with SHIFTED being the rtx for the value to shift,
2146    and AMOUNT the tree for the amount to shift by.
2147    Store the result in the rtx TARGET, if that is convenient.
2148    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2149    Return the rtx for where the value is.

        CODE is tested against LSHIFT_EXPR, LROTATE_EXPR and RROTATE_EXPR;
        any other code is treated as a right shift.  MODE is the mode the
        operation is done in.  A count that expands to const0_rtx returns
        SHIFTED unchanged.  */
2150
2151 rtx
2152 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2153               tree amount, rtx target, int unsignedp)
2154 {
2155   rtx op1, temp = 0;
2156   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2157   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2158   int try;
2159
2160   /* Previously detected shift-counts computed by NEGATE_EXPR
2161      and shifted in the other direction; but that does not work
2162      on all machines.  */
2163
2164   op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
2165
       /* When SHIFT_COUNT_TRUNCATED holds, reduce an out-of-range constant
          count modulo the width of MODE, and look through a low-part SUBREG
          wrapped around the count.  */
2166   if (SHIFT_COUNT_TRUNCATED)
2167     {
2168       if (GET_CODE (op1) == CONST_INT
2169           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2170               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2171         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2172                        % GET_MODE_BITSIZE (mode));
2173       else if (GET_CODE (op1) == SUBREG
2174                && subreg_lowpart_p (op1))
2175         op1 = SUBREG_REG (op1);
2176     }
2177
2178   if (op1 == const0_rtx)
2179     return shifted;
2180
2181   /* Check whether its cheaper to implement a left shift by a constant
2182      bit count by a sequence of additions.  */
2183   if (code == LSHIFT_EXPR
2184       && GET_CODE (op1) == CONST_INT
2185       && INTVAL (op1) > 0
2186       && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2187       && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode])
2188     {
2189       int i;
           /* Each iteration adds the value to itself, i.e. shifts it left by
              one bit.  */
2190       for (i = 0; i < INTVAL (op1); i++)
2191         {
2192           temp = force_reg (mode, shifted);
2193           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2194                                   unsignedp, OPTAB_LIB_WIDEN);
2195         }
2196       return shifted;
2197     }
2198
       /* Make up to three attempts, with OPTAB_DIRECT, then OPTAB_WIDEN, then
          OPTAB_LIB_WIDEN, stopping as soon as one of them yields a result.  */
2199   for (try = 0; temp == 0 && try < 3; try++)
2200     {
2201       enum optab_methods methods;
2202
2203       if (try == 0)
2204         methods = OPTAB_DIRECT;
2205       else if (try == 1)
2206         methods = OPTAB_WIDEN;
2207       else
2208         methods = OPTAB_LIB_WIDEN;
2209
2210       if (rotate)
2211         {
2212           /* Widening does not work for rotation.  */
2213           if (methods == OPTAB_WIDEN)
2214             continue;
2215           else if (methods == OPTAB_LIB_WIDEN)
2216             {
2217               /* If we have been unable to open-code this by a rotation,
2218                  do it as the IOR of two shifts.  I.e., to rotate A
2219                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2220                  where C is the bitsize of A.
2221
2222                  It is theoretically possible that the target machine might
2223                  not be able to perform either shift and hence we would
2224                  be making two libcalls rather than just the one for the
2225                  shift (similarly if IOR could not be done).  We will allow
2226                  this extremely unlikely lossage to avoid complicating the
2227                  code below.  */
2228
2229               rtx subtarget = target == shifted ? 0 : target;
2230               rtx temp1;
2231               tree type = TREE_TYPE (amount);
2232               tree new_amount = make_tree (type, op1);
2233               tree other_amount
2234                 = fold (build2 (MINUS_EXPR, type,
2235                                 build_int_cst (type, GET_MODE_BITSIZE (mode)),
2236                                 amount));
2237
2238               shifted = force_reg (mode, shifted);
2239
                   /* Both halves are logical shifts done by recursive calls;
                      the second goes the opposite way by C - AMOUNT.  */
2240               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2241                                    mode, shifted, new_amount, subtarget, 1);
2242               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2243                                     mode, shifted, other_amount, 0, 1);
2244               return expand_binop (mode, ior_optab, temp, temp1, target,
2245                                    unsignedp, methods);
2246             }
2247
2248           temp = expand_binop (mode,
2249                                left ? rotl_optab : rotr_optab,
2250                                shifted, op1, target, unsignedp, methods);
2251
2252           /* If we don't have the rotate, but we are rotating by a constant
2253              that is in range, try a rotate in the opposite direction.  */
2254
2255           if (temp == 0 && GET_CODE (op1) == CONST_INT
2256               && INTVAL (op1) > 0
2257               && (unsigned int) INTVAL (op1) < GET_MODE_BITSIZE (mode))
2258             temp = expand_binop (mode,
2259                                  left ? rotr_optab : rotl_optab,
2260                                  shifted,
2261                                  GEN_INT (GET_MODE_BITSIZE (mode)
2262                                           - INTVAL (op1)),
2263                                  target, unsignedp, methods);
2264         }
2265       else if (unsignedp)
2266         temp = expand_binop (mode,
2267                              left ? ashl_optab : lshr_optab,
2268                              shifted, op1, target, unsignedp, methods);
2269
2270       /* Do arithmetic shifts.
2271          Also, if we are going to widen the operand, we can just as well
2272          use an arithmetic right-shift instead of a logical one.  */
2273       if (temp == 0 && ! rotate
2274           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2275         {
2276           enum optab_methods methods1 = methods;
2277
2278           /* If trying to widen a log shift to an arithmetic shift,
2279              don't accept an arithmetic shift of the same size.  */
2280           if (unsignedp)
2281             methods1 = OPTAB_MUST_WIDEN;
2282
2283           /* Arithmetic shift */
2284
2285           temp = expand_binop (mode,
2286                                left ? ashl_optab : ashr_optab,
2287                                shifted, op1, target, unsignedp, methods1);
2288         }
2289
2290       /* We used to try extzv here for logical right shifts, but that was
2291          only useful for one machine, the VAX, and caused poor code
2292          generation there for lshrdi3, so the code was deleted and a
2293          define_expand for lshrsi3 was added to vax.md.  */
2294     }
2295
       /* One of the attempts above is required to have produced a result.  */
2296   gcc_assert (temp);
2297   return temp;
2298 }
2299 \f
2300 enum alg_code { alg_unknown, alg_zero, alg_m, alg_shift,
                       /* On the line above: alg_unknown is the first (zero)
                          enumerator, and synth_mult treats a cache entry
                          holding it as "nothing cached"; alg_zero
                          (total := 0) and alg_m (total := multiplicand) are
                          the two possible first steps of a sequence;
                          alg_shift is total := total * coeff.  */
                       /* total := total +/- multiplicand * coeff.  */
2301                   alg_add_t_m2, alg_sub_t_m2,
                       /* total := total * coeff +/- total.  */
2302                   alg_add_factor, alg_sub_factor,
                       /* total := total * coeff +/- multiplicand.  */
2303                   alg_add_t2_m, alg_sub_t2_m };
2304
2305 /* This structure holds the "cost" of a multiply sequence.  The
2306    "cost" field holds the total rtx_cost of every operator in the
2307    synthetic multiplication sequence, hence cost(a op b) is defined
2308    as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2309    The "latency" field holds the minimum possible latency of the
2310    synthetic multiply, on a hypothetical infinitely parallel CPU.
2311    This is the critical path, or the maximum height, of the expression
2312    tree which is the sum of rtx_costs on the most expensive path from
2313    any leaf to the root.  Hence latency(a op b) is defined as zero for
2314    leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise.  */
2315
2316 struct mult_cost {
       /* The comparison macros that follow order two costs by COST first
          and use LATENCY only to break ties.  */
2317   short cost;     /* Total rtx_cost of the multiplication sequence.  */
2318   short latency;  /* The latency of the multiplication sequence.  */
2319 };
2320
2321 /* This macro is used to compare a pointer to a mult_cost against a
2322    single integer "rtx_cost" value.  This is equivalent to the macro
2323    CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}.
        X is a pointer to a struct mult_cost and Y is an integer.  Both
        arguments appear several times in the expansion, so they must be
        free of side effects.  */
2324 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y)    \
2325                              || ((X)->cost == (Y) && (X)->latency < (Y)))
2326
2327 /* This macro is used to compare two pointers to mult_costs against
2328    each other.  The macro returns true if X is strictly cheaper than Y.
2329    Currently, the cheaper of two mult_costs is the one with the
2330    lower "cost".  If "cost"s are tied, the lower latency is cheaper.
        Equal costs and latencies compare as "not cheaper".  Both arguments
        appear several times in the expansion, so they must be free of
        side effects.  */
2331 #define CHEAPER_MULT_COST(X,Y)  ((X)->cost < (Y)->cost          \
2332                                  || ((X)->cost == (Y)->cost     \
2333                                      && (X)->latency < (Y)->latency))
2334
2335 /* This structure records a sequence of operations.
2336    `ops' is the number of operations recorded, counting the initial
2337    alg_zero or alg_m.  `cost' is their total cost and latency.
2338    The operations are stored in `op' and the corresponding
2339    logarithms of the integer coefficients in `log'.
2340
2341    These are the operations:
2342    alg_zero             total := 0;
2343    alg_m                total := multiplicand;
2344    alg_shift            total := total * coeff;
2345    alg_add_t_m2         total := total + multiplicand * coeff;
2346    alg_sub_t_m2         total := total - multiplicand * coeff;
2347    alg_add_factor       total := total * coeff + total;
2348    alg_sub_factor       total := total * coeff - total;
2349    alg_add_t2_m         total := total * coeff + multiplicand;
2350    alg_sub_t2_m         total := total * coeff - multiplicand;
2351
2352    The first operation (op[0]) must be either alg_zero or alg_m; it has
        no coefficient, so log[0] is not used.  */
2353
2354 struct algorithm
2355 {
2356   struct mult_cost cost;
2357   short ops;
2358   /* The size of the OP and LOG fields are not directly related to the
2359      word size, but the worst-case algorithms will be if we have few
2360      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2361      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2362      in total wordsize operations.  */
2363   enum alg_code op[MAX_BITS_PER_WORD];
       /* log[i] is the base-2 logarithm of the coefficient used by op[i],
          i.e. the shift count of that step.  */
2364   char log[MAX_BITS_PER_WORD];
2365 };
2366
2367 /* The entry for our multiplication cache/hash table.  */
2368 struct alg_hash_entry {
2369   /* The number we are multiplying by.  This is the cache key, so it
          must be as wide as the `t' argument of synth_mult (an unsigned
          HOST_WIDE_INT).  With a narrower field the stored key would be
          truncated on hosts where HOST_WIDE_INT is wider than int, and a
          different multiplier that shares the low bits and hashes to the
          same slot would be mistaken for this one.  */
2370   unsigned HOST_WIDE_INT t;
2371
2372   /* The mode in which we are multiplying something by T.  */
2373   enum machine_mode mode;
2374
2375   /* The best multiplication algorithm for t.  This is only the code of
          the final step; synth_mult re-derives the rest of the sequence
          when the entry is used.  */
2376   enum alg_code alg;
2377 };
2378
2379 /* The number of cache/hash entries.  */
2380 #define NUM_ALG_HASH_ENTRIES 307
2381
2382 /* Each entry of ALG_HASH caches the alg_code chosen for one
2383    (multiplier, mode) pair.  This is actually a hash table with one
2384    entry per slot: on a collision the older entry is simply overwritten.
        Being static, the table starts out zeroed, so every slot initially
        holds alg_unknown.  */
2385 static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2386
2387 /* Indicates the type of fixup needed after a constant multiplication.
2388    basic_variant means no fixup is needed, negate_variant means that
2389    the result should be negated (the sequence multiplied by -VAL), and
2390    add_variant means that the multiplicand should be added to the
        result (the sequence multiplied by VAL - 1).  */
2391 enum mult_variant {basic_variant, negate_variant, add_variant};
2392
2393 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2394                         const struct mult_cost *, enum machine_mode mode);
2395 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2396                                  struct algorithm *, enum mult_variant *, int);
2397 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2398                               const struct algorithm *, enum mult_variant);
2399 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2400                                                  int, rtx *, int *, int *);
2401 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2402 static rtx extract_high_half (enum machine_mode, rtx);
2403 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2404 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2405                                        int, int);
2406 /* Compute the best algorithm for multiplying by T and store it in
2407    *ALG_OUT.  The algorithm must cost less than COST_LIMIT.
2408    If no such algorithm is found, ALG_OUT->cost is left at COST_LIMIT
2409    plus one (in both fields) and all other fields of *ALG_OUT are
2410    undefined.  MODE is the machine mode of the multiplication.

        The search is recursive: each strategy below peels one step off T,
        calls synth_mult on the smaller remaining multiplier with the
        budget reduced by the cost of that step, and keeps the cheapest
        result seen so far in BEST_ALG / BEST_COST.  */
2411
2412 static void
2413 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2414             const struct mult_cost *cost_limit, enum machine_mode mode)
2415 {
2416   int m;
2417   struct algorithm *alg_in, *best_alg;
2418   struct mult_cost best_cost;
2419   struct mult_cost new_limit;
2420   int op_cost, op_latency;
2421   unsigned HOST_WIDE_INT q;
       /* Every shift count M tried below must be smaller than MAXM.  */
2422   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2423   int hash_index;
2424   bool cache_hit = false;
2425   enum alg_code cache_alg = alg_zero;
2426
2427   /* Indicate that no algorithm is yet found.  If no algorithm
2428      is found, this value will be returned and indicate failure.  */
2429   alg_out->cost.cost = cost_limit->cost + 1;
2430   alg_out->cost.latency = cost_limit->latency + 1;
2431
       /* Nothing can be cheaper than an exhausted budget.  */
2432   if (cost_limit->cost < 0
2433       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2434     return;
2435
2436   /* Restrict the bits of "t" to the multiplication's mode.  */
2437   t &= GET_MODE_MASK (mode);
2438
2439   /* t == 1 can be done in zero cost.  */
2440   if (t == 1)
2441     {
2442       alg_out->ops = 1;
2443       alg_out->cost.cost = 0;
2444       alg_out->cost.latency = 0;
2445       alg_out->op[0] = alg_m;
2446       return;
2447     }
2448
2449   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2450      fail now.  */
2451   if (t == 0)
2452     {
2453       if (MULT_COST_LESS (cost_limit, zero_cost))
2454         return;
2455       else
2456         {
2457           alg_out->ops = 1;
2458           alg_out->cost.cost = zero_cost;
2459           alg_out->cost.latency = zero_cost;
2460           alg_out->op[0] = alg_zero;
2461           return;
2462         }
2463     }
2464
2465   /* We'll be needing a couple extra algorithm structures now.
          ALG_IN receives the result of each recursive call; BEST_ALG
          holds the cheapest sequence found so far.  */
2466
2467   alg_in = alloca (sizeof (struct algorithm));
2468   best_alg = alloca (sizeof (struct algorithm));
2469   best_cost = *cost_limit;
2470
2471   /* Compute the hash index.  */
2472   hash_index = (t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES;
2473
2474   /* See if we already know what to do for T.  On a hit we jump straight
          to the section for the cached strategy; every section leaves via
          `done' when CACHE_HIT is set, so only that one strategy is
          re-evaluated, against the current cost limit.  */
2475   if (alg_hash[hash_index].t == t
2476       && alg_hash[hash_index].mode == mode
2477       && alg_hash[hash_index].alg != alg_unknown)
2478     {
2479       cache_hit = true;
2480       cache_alg = alg_hash[hash_index].alg;
2481       switch (cache_alg)
2482         {
2483         case alg_shift:
2484           goto do_alg_shift;
2485
2486         case alg_add_t_m2:
2487         case alg_sub_t_m2:
2488           goto do_alg_addsub_t_m2;
2489
2490         case alg_add_factor:
2491         case alg_sub_factor:
2492           goto do_alg_addsub_factor;
2493
2494         case alg_add_t2_m:
2495           goto do_alg_add_t2_m;
2496
2497         case alg_sub_t2_m:
2498           goto do_alg_sub_t2_m;
2499
2500         default:
2501           gcc_unreachable ();
2502         }
2503     }
2504
2505   /* If we have a group of zero bits at the low-order part of T, try
2506      multiplying by the remaining bits and then doing a shift.  */
2507
2508   if ((t & 1) == 0)
2509     {
2510     do_alg_shift:
2511       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2512       if (m < maxm)
2513         {
2514           q = t >> m;
2515           /* The function expand_shift will choose between a shift and
2516              a sequence of additions, so the observed cost is given as
2517              MIN (m * add_cost[mode], shift_cost[mode][m]).  */
2518           op_cost = m * add_cost[mode];
2519           if (shift_cost[mode][m] < op_cost)
2520             op_cost = shift_cost[mode][m];
2521           new_limit.cost = best_cost.cost - op_cost;
2522           new_limit.latency = best_cost.latency - op_cost;
2523           synth_mult (alg_in, q, &new_limit, mode);
2524
2525           alg_in->cost.cost += op_cost;
2526           alg_in->cost.latency += op_cost;
2527           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2528             {
                   /* Swap the two buffers so that BEST_ALG holds the
                      sequence just returned, then append this step at
                      index OPS.  OPS itself is only incremented when the
                      result is copied to ALG_OUT at the end.  The same
                      pattern is repeated for every strategy below.  */
2529               struct algorithm *x;
2530               best_cost = alg_in->cost;
2531               x = alg_in, alg_in = best_alg, best_alg = x;
2532               best_alg->log[best_alg->ops] = m;
2533               best_alg->op[best_alg->ops] = alg_shift;
2534             }
2535         }
2536       if (cache_hit)
2537         goto done;
2538     }
2539
2540   /* If we have an odd number, add or subtract one.  */
2541   if ((t & 1) != 0)
2542     {
2543       unsigned HOST_WIDE_INT w;
2544
2545     do_alg_addsub_t_m2:
           /* Find the lowest zero bit of T.  */
2546       for (w = 1; (w & t) != 0; w <<= 1)
2547         ;
2548       /* If T was -1, then W will be zero after the loop.  This is another
2549          case where T ends with ...111.  Handling this with (T + 1) and
2550          subtract 1 produces slightly better code and results in algorithm
2551          selection much faster than treating it like the ...0111 case
2552          below.  */
2553       if (w == 0
2554           || (w > 2
2555               /* Reject the case where t is 3.
2556                  Thus we prefer addition in that case.  */
2557               && t != 3))
2558         {
2559           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2560
2561           op_cost = add_cost[mode];
2562           new_limit.cost = best_cost.cost - op_cost;
2563           new_limit.latency = best_cost.latency - op_cost;
2564           synth_mult (alg_in, t + 1, &new_limit, mode);
2565
2566           alg_in->cost.cost += op_cost;
2567           alg_in->cost.latency += op_cost;
2568           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2569             {
2570               struct algorithm *x;
2571               best_cost = alg_in->cost;
2572               x = alg_in, alg_in = best_alg, best_alg = x;
2573               best_alg->log[best_alg->ops] = 0;
2574               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2575             }
2576         }
2577       else
2578         {
2579           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2580
2581           op_cost = add_cost[mode];
2582           new_limit.cost = best_cost.cost - op_cost;
2583           new_limit.latency = best_cost.latency - op_cost;
2584           synth_mult (alg_in, t - 1, &new_limit, mode);
2585
2586           alg_in->cost.cost += op_cost;
2587           alg_in->cost.latency += op_cost;
2588           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2589             {
2590               struct algorithm *x;
2591               best_cost = alg_in->cost;
2592               x = alg_in, alg_in = best_alg, best_alg = x;
2593               best_alg->log[best_alg->ops] = 0;
2594               best_alg->op[best_alg->ops] = alg_add_t_m2;
2595             }
2596         }
2597       if (cache_hit)
2598         goto done;
2599     }
2600
2601   /* Look for factors of t of the form
2602      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2603      If we find such a factor, we can multiply by t using an algorithm that
2604      multiplies by q, shift the result by m and add/subtract it to itself.
2605
2606      We search for large factors first and loop down, even if large factors
2607      are less probable than small; if we find a large factor we will find a
2608      good sequence quickly, and therefore be able to prune (by decreasing
2609      COST_LIMIT) the search.
2610
          On a cache hit only the factor form that was cached (add or sub)
          is accepted.  */
2611  do_alg_addsub_factor:
2612   for (m = floor_log2 (t - 1); m >= 2; m--)
2613     {
2614       unsigned HOST_WIDE_INT d;
2615
2616       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2617       if (t % d == 0 && t > d && m < maxm
2618           && (!cache_hit || cache_alg == alg_add_factor))
2619         {
2620           /* If the target has a cheap shift-and-add instruction use
2621              that in preference to a shift insn followed by an add insn.
2622              Assume that the shift-and-add is "atomic" with a latency
2623              equal to its cost, otherwise assume that on superscalar
2624              hardware the shift may be executed concurrently with the
2625              earlier steps in the algorithm.  */
2626           op_cost = add_cost[mode] + shift_cost[mode][m];
2627           if (shiftadd_cost[mode][m] < op_cost)
2628             {
2629               op_cost = shiftadd_cost[mode][m];
2630               op_latency = op_cost;
2631             }
2632           else
2633             op_latency = add_cost[mode];
2634
2635           new_limit.cost = best_cost.cost - op_cost;
2636           new_limit.latency = best_cost.latency - op_latency;
2637           synth_mult (alg_in, t / d, &new_limit, mode);
2638
2639           alg_in->cost.cost += op_cost;
2640           alg_in->cost.latency += op_latency;
               /* The total latency is never taken to be less than the
                  full cost of this step.  */
2641           if (alg_in->cost.latency < op_cost)
2642             alg_in->cost.latency = op_cost;
2643           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2644             {
2645               struct algorithm *x;
2646               best_cost = alg_in->cost;
2647               x = alg_in, alg_in = best_alg, best_alg = x;
2648               best_alg->log[best_alg->ops] = m;
2649               best_alg->op[best_alg->ops] = alg_add_factor;
2650             }
2651           /* Other factors will have been taken care of in the recursion.  */
2652           break;
2653         }
2654
2655       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2656       if (t % d == 0 && t > d && m < maxm
2657           && (!cache_hit || cache_alg == alg_sub_factor))
2658         {
2659           /* If the target has a cheap shift-and-subtract insn use
2660              that in preference to a shift insn followed by a sub insn.
2661              Assume that the shift-and-sub is "atomic" with a latency
2662              equal to its cost, otherwise assume that on superscalar
2663              hardware the shift may be executed concurrently with the
2664              earlier steps in the algorithm.  */
2665           op_cost = add_cost[mode] + shift_cost[mode][m];
2666           if (shiftsub_cost[mode][m] < op_cost)
2667             {
2668               op_cost = shiftsub_cost[mode][m];
2669               op_latency = op_cost;
2670             }
2671           else
2672             op_latency = add_cost[mode];
2673
2674           new_limit.cost = best_cost.cost - op_cost;
2675           new_limit.latency = best_cost.latency - op_latency;
2676           synth_mult (alg_in, t / d, &new_limit, mode);
2677
2678           alg_in->cost.cost += op_cost;
2679           alg_in->cost.latency += op_latency;
2680           if (alg_in->cost.latency < op_cost)
2681             alg_in->cost.latency = op_cost;
2682           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2683             {
2684               struct algorithm *x;
2685               best_cost = alg_in->cost;
2686               x = alg_in, alg_in = best_alg, best_alg = x;
2687               best_alg->log[best_alg->ops] = m;
2688               best_alg->op[best_alg->ops] = alg_sub_factor;
2689             }
2690           break;
2691         }
2692     }
2693   if (cache_hit)
2694     goto done;
2695
2696   /* Try shift-and-add (load effective address) instructions,
2697      i.e. do a*3, a*5, a*9.  */
2698   if ((t & 1) != 0)
2699     {
2700     do_alg_add_t2_m:
           /* Q becomes the lowest set bit of T - 1, M its bit position.  */
2701       q = t - 1;
2702       q = q & -q;
2703       m = exact_log2 (q);
2704       if (m >= 0 && m < maxm)
2705         {
2706           op_cost = shiftadd_cost[mode][m];
2707           new_limit.cost = best_cost.cost - op_cost;
2708           new_limit.latency = best_cost.latency - op_cost;
2709           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2710
2711           alg_in->cost.cost += op_cost;
2712           alg_in->cost.latency += op_cost;
2713           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2714             {
2715               struct algorithm *x;
2716               best_cost = alg_in->cost;
2717               x = alg_in, alg_in = best_alg, best_alg = x;
2718               best_alg->log[best_alg->ops] = m;
2719               best_alg->op[best_alg->ops] = alg_add_t2_m;
2720             }
2721         }
2722       if (cache_hit)
2723         goto done;
2724
2725     do_alg_sub_t2_m:
           /* Likewise for T + 1, using a shift-and-subtract.  */
2726       q = t + 1;
2727       q = q & -q;
2728       m = exact_log2 (q);
2729       if (m >= 0 && m < maxm)
2730         {
2731           op_cost = shiftsub_cost[mode][m];
2732           new_limit.cost = best_cost.cost - op_cost;
2733           new_limit.latency = best_cost.latency - op_cost;
2734           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2735
2736           alg_in->cost.cost += op_cost;
2737           alg_in->cost.latency += op_cost;
2738           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2739             {
2740               struct algorithm *x;
2741               best_cost = alg_in->cost;
2742               x = alg_in, alg_in = best_alg, best_alg = x;
2743               best_alg->log[best_alg->ops] = m;
2744               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2745             }
2746         }
2747       if (cache_hit)
2748         goto done;
2749     }
2750
2751  done:
2752   /* If best_cost has not decreased, we have not found any algorithm.
          In that case BEST_ALG has never been filled in and must not be
          read.  */
2753   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2754     return;
2755
2756   /* Cache the result.  Only the code of the last step appended above
          is remembered.
          NOTE(review): when best_alg->ops equals MAX_BITS_PER_WORD, this
          read (and the earlier stores at index OPS) would index one
          element past the OP and LOG arrays, because the length check
          only comes afterwards -- confirm whether that can happen.  */
2757   if (!cache_hit)
2758     {
2759       alg_hash[hash_index].t = t;
2760       alg_hash[hash_index].mode = mode;
2761       alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2762     }
2763
2764   /* If we are getting a too long sequence for `struct algorithm'
2765      to record, make this search fail.  */
2766   if (best_alg->ops == MAX_BITS_PER_WORD)
2767     return;
2768
2769   /* Copy the algorithm from temporary space to the space at alg_out.
2770      We avoid using structure assignment because the majority of
2771      best_alg is normally undefined, and this is a critical function.  */
2772   alg_out->ops = best_alg->ops + 1;
2773   alg_out->cost = best_cost;
2774   memcpy (alg_out->op, best_alg->op,
2775           alg_out->ops * sizeof *alg_out->op);
2776   memcpy (alg_out->log, best_alg->log,
2777           alg_out->ops * sizeof *alg_out->log);
2778 }
2779 \f
2780 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2781    Try three variations:
2782
2783        - a shift/add sequence based on VAL itself
2784        - a shift/add sequence based on -VAL, followed by a negation
2785        - a shift/add sequence based on VAL - 1, followed by an addition.
2786
2787    Return true if the cheapest of these cost less than MULT_COST,
2788    describing the algorithm in *ALG and final fixup in *VARIANT.
        When false is returned, *ALG and *VARIANT should not be used.  */
2789
2790 static bool
2791 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2792                      struct algorithm *alg, enum mult_variant *variant,
2793                      int mult_cost)
2794 {
2795   struct algorithm alg2;
2796   struct mult_cost limit;
2797   int op_cost;
2798
       /* First attempt: a sequence for VAL itself, with MULT_COST as the
          budget for both cost and latency.  */
2799   *variant = basic_variant;
2800   limit.cost = mult_cost;
2801   limit.latency = mult_cost;
2802   synth_mult (alg, val, &limit, mode);
2803
2804   /* This works only if the inverted value actually fits in an
2805      `unsigned int' */
2806   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2807     {
           /* The sequence for -VAL must beat the best result so far (or
              MULT_COST if nothing was found) after paying for the
              negation, so take that fixup cost out of the budget.  */
2808       op_cost = neg_cost[mode];
2809       if (MULT_COST_LESS (&alg->cost, mult_cost))
2810         {
2811           limit.cost = alg->cost.cost - op_cost;
2812           limit.latency = alg->cost.latency - op_cost;
2813         }
2814       else
2815         {
2816           limit.cost = mult_cost - op_cost;
2817           limit.latency = mult_cost - op_cost;
2818         }
2819
2820       synth_mult (&alg2, -val, &limit, mode);
2821       alg2.cost.cost += op_cost;
2822       alg2.cost.latency += op_cost;
2823       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2824         *alg = alg2, *variant = negate_variant;
2825     }
2826
2827   /* This proves very useful for division-by-constant.
          Same scheme as above for VAL - 1, with the cost of the final
          addition taken out of the budget.  */
2828   op_cost = add_cost[mode];
2829   if (MULT_COST_LESS (&alg->cost, mult_cost))
2830     {
2831       limit.cost = alg->cost.cost - op_cost;
2832       limit.latency = alg->cost.latency - op_cost;
2833     }
2834   else
2835     {
2836       limit.cost = mult_cost - op_cost;
2837       limit.latency = mult_cost - op_cost;
2838     }
2839
2840   synth_mult (&alg2, val - 1, &limit, mode);
2841   alg2.cost.cost += op_cost;
2842   alg2.cost.latency += op_cost;
2843   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2844     *alg = alg2, *variant = add_variant;
2845
2846   return MULT_COST_LESS (&alg->cost, mult_cost);
2847 }
2848
2849 /* A subroutine of expand_mult, used for constant multiplications.
2850    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2851    convenient.  Use the shift/add sequence described by ALG and apply
2852    the final fixup specified by VARIANT.  VAL itself is only used at the
        end, to assert that the emitted sequence really multiplies by VAL.
        Returns the rtx holding the product.  */
2853
2854 static rtx
2855 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2856                    rtx target, const struct algorithm *alg,
2857                    enum mult_variant variant)
2858 {
       /* VAL_SO_FAR tracks the constant that ACCUM currently equals OP0
          times, updated in step with every emitted operation.  */
2859   HOST_WIDE_INT val_so_far;
2860   rtx insn, accum, tem;
2861   int opno;
2862   enum machine_mode nmode;
2863
2864   /* Avoid referencing memory over and over.
2865      For speed, but also for correctness when mem is volatile.  */
2866   if (MEM_P (op0))
2867     op0 = force_reg (mode, op0);
2868
2869   /* ACCUM starts out either as OP0 or as a zero, depending on
2870      the first operation.  */
2871
2872   if (alg->op[0] == alg_zero)
2873     {
2874       accum = copy_to_mode_reg (mode, const0_rtx);
2875       val_so_far = 0;
2876     }
2877   else if (alg->op[0] == alg_m)
2878     {
2879       accum = copy_to_mode_reg (mode, op0);
2880       val_so_far = 1;
2881     }
2882   else
2883     gcc_unreachable ();
2884
2885   for (opno = 1; opno < alg->ops; opno++)
2886     {
2887       int log = alg->log[opno];
           /* Destination suggestions for this step.  When optimizing, no
              destination is suggested (0).  Otherwise ACCUM is suggested,
              except that the last step may go to TARGET if one was given
              and no add fixup follows.  */
2888       rtx shift_subtarget = optimize ? 0 : accum;
2889       rtx add_target
2890         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2891            && !optimize)
2892           ? target : 0;
2893       rtx accum_target = optimize ? 0 : accum;
2894
2895       switch (alg->op[opno])
2896         {
2897         case alg_shift:
               /* accum <<= log.  */
2898           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2899                                 build_int_cst (NULL_TREE, log),
2900                                 NULL_RTX, 0);
2901           val_so_far <<= log;
2902           break;
2903
2904         case alg_add_t_m2:
               /* accum += op0 << log.  */
2905           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2906                               build_int_cst (NULL_TREE, log),
2907                               NULL_RTX, 0);
2908           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2909                                  add_target ? add_target : accum_target);
2910           val_so_far += (HOST_WIDE_INT) 1 << log;
2911           break;
2912
2913         case alg_sub_t_m2:
               /* accum -= op0 << log.  */
2914           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2915                               build_int_cst (NULL_TREE, log),
2916                               NULL_RTX, 0);
2917           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2918                                  add_target ? add_target : accum_target);
2919           val_so_far -= (HOST_WIDE_INT) 1 << log;
2920           break;
2921
2922         case alg_add_t2_m:
               /* accum = (accum << log) + op0.  */
2923           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2924                                 build_int_cst (NULL_TREE, log),
2925                                 shift_subtarget,
2926                                 0);
2927           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2928                                  add_target ? add_target : accum_target);
2929           val_so_far = (val_so_far << log) + 1;
2930           break;
2931
2932         case alg_sub_t2_m:
               /* accum = (accum << log) - op0.  */
2933           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2934                                 build_int_cst (NULL_TREE, log),
2935                                 shift_subtarget, 0);
2936           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2937                                  add_target ? add_target : accum_target);
2938           val_so_far = (val_so_far << log) - 1;
2939           break;
2940
2941         case alg_add_factor:
               /* accum = accum + (accum << log).  */
2942           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2943                               build_int_cst (NULL_TREE, log),
2944                               NULL_RTX, 0);
2945           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2946                                  add_target ? add_target : accum_target);
2947           val_so_far += val_so_far << log;
2948           break;
2949
2950         case alg_sub_factor:
               /* accum = (accum << log) - accum.  */
2951           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2952                               build_int_cst (NULL_TREE, log),
2953                               NULL_RTX, 0);
2954           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2955                                  (add_target
2956                                   ? add_target : (optimize ? 0 : tem)));
2957           val_so_far = (val_so_far << log) - val_so_far;
2958           break;
2959
2960         default:
2961           gcc_unreachable ();
2962         }
2963
2964       /* Write a REG_EQUAL note on the last insn so that we can cse
2965          multiplication sequences.  Note that if ACCUM is a SUBREG,
2966          we've set the inner register and must properly indicate
2967          that.  */
2968
2969       tem = op0, nmode = mode;
2970       if (GET_CODE (accum) == SUBREG)
2971         {
2972           nmode = GET_MODE (SUBREG_REG (accum));
2973           tem = gen_lowpart (nmode, op0);
2974         }
2975
2976       insn = get_last_insn ();
2977       set_unique_reg_note (insn, REG_EQUAL,
2978                            gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
2979     }
2980
       /* Apply the final fixup selected by choose_mult_variant.  */
2981   if (variant == negate_variant)
2982     {
2983       val_so_far = -val_so_far;
2984       accum = expand_unop (mode, neg_optab, accum, target, 0);
2985     }
2986   else if (variant == add_variant)
2987     {
2988       val_so_far = val_so_far + 1;
2989       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2990     }
2991
2992   /* Compare only the bits of val and val_so_far that are significant
2993      in the result mode, to avoid sign-/zero-extension confusion.  */
2994   val &= GET_MODE_MASK (mode);
2995   val_so_far &= GET_MODE_MASK (mode);
2996   gcc_assert (val == val_so_far);
2997
2998   return accum;
2999 }
3000
3001 /* Perform a multiplication and return an rtx for the result.
3002    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3003    TARGET is a suggestion for where to store the result (an rtx).
        UNSIGNEDP is nonzero for an unsigned multiplication; for a signed
        one with flag_trapv set, the shift/add synthesis is skipped and,
        in integer modes, smulv_optab is used instead of smul_optab.
3004
3005    We check specially for a constant integer as OP1.
3006    If you want this check for OP0 as well, then before calling
3007    you should swap the two operands if OP0 would be constant.  */
3008
3009 rtx
3010 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3011              int unsignedp)
3012 {
       /* CONST_OP1 is the constant to hand to the synthesis code, or 0 if
          OP1 must not be treated that way.  */
3013   rtx const_op1 = op1;
3014   enum mult_variant variant;
3015   struct algorithm algorithm;
3016
3017   /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3018      less than or equal in size to `unsigned int' this doesn't matter.
3019      If the mode is larger than `unsigned int', then synth_mult works only
3020      if the constant value exactly fits in an `unsigned int' without any
3021      truncation.  This means that multiplying by negative values does
3022      not work; results are off by 2^32 on a 32 bit machine.  */
3023
3024   /* If we are multiplying in DImode, it may still be a win
3025      to try to work with shifts and adds.  */
3026   if (GET_CODE (op1) == CONST_DOUBLE
3027       && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
3028       && HOST_BITS_PER_INT >= BITS_PER_WORD
3029       && CONST_DOUBLE_HIGH (op1) == 0)
3030     const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
       /* A negative constant in a mode wider than `int' is the case the
          comment above says does not work, so disable the synthesis.  */
3031   else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
3032            && GET_CODE (op1) == CONST_INT
3033            && INTVAL (op1) < 0)
3034     const_op1 = 0;
3035
3036   /* We used to test optimize here, on the grounds that it's better to
3037      produce a smaller program when -O is not used.
3038      But this causes such a terrible slowdown sometimes
3039      that it seems better to use synth_mult always.  */
3040
3041   if (const_op1 && GET_CODE (const_op1) == CONST_INT
3042       && (unsignedp || !flag_trapv))
3043     {
3044       HOST_WIDE_INT coeff = INTVAL (const_op1);
3045       int mult_cost;
3046
3047       /* Special case powers of two.  */
3048       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3049         {
3050           if (coeff == 0)
3051             return const0_rtx;
3052           if (coeff == 1)
3053             return op0;
3054           return expand_shift (LSHIFT_EXPR, mode, op0,
3055                                build_int_cst (NULL_TREE, floor_log2 (coeff)),
3056                                target, unsignedp);
3057         }
3058
           /* The cost of a real multiply is the budget a shift/add
              sequence has to beat.  */
3059       mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
3060       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3061                                mult_cost))
3062         return expand_mult_const (mode, op0, coeff, target,
3063                                   &algorithm, variant);
3064     }
3065
       /* Put a CONST_DOUBLE operand second, where the check below looks
          for it.  */
3066   if (GET_CODE (op0) == CONST_DOUBLE)
3067     {
3068       rtx temp = op0;
3069       op0 = op1;
3070       op1 = temp;
3071     }
3072
3073   /* Expand x*2.0 as x+x.  */
3074   if (GET_CODE (op1) == CONST_DOUBLE
3075       && GET_MODE_CLASS (mode) == MODE_FLOAT)
3076     {
3077       REAL_VALUE_TYPE d;
3078       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3079
3080       if (REAL_VALUES_EQUAL (d, dconst2))
3081         {
3082           op0 = force_reg (GET_MODE (op0), op0);
3083           return expand_binop (mode, add_optab, op0, op0,
3084                                target, unsignedp, OPTAB_LIB_WIDEN);
3085         }
3086     }
3087
3088   /* This used to use umul_optab if unsigned, but for non-widening multiply
3089      there is no difference between signed and unsigned.  */
3090   op0 = expand_binop (mode,
3091                       ! unsignedp
3092                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3093                       ? smulv_optab : smul_optab,
3094                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3095   gcc_assert (op0);
3096   return op0;
3097 }
3098 \f
3099 /* Compute the ceiling of log2 (X): the least n for which 2**n >= X.  */
3100
3101 int
3102 ceil_log2 (unsigned HOST_WIDE_INT x)
3103 {
3104   return 1 + floor_log2 (x - 1);
3105 }
3106
3107 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3108    replace division by D, and put the least significant N bits of the result
3109    in *MULTIPLIER_PTR and return the most significant bit.
3110
3111    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3112    needed precision is in PRECISION (should be <= N).
3113
3114    PRECISION should be as small as possible so this function can choose
3115    multiplier more freely.
3116
3117    The rounded-up logarithm of D is placed in *LGUP_PTR.  A shift count that
3118    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3119
3120    Using this function, x/D will be equal to (x * m) >> (N + *POST_SHIFT_PTR),
3121    where m is the full N + 1 bit multiplier.  */
3122
3123 static
3124 unsigned HOST_WIDE_INT
3125 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3126                    rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3127 {
3128   HOST_WIDE_INT mhigh_hi, mlow_hi;
3129   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3130   int lgup, post_shift;
3131   int pow, pow2;
3132   unsigned HOST_WIDE_INT nl, dummy1;
3133   HOST_WIDE_INT nh, dummy2;
3134
3135   /* lgup = ceil(log2(divisor)); */
3136   lgup = ceil_log2 (d);
3137
3138   gcc_assert (lgup <= n);
3139
3140   pow = n + lgup;
3141   pow2 = n + lgup - precision;
3142
3143   /* We could handle this with some effort, but this case is much
3144      better handled directly with a scc insn, so rely on caller using
3145      that.  */
3146   gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3147
3148   /* mlow = 2^(N + lgup)/d.  Shift in the unsigned type: the bit may land in the sign position.  */
3149   if (pow >= HOST_BITS_PER_WIDE_INT)
3150     {
3151       nh = (unsigned HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3152       nl = 0;
3153     }
3154   else
3155     {
3156       nh = 0;
3157       nl = (unsigned HOST_WIDE_INT) 1 << pow;
3158     }
3159   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3160                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3161
3162   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3163   if (pow2 >= HOST_BITS_PER_WIDE_INT)
3164     nh |= (unsigned HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3165   else
3166     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3167   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3168                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3169
3170   gcc_assert (!mhigh_hi || nh - d < d);
3171   gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3172   /* Assert that mlow < mhigh.  */
3173   gcc_assert (mlow_hi < mhigh_hi
3174               || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3175
3176   /* If precision == N, then mlow, mhigh exceed 2^N
3177      (but they do not exceed 2^(N+1)).  */
3178
3179   /* Reduce to lowest terms: halve both bounds while they stay distinct.  */
3180   for (post_shift = lgup; post_shift > 0; post_shift--)
3181     {
3182       unsigned HOST_WIDE_INT ml_lo = (((unsigned HOST_WIDE_INT) mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1));
3183       unsigned HOST_WIDE_INT mh_lo = (((unsigned HOST_WIDE_INT) mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1));
3184       if (ml_lo >= mh_lo)
3185         break;
3186
3187       mlow_hi = 0;
3188       mlow_lo = ml_lo;
3189       mhigh_hi = 0;
3190       mhigh_lo = mh_lo;
3191     }
3192
3193   *post_shift_ptr = post_shift;
3194   *lgup_ptr = lgup;
3195   if (n < HOST_BITS_PER_WIDE_INT)
3196     {
3197       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3198       *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3199       return mhigh_lo > mask;	/* Bit N is set iff mhigh >= 2^N, i.e. > MASK.  */
3200     }
3201   else
3202     {
3203       *multiplier_ptr = GEN_INT (mhigh_lo);
3204       return mhigh_hi;
3205     }
3206 }
3207
3208 /* Return the multiplicative inverse of the odd value X modulo 2**N, that
3209    is, the Y for which X * Y is congruent to 1 (mod 2**N).  */
3210
3211 static unsigned HOST_WIDE_INT
3212 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3213 {
3214   /* Because X is assumed odd, X * X is congruent to 1 (mod 2^3), so X
3215      itself serves as a starting guess that is correct in its 3 low bits;
3216      every refinement step doubles the number of correct low-order bits.  */
3217
3218   unsigned HOST_WIDE_INT inverse = x;
3219   unsigned HOST_WIDE_INT low_bits;
3220   int good_bits;
3221
3222   /* LOW_BITS selects the N least significant bits.  The full-width case
3223      is handled separately so that no shift by the type width occurs.  */
3224   if (n == HOST_BITS_PER_WIDE_INT)
3225     low_bits = ~(unsigned HOST_WIDE_INT) 0;
3226   else
3227     low_bits = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3228
3229   /* When N <= 3 no step runs and X is returned as is (unmasked).  */
3230   for (good_bits = 3; good_bits < n; good_bits *= 2)
3231     inverse = (inverse * (2 - x * inverse)) & low_bits;	/* Modulo 2^N */
3232
3233   return inverse;
3234 }
3235
3236 /* ADJ_OPERAND is the high half of the product OP0 x OP1 as computed by a
3237    multiplication of the wrong signedness flavor.  Emit code to correct
3238    it: if UNSIGNEDP is nonzero, the signed product is adjusted to become
3239    unsigned; if UNSIGNEDP is zero, the unsigned product is adjusted to
3240    become signed.
3241
3242    MODE is the mode of operation.
3243
3244    The result is put in TARGET if that is convenient, and is returned.  */
3245
3246 rtx
3247 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3248                              rtx op1, rtx target, int unsignedp)
3249 {
3250   enum rtx_code fixup_code = unsignedp ? PLUS : MINUS;
3251   rtx sign_mask, term;
3252
3253   /* Shift OP0 right by the mode width minus one, AND with OP1, and apply.  */
3254   sign_mask = expand_shift (RSHIFT_EXPR, mode, op0,
3255                             build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3256                             NULL_RTX, 0);
3257   term = expand_and (mode, sign_mask, op1, NULL_RTX);
3258   term = gen_rtx_fmt_ee (fixup_code, mode, adj_operand, term);
3259   adj_operand = force_operand (term, adj_operand);
3260
3261   /* Likewise with the roles of OP0 and OP1 exchanged.  */
3262   sign_mask = expand_shift (RSHIFT_EXPR, mode, op1,
3263                             build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3264                             NULL_RTX, 0);
3265   term = expand_and (mode, sign_mask, op0, NULL_RTX);
3266   term = gen_rtx_fmt_ee (fixup_code, mode, adj_operand, term);
3267
3268   return force_operand (term, target);
3269 }
3270
3271 /* Helper for expand_mult_highpart: return the upper MODE-sized half of OP.  */
3272
3273 static rtx
3274 extract_high_half (enum machine_mode mode, rtx op)
3275 {
3276   if (mode != word_mode)
3277     {
3278       enum machine_mode wide = GET_MODE_WIDER_MODE (mode);
3279       tree count = build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode));
3280       rtx shifted = expand_shift (RSHIFT_EXPR, wide, op, count, 0, 1);
3281
3282       return convert_modes (mode, wide, shifted, 0);
3283     }
3284   return gen_highpart (mode, op);
3285 }
3286
3287 /* Like expand_mult_highpart, but only consider using a multiplication optab.  OP1 is an rtx
3288    for the constant operand.  Returns the rtx holding the high part, or 0 if nothing worked.  */
3289
3290 static rtx
3291 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3292                             rtx target, int unsignedp, int max_cost)
3293 {
3294   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);	/* OP1 as a constant for MODE.  */
3295   enum machine_mode wider_mode;
3296   optab moptab;
3297   rtx tem;
3298   int size;
3299   /* The strategies below are tried in order, each only if its estimated cost is below MAX_COST.  */
3300   wider_mode = GET_MODE_WIDER_MODE (mode);
3301   size = GET_MODE_BITSIZE (mode);
3302
3303   /* Firstly, try using a multiplication insn that only generates the needed
3304      high part of the product, and in the sign flavor of unsignedp.  */
3305   if (mul_highpart_cost[mode] < max_cost)
3306     {
3307       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3308       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3309                           unsignedp, OPTAB_DIRECT);
3310       if (tem)
3311         return tem;
3312     }
3313
3314   /* Secondly, same as above, but use sign flavor opposite of unsignedp.  Need to adjust the
3315      result after the multiplication; the adjustment is costed as 2 shifts plus 4 add-class ops.  */
3316   if (size - 1 < BITS_PER_WORD	/* NOTE(review): presumably keeps the shift_cost index in range -- confirm.  */
3317       && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
3318           + 4 * add_cost[mode] < max_cost))
3319     {
3320       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3321       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3322                           unsignedp, OPTAB_DIRECT);
3323       if (tem)
3324         /* We used the wrong signedness.  Adjust the result.  */
3325         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3326                                             tem, unsignedp);
3327     }
3328
3329   /* Try widening multiplication, then take the high half of the WIDER_MODE product.  */
3330   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3331   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3332       && mul_widen_cost[wider_mode] < max_cost)
3333     {
3334       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3335                           unsignedp, OPTAB_WIDEN);
3336       if (tem)
3337         return extract_high_half (mode, tem);
3338     }
3339
3340   /* Try widening the mode and perform a non-widening multiplication.  */
3341   if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3342       && size - 1 < BITS_PER_WORD
3343       && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
3344     {
3345       rtx insns, wop0, wop1;
3346
3347       /* We need to widen the operands, for example to ensure the
3348          constant multiplier is correctly sign or zero extended.
3349          Use a sequence to clean-up any instructions emitted by
3350          the conversions if things don't work out.  */
3351       start_sequence ();
3352       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3353       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3354       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3355                           unsignedp, OPTAB_WIDEN);
3356       insns = get_insns ();
3357       end_sequence ();
3358
3359       if (tem)
3360         {
3361           emit_insn (insns);	/* Success: commit the collected insns.  */
3362           return extract_high_half (mode, tem);
3363         }
3364     }
3365
3366   /* Try widening multiplication of opposite signedness, and adjust.  */
3367   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3368   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3369       && size - 1 < BITS_PER_WORD
3370       && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
3371           + 4 * add_cost[mode] < max_cost))
3372     {
3373       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3374                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3375       if (tem != 0)
3376         {
3377           tem = extract_high_half (mode, tem);
3378           /* We used the wrong signedness.  Adjust the result.  */
3379           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3380                                               target, unsignedp);
3381         }
3382     }
3383
3384   return 0;	/* No strategy was both usable and within MAX_COST.  */
3385 }
3386
3387 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3388    putting the high half of the result in TARGET if that is convenient,
3389    and return where the result is.  If the operation can not be performed,
3390    0 is returned.
3391
3392    MODE is the mode of operation and result.
3393
3394    UNSIGNEDP nonzero means unsigned multiply.
3395
3396    MAX_COST is the total allowed cost for the expanded RTL.  */
3397
3398 static rtx
3399 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3400                       rtx target, int unsignedp, int max_cost)
3401 {
3402   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3403   unsigned HOST_WIDE_INT cnst1;
3404   int extra_cost;
3405   bool sign_adjust = false;
3406   enum mult_variant variant;
3407   struct algorithm alg;
3408   rtx tem;
3409
3410   /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
3411   gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3412
3413   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);	/* OP1 reduced to the bits of MODE.  */
3414
3415   /* We can't optimize modes wider than BITS_PER_WORD: the shift/add
3416      synthesis below is carried out in WIDER_MODE.
3417      ??? We might be able to perform double-word arithmetic if
3418      mode == word_mode, however all the cost calculations in synth_mult etc. assume single-word operations.  */
3419   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3420     return expand_mult_highpart_optab (mode, op0, op1, target,
3421                                        unsignedp, max_cost);
3422
3423   extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3424
3425   /* Check whether we try to multiply by a negative constant.  */
3426   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3427     {
3428       sign_adjust = true;
3429       extra_cost += add_cost[mode];	/* For the subtraction of OP0 done at the end.  */
3430     }
3431
3432   /* See whether shift/add multiplication is cheap enough.  */
3433   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3434                            max_cost - extra_cost))
3435     {
3436       /* See whether the specialized multiplication optabs are
3437          cheaper than the shift/add version.  */
3438       tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3439                                         alg.cost.cost + extra_cost);
3440       if (tem)
3441         return tem;
3442
3443       tem = convert_to_mode (wider_mode, op0, unsignedp);
3444       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);	/* TARGET is not used here.  */
3445       tem = extract_high_half (mode, tem);
3446
3447       /* Adjust result for signedness: CNST1 stood for OP1 + 2^bitsize, so the high part is too large by OP0.  */
3448       if (sign_adjust)
3449         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3450
3451       return tem;
3452     }
3453   return expand_mult_highpart_optab (mode, op0, op1, target,
3454                                      unsignedp, max_cost);
3455 }
3456
3457
3458 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  Return an rtx holding the result.  */
3459
3460 static rtx
3461 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3462 {
3463   unsigned HOST_WIDE_INT masklow, maskhigh;
3464   rtx result, temp, shift, label;
3465   int logd;
3466
3467   logd = floor_log2 (d);
3468   result = gen_reg_rtx (mode);
3469
3470   /* Avoid conditional branches when they're expensive.  */
3471   if (BRANCH_COST >= 2
3472       && !optimize_size)
3473     {
3474       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3475                                       mode, 0, -1);
3476       if (signmask)	/* On failure, fall back to the branching code below.  */
3477         {
3478           signmask = force_reg (mode, signmask);
3479           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3480           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3481
3482           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3483              which instruction sequence to use.  If logical right shifts
3484              are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3485              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3486
3487           temp = gen_rtx_LSHIFTRT (mode, result, shift);	/* Built only to be costed below.  */
3488           if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing
3489               || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3490             {
3491               temp = expand_binop (mode, xor_optab, op0, signmask,
3492                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3493               temp = expand_binop (mode, sub_optab, temp, signmask,
3494                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3495               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3496                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3497               temp = expand_binop (mode, xor_optab, temp, signmask,
3498                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3499               temp = expand_binop (mode, sub_optab, temp, signmask,
3500                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3501             }
3502           else
3503             {
3504               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3505                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3506               signmask = force_reg (mode, signmask);
3507
3508               temp = expand_binop (mode, add_optab, op0, signmask,
3509                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3510               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3511                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3512               temp = expand_binop (mode, sub_optab, temp, signmask,
3513                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3514             }
3515           return temp;
3516         }
3517     }
3518
3519   /* Mask contains the mode's signbit and the significant bits of the
3520      modulus.  By including the signbit in the operation, many targets
3521      can avoid an explicit compare operation in the following comparison
3522      against zero.  */
3523
3524   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3525   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3526     {
3527       masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3528       maskhigh = -1;
3529     }
3530   else
3531     maskhigh = (HOST_WIDE_INT) -1
3532                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3533
3534   temp = expand_binop (mode, and_optab, op0,
3535                        immed_double_const (masklow, maskhigh, mode),
3536                        result, 1, OPTAB_LIB_WIDEN);
3537   if (temp != result)
3538     emit_move_insn (result, temp);
3539
3540   label = gen_label_rtx ();
3541   do_cmp_and_jump (result, const0_rtx, GE, mode, label);	/* Nonnegative: RESULT is already the answer.  */
3542   /* Negative case: ((RESULT - 1) | -2^logd) + 1 gives 0 if the low LOGD bits are 0, else those bits with ones above.  */
3543   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3544                        0, OPTAB_LIB_WIDEN);
3545   masklow = (HOST_WIDE_INT) -1 << logd;
3546   maskhigh = -1;
3547   temp = expand_binop (mode, ior_optab, temp,
3548                        immed_double_const (masklow, maskhigh, mode),
3549                        result, 1, OPTAB_LIB_WIDEN);
3550   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3551                        0, OPTAB_LIB_WIDEN);
3552   if (temp != result)
3553     emit_move_insn (result, temp);
3554   emit_label (label);
3555   return result;
3556 }
3557
3558 /* Expand signed division of OP0 by a power of two D in mode MODE.  This routine is only
3559    called for positive values of D.  Returns an rtx for (OP0 + (OP0 < 0 ? D - 1 : 0)) >> log2 (D).  */
3560
3561 static rtx
3562 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3563 {
3564   rtx temp, label;
3565   tree shift;
3566   int logd;
3567
3568   logd = floor_log2 (d);
3569   shift = build_int_cst (NULL_TREE, logd);
3570   /* For D == 2 the bias D - 1 is 1, so a 0/1 store-flag of OP0 < 0 can be added directly.  */
3571   if (d == 2 && BRANCH_COST >= 1)
3572     {
3573       temp = gen_reg_rtx (mode);
3574       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3575       if (temp != NULL_RTX)	/* If the flag could not be computed, try the methods below.  */
3576         {
3577           temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3578                                0, OPTAB_LIB_WIDEN);
3579           return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3580         }
3581     }
3582
3583 #ifdef HAVE_conditional_move
3584   if (BRANCH_COST >= 2)
3585     {
3586       rtx temp2;
3587       /* ??? emit_conditional_move forces a stack adjustment via compare_from_rtx
3588          so, if the sequence is discarded, it will be lost.  Do it now instead.  */
3589       do_pending_stack_adjust ();
3590
3591       start_sequence ();
3592       temp2 = copy_to_mode_reg (mode, op0);
3593       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3594                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3595       temp = force_reg (mode, temp);
3596       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3597       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3598                                      mode, temp, temp2, mode, 0);
3599       if (temp2)
3600         {
3601           rtx seq = get_insns ();
3602           end_sequence ();
3603           emit_insn (seq);
3604           return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3605         }
3606       end_sequence ();	/* No conditional move: the collected insns are not emitted.  */
3607     }
3608 #endif
3609   /* Branch-free variant: turn a -1/0 flag for OP0 < 0 into D - 1 or 0 and add it to OP0.  */
3610   if (BRANCH_COST >= 2)
3611     {
3612       int ushift = GET_MODE_BITSIZE (mode) - logd;
3613
3614       temp = gen_reg_rtx (mode);
3615       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3616       if (temp != NULL_RTX)	/* On failure, use the branching code below.  */
3617         {
3618           if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
3619             temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3620                                  NULL_RTX, 0, OPTAB_LIB_WIDEN);
3621           else
3622             temp = expand_shift (RSHIFT_EXPR, mode, temp,
3623                                  build_int_cst (NULL_TREE, ushift), NULL_RTX, 1);
3624           temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3625                                0, OPTAB_LIB_WIDEN);
3626           return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3627         }
3628     }
3629   /* Fallback: skip the addition of D - 1 with a branch when OP0 >= 0.  */
3630   label = gen_label_rtx ();
3631   temp = copy_to_mode_reg (mode, op0);
3632   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3633   expand_inc (temp, GEN_INT (d - 1));
3634   emit_label (label);
3635   return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3636 }
3635 \f
3636 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3637    if that is convenient, and returning where the result is.
3638    You may request either the quotient or the remainder as the result;
3639    specify REM_FLAG nonzero to get the remainder.
3640
3641    CODE is the expression code for which kind of division this is;
3642    it controls how rounding is done.  MODE is the machine mode to use.
3643    UNSIGNEDP nonzero means do unsigned division.  */
3644
3645 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3646    and then correct it by or'ing in missing high bits
3647    if result of ANDI is nonzero.
3648    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3649    This could optimize to a bfexts instruction.
3650    But C doesn't use these operations, so their optimizations are
3651    left for later.  */
3652 /* ??? For modulo, we don't actually need the highpart of the first product,
3653    the low part will do nicely.  And for small divisors, the second multiply
3654    can also be a low-part only multiply or even be completely left out.
3655    E.g. to calculate the remainder of a division by 3 with a 32 bit
3656    multiply, multiply with 0x55555556 and extract the upper two bits;
3657    the result is exact for inputs up to 0x1fffffff.
3658    The input range can be reduced by using cross-sum rules.
3659    For odd divisors >= 3, the following table gives right shift counts
3660    so that if a number is shifted by an integer multiple of the given
3661    amount, the remainder stays the same:
3662    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3663    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3664    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3665    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3666    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3667
3668    Cross-sum rules for even numbers can be derived by leaving as many bits
3669    to the right alone as the divisor has zeros to the right.
3670    E.g. if x is an unsigned 32 bit number:
3671    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3672    */
3673
3674 rtx
3675 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3676                rtx op0, rtx op1, rtx target, int unsignedp)
3677 {
3678   enum machine_mode compute_mode;
3679   rtx tquotient;
3680   rtx quotient = 0, remainder = 0;
3681   rtx last;
3682   int size;
3683   rtx insn, set;
3684   optab optab1, optab2;
3685   int op1_is_constant, op1_is_pow2 = 0;
3686   int max_cost, extra_cost;
3687   static HOST_WIDE_INT last_div_const = 0;
3688   static HOST_WIDE_INT ext_op1;
3689
3690   op1_is_constant = GET_CODE (op1) == CONST_INT;
3691   if (op1_is_constant)
3692     {
3693       ext_op1 = INTVAL (op1);
3694       if (unsignedp)
3695         ext_op1 &= GET_MODE_MASK (mode);
3696       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3697                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3698     }
3699
3700   /*
3701      This is the structure of expand_divmod:
3702
3703      First comes code to fix up the operands so we can perform the operations
3704      correctly and efficiently.
3705
3706      Second comes a switch statement with code specific for each rounding mode.
3707      For some special operands this code emits all RTL for the desired
3708      operation, for other cases, it generates only a quotient and stores it in
3709      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3710      to indicate that it has not done anything.
3711
3712      Last comes code that finishes the operation.  If QUOTIENT is set and
3713      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3714      QUOTIENT is not set, it is computed using trunc rounding.
3715
3716      We try to generate special code for division and remainder when OP1 is a
3717      constant.  If |OP1| = 2**n we can use shifts and some other fast
3718      operations.  For other values of OP1, we compute a carefully selected
3719      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3720      by m.
3721
3722      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3723      half of the product.  Different strategies for generating the product are
3724      implemented in expand_mult_highpart.
3725
3726      If what we actually want is the remainder, we generate that by another
3727      by-constant multiplication and a subtraction.  */
3728
3729   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3730      code below will malfunction if we are, so check here and handle
3731      the special case if so.  */
3732   if (op1 == const1_rtx)
3733     return rem_flag ? const0_rtx : op0;
3734
3735     /* When dividing by -1, we could get an overflow.
3736      negv_optab can handle overflows.  */
3737   if (! unsignedp && op1 == constm1_rtx)
3738     {
3739       if (rem_flag)
3740         return const0_rtx;
3741       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3742                           ? negv_optab : neg_optab, op0, target, 0);
3743     }
3744
3745   if (target
3746       /* Don't use the function value register as a target
3747          since we have to read it as well as write it,
3748          and function-inlining gets confused by this.  */
3749       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3750           /* Don't clobber an operand while doing a multi-step calculation.  */
3751           || ((rem_flag || op1_is_constant)
3752               && (reg_mentioned_p (target, op0)
3753                   || (MEM_P (op0) && MEM_P (target))))
3754           || reg_mentioned_p (target, op1)
3755           || (MEM_P (op1) && MEM_P (target))))
3756     target = 0;
3757
3758   /* Get the mode in which to perform this computation.  Normally it will
3759      be MODE, but sometimes we can't do the desired operation in MODE.
3760      If so, pick a wider mode in which we can do the operation.  Convert
3761      to that mode at the start to avoid repeated conversions.
3762
3763      First see what operations we need.  These depend on the expression
3764      we are evaluating.  (We assume that divxx3 insns exist under the
3765      same conditions that modxx3 insns and that these insns don't normally
3766      fail.  If these assumptions are not correct, we may generate less
3767      efficient code in some cases.)
3768
3769      Then see if we find a mode in which we can open-code that operation
3770      (either a division, modulus, or shift).  Finally, check for the smallest
3771      mode for which we can do the operation with a library call.  */
3772
3773   /* We might want to refine this now that we have division-by-constant
3774      optimization.  Since expand_mult_highpart tries so many variants, it is
3775      not straightforward to generalize this.  Maybe we should make an array
3776      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3777
3778   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3779             ? (unsignedp ? lshr_optab : ashr_optab)
3780             : (unsignedp ? udiv_optab : sdiv_optab));
3781   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3782             ? optab1
3783             : (unsignedp ? udivmod_optab : sdivmod_optab));
3784
3785   for (compute_mode = mode; compute_mode != VOIDmode;
3786        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3787     if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
3788         || optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
3789       break;
3790
3791   if (compute_mode == VOIDmode)
3792     for (compute_mode = mode; compute_mode != VOIDmode;
3793          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3794       if (optab1->handlers[compute_mode].libfunc
3795           || optab2->handlers[compute_mode].libfunc)
3796         break;
3797
3798   /* If we still couldn't find a mode, use MODE, but we'll probably abort
3799      in expand_binop.  */
3800   if (compute_mode == VOIDmode)
3801     compute_mode = mode;
3802
3803   if (target && GET_MODE (target) == compute_mode)
3804     tquotient = target;
3805   else
3806     tquotient = gen_reg_rtx (compute_mode);
3807
3808   size = GET_MODE_BITSIZE (compute_mode);
3809 #if 0
3810   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3811      (mode), and thereby get better code when OP1 is a constant.  Do that
3812      later.  It will require going over all usages of SIZE below.  */
3813   size = GET_MODE_BITSIZE (mode);
3814 #endif
3815
3816   /* Only deduct something for a REM if the last divide done was
3817      for a different constant.   Then set the constant of the last
3818      divide.  */
3819   max_cost = div_cost[compute_mode]
3820     - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3821                       && INTVAL (op1) == last_div_const)
3822        ? mul_cost[compute_mode] + add_cost[compute_mode]
3823        : 0);
3824
3825   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3826
3827   /* Now convert to the best mode to use.  */
3828   if (compute_mode != mode)
3829     {
3830       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3831       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3832
3833       /* convert_modes may have placed op1 into a register, so we
3834          must recompute the following.  */
3835       op1_is_constant = GET_CODE (op1) == CONST_INT;
3836       op1_is_pow2 = (op1_is_constant
3837                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3838                           || (! unsignedp
3839                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3840     }
3841
3842   /* If one of the operands is a volatile MEM, copy it into a register.  */
3843
3844   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3845     op0 = force_reg (compute_mode, op0);
3846   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3847     op1 = force_reg (compute_mode, op1);
3848
3849   /* If we need the remainder or if OP1 is constant, we need to
3850      put OP0 in a register in case it has any queued subexpressions.  */
3851   if (rem_flag || op1_is_constant)
3852     op0 = force_reg (compute_mode, op0);
3853
3854   last = get_last_insn ();
3855
3856   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3857   if (unsignedp)
3858     {
3859       if (code == FLOOR_DIV_EXPR)
3860         code = TRUNC_DIV_EXPR;
3861       if (code == FLOOR_MOD_EXPR)
3862         code = TRUNC_MOD_EXPR;
3863       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3864         code = TRUNC_DIV_EXPR;
3865     }
3866
3867   if (op1 != const0_rtx)
3868     switch (code)
3869       {
3870       case TRUNC_MOD_EXPR:
3871       case TRUNC_DIV_EXPR:
3872         if (op1_is_constant)
3873           {
3874             if (unsignedp)
3875               {
3876                 unsigned HOST_WIDE_INT mh;
3877                 int pre_shift, post_shift;
3878                 int dummy;
3879                 rtx ml;
3880                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3881                                             & GET_MODE_MASK (compute_mode));
3882
3883                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3884                   {
3885                     pre_shift = floor_log2 (d);
3886                     if (rem_flag)
3887                       {
3888                         remainder
3889                           = expand_binop (compute_mode, and_optab, op0,
3890                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3891                                           remainder, 1,
3892                                           OPTAB_LIB_WIDEN);
3893                         if (remainder)
3894                           return gen_lowpart (mode, remainder);
3895                       }
3896                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3897                                              build_int_cst (NULL_TREE,
3898                                                             pre_shift),
3899                                              tquotient, 1);
3900                   }
3901                 else if (size <= HOST_BITS_PER_WIDE_INT)
3902                   {
3903                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3904                       {
3905                         /* Most significant bit of divisor is set; emit an scc
3906                            insn.  */
3907                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3908                                                     compute_mode, 1, 1);
3909                         if (quotient == 0)
3910                           goto fail1;
3911                       }
3912                     else
3913                       {
3914                         /* Find a suitable multiplier and right shift count
3915                            instead of multiplying with D.  */
3916
3917                         mh = choose_multiplier (d, size, size,
3918                                                 &ml, &post_shift, &dummy);
3919
3920                         /* If the suggested multiplier is more than SIZE bits,
3921                            we can do better for even divisors, using an
3922                            initial right shift.  */
3923                         if (mh != 0 && (d & 1) == 0)
3924                           {
3925                             pre_shift = floor_log2 (d & -d);
3926                             mh = choose_multiplier (d >> pre_shift, size,
3927                                                     size - pre_shift,
3928                                                     &ml, &post_shift, &dummy);
3929                             gcc_assert (!mh);
3930                           }
3931                         else
3932                           pre_shift = 0;
3933
3934                         if (mh != 0)
3935                           {
3936                             rtx t1, t2, t3, t4;
3937
3938                             if (post_shift - 1 >= BITS_PER_WORD)
3939                               goto fail1;
3940
3941                             extra_cost
3942                               = (shift_cost[compute_mode][post_shift - 1]
3943                                  + shift_cost[compute_mode][1]
3944                                  + 2 * add_cost[compute_mode]);
3945                             t1 = expand_mult_highpart (compute_mode, op0, ml,
3946                                                        NULL_RTX, 1,
3947                                                        max_cost - extra_cost);
3948                             if (t1 == 0)
3949                               goto fail1;
3950                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
3951                                                                op0, t1),
3952                                                 NULL_RTX);
3953                             t3 = expand_shift
3954                               (RSHIFT_EXPR, compute_mode, t2,
3955                                build_int_cst (NULL_TREE, 1),
3956                                NULL_RTX,1);
3957                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
3958                                                               t1, t3),
3959                                                 NULL_RTX);
3960                             quotient = expand_shift
3961                               (RSHIFT_EXPR, compute_mode, t4,
3962                                build_int_cst (NULL_TREE, post_shift - 1),
3963                                tquotient, 1);
3964                           }
3965                         else
3966                           {
3967                             rtx t1, t2;
3968
3969                             if (pre_shift >= BITS_PER_WORD
3970                                 || post_shift >= BITS_PER_WORD)
3971                               goto fail1;
3972
3973                             t1 = expand_shift
3974                               (RSHIFT_EXPR, compute_mode, op0,
3975                                build_int_cst (NULL_TREE, pre_shift),
3976                                NULL_RTX, 1);
3977                             extra_cost
3978                               = (shift_cost[compute_mode][pre_shift]
3979                                  + shift_cost[compute_mode][post_shift]);
3980                             t2 = expand_mult_highpart (compute_mode, t1, ml,
3981                                                        NULL_RTX, 1,
3982                                                        max_cost - extra_cost);
3983                             if (t2 == 0)
3984                               goto fail1;
3985                             quotient = expand_shift
3986                               (RSHIFT_EXPR, compute_mode, t2,
3987                                build_int_cst (NULL_TREE, post_shift),
3988                                tquotient, 1);
3989                           }
3990                       }
3991                   }
3992                 else            /* Too wide mode to use tricky code */
3993                   break;
3994
3995                 insn = get_last_insn ();
3996                 if (insn != last
3997                     && (set = single_set (insn)) != 0
3998                     && SET_DEST (set) == quotient)
3999                   set_unique_reg_note (insn,
4000                                        REG_EQUAL,
4001                                        gen_rtx_UDIV (compute_mode, op0, op1));
4002               }
4003             else                /* TRUNC_DIV, signed */
4004               {
4005                 unsigned HOST_WIDE_INT ml;
4006                 int lgup, post_shift;
4007                 rtx mlr;
4008                 HOST_WIDE_INT d = INTVAL (op1);
4009                 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
4010
4011                 /* n rem d = n rem -d */
4012                 if (rem_flag && d < 0)
4013                   {
4014                     d = abs_d;
4015                     op1 = gen_int_mode (abs_d, compute_mode);
4016                   }
4017
4018                 if (d == 1)
4019                   quotient = op0;
4020                 else if (d == -1)
4021                   quotient = expand_unop (compute_mode, neg_optab, op0,
4022                                           tquotient, 0);
4023                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4024                   {
4025                     /* This case is not handled correctly below.  */
4026                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4027                                                 compute_mode, 1, 1);
4028                     if (quotient == 0)
4029                       goto fail1;
4030                   }
4031                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4032                          && (rem_flag ? smod_pow2_cheap[compute_mode]
4033                                       : sdiv_pow2_cheap[compute_mode])
4034                          /* We assume that cheap metric is true if the
4035                             optab has an expander for this mode.  */
4036                          && (((rem_flag ? smod_optab : sdiv_optab)
4037                               ->handlers[compute_mode].insn_code
4038                               != CODE_FOR_nothing)
4039                              || (sdivmod_optab->handlers[compute_mode]
4040                                  .insn_code != CODE_FOR_nothing)))
4041                   ;
4042                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4043                   {
4044                     if (rem_flag)
4045                       {
4046                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4047                         if (remainder)
4048                           return gen_lowpart (mode, remainder);
4049                       }
4050
4051                     if (sdiv_pow2_cheap[compute_mode]
4052                         && ((sdiv_optab->handlers[compute_mode].insn_code
4053                              != CODE_FOR_nothing)
4054                             || (sdivmod_optab->handlers[compute_mode].insn_code
4055                                 != CODE_FOR_nothing)))
4056                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4057                                                 compute_mode, op0,
4058                                                 gen_int_mode (abs_d,
4059                                                               compute_mode),
4060                                                 NULL_RTX, 0);
4061                     else
4062                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4063
4064                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4065                        negate the quotient.  */
4066                     if (d < 0)
4067                       {
4068                         insn = get_last_insn ();
4069                         if (insn != last
4070                             && (set = single_set (insn)) != 0
4071                             && SET_DEST (set) == quotient
4072                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4073                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4074                           set_unique_reg_note (insn,
4075                                                REG_EQUAL,
4076                                                gen_rtx_DIV (compute_mode,
4077                                                             op0,
4078                                                             GEN_INT
4079                                                             (trunc_int_for_mode
4080                                                              (abs_d,
4081                                                               compute_mode))));
4082
4083                         quotient = expand_unop (compute_mode, neg_optab,
4084                                                 quotient, quotient, 0);
4085                       }
4086                   }
4087                 else if (size <= HOST_BITS_PER_WIDE_INT)
4088                   {
4089                     choose_multiplier (abs_d, size, size - 1,
4090                                        &mlr, &post_shift, &lgup);
4091                     ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4092                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4093                       {
4094                         rtx t1, t2, t3;
4095
4096                         if (post_shift >= BITS_PER_WORD
4097                             || size - 1 >= BITS_PER_WORD)
4098                           goto fail1;
4099
4100                         extra_cost = (shift_cost[compute_mode][post_shift]
4101                                       + shift_cost[compute_mode][size - 1]
4102                                       + add_cost[compute_mode]);
4103                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4104                                                    NULL_RTX, 0,
4105                                                    max_cost - extra_cost);
4106                         if (t1 == 0)
4107                           goto fail1;
4108                         t2 = expand_shift
4109                           (RSHIFT_EXPR, compute_mode, t1,
4110                            build_int_cst (NULL_TREE, post_shift),
4111                            NULL_RTX, 0);
4112                         t3 = expand_shift
4113                           (RSHIFT_EXPR, compute_mode, op0,
4114                            build_int_cst (NULL_TREE, size - 1),
4115                            NULL_RTX, 0);
4116                         if (d < 0)
4117                           quotient
4118                             = force_operand (gen_rtx_MINUS (compute_mode,
4119                                                             t3, t2),
4120                                              tquotient);
4121                         else
4122                           quotient
4123                             = force_operand (gen_rtx_MINUS (compute_mode,
4124                                                             t2, t3),
4125                                              tquotient);
4126                       }
4127                     else
4128                       {
4129                         rtx t1, t2, t3, t4;
4130
4131                         if (post_shift >= BITS_PER_WORD
4132                             || size - 1 >= BITS_PER_WORD)
4133                           goto fail1;
4134
4135                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4136                         mlr = gen_int_mode (ml, compute_mode);
4137                         extra_cost = (shift_cost[compute_mode][post_shift]
4138                                       + shift_cost[compute_mode][size - 1]
4139                                       + 2 * add_cost[compute_mode]);
4140                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4141                                                    NULL_RTX, 0,
4142                                                    max_cost - extra_cost);
4143                         if (t1 == 0)
4144                           goto fail1;
4145                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4146                                                           t1, op0),
4147                                             NULL_RTX);
4148                         t3 = expand_shift
4149                           (RSHIFT_EXPR, compute_mode, t2,
4150                            build_int_cst (NULL_TREE, post_shift),
4151                            NULL_RTX, 0);
4152                         t4 = expand_shift
4153                           (RSHIFT_EXPR, compute_mode, op0,
4154                            build_int_cst (NULL_TREE, size - 1),
4155                            NULL_RTX, 0);
4156                         if (d < 0)
4157                           quotient
4158                             = force_operand (gen_rtx_MINUS (compute_mode,
4159                                                             t4, t3),
4160                                              tquotient);
4161                         else
4162                           quotient
4163                             = force_operand (gen_rtx_MINUS (compute_mode,
4164                                                             t3, t4),
4165                                              tquotient);
4166                       }
4167                   }
4168                 else            /* Too wide mode to use tricky code */
4169                   break;
4170
4171                 insn = get_last_insn ();
4172                 if (insn != last
4173                     && (set = single_set (insn)) != 0
4174                     && SET_DEST (set) == quotient)
4175                   set_unique_reg_note (insn,
4176                                        REG_EQUAL,
4177                                        gen_rtx_DIV (compute_mode, op0, op1));
4178               }
4179             break;
4180           }
4181       fail1:
4182         delete_insns_since (last);
4183         break;
4184
4185       case FLOOR_DIV_EXPR:
4186       case FLOOR_MOD_EXPR:
4187       /* We will come here only for signed operations.  */
4188         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4189           {
4190             unsigned HOST_WIDE_INT mh;
4191             int pre_shift, lgup, post_shift;
4192             HOST_WIDE_INT d = INTVAL (op1);
4193             rtx ml;
4194
4195             if (d > 0)
4196               {
4197                 /* We could just as easily deal with negative constants here,
4198                    but it does not seem worth the trouble for GCC 2.6.  */
4199                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4200                   {
4201                     pre_shift = floor_log2 (d);
4202                     if (rem_flag)
4203                       {
4204                         remainder = expand_binop (compute_mode, and_optab, op0,
4205                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4206                                                   remainder, 0, OPTAB_LIB_WIDEN);
4207                         if (remainder)
4208                           return gen_lowpart (mode, remainder);
4209                       }
4210                     quotient = expand_shift
4211                       (RSHIFT_EXPR, compute_mode, op0,
4212                        build_int_cst (NULL_TREE, pre_shift),
4213                        tquotient, 0);
4214                   }
4215                 else
4216                   {
4217                     rtx t1, t2, t3, t4;
4218
4219                     mh = choose_multiplier (d, size, size - 1,
4220                                             &ml, &post_shift, &lgup);
4221                     gcc_assert (!mh);
4222
4223                     if (post_shift < BITS_PER_WORD
4224                         && size - 1 < BITS_PER_WORD)
4225                       {
4226                         t1 = expand_shift
4227                           (RSHIFT_EXPR, compute_mode, op0,
4228                            build_int_cst (NULL_TREE, size - 1),
4229                            NULL_RTX, 0);
4230                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4231                                            NULL_RTX, 0, OPTAB_WIDEN);
4232                         extra_cost = (shift_cost[compute_mode][post_shift]
4233                                       + shift_cost[compute_mode][size - 1]
4234                                       + 2 * add_cost[compute_mode]);
4235                         t3 = expand_mult_highpart (compute_mode, t2, ml,
4236                                                    NULL_RTX, 1,
4237                                                    max_cost - extra_cost);
4238                         if (t3 != 0)
4239                           {
4240                             t4 = expand_shift
4241                               (RSHIFT_EXPR, compute_mode, t3,
4242                                build_int_cst (NULL_TREE, post_shift),
4243                                NULL_RTX, 1);
4244                             quotient = expand_binop (compute_mode, xor_optab,
4245                                                      t4, t1, tquotient, 0,
4246                                                      OPTAB_WIDEN);
4247                           }
4248                       }
4249                   }
4250               }
4251             else
4252               {
4253                 rtx nsign, t1, t2, t3, t4;
4254                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4255                                                   op0, constm1_rtx), NULL_RTX);
4256                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4257                                    0, OPTAB_WIDEN);
4258                 nsign = expand_shift
4259                   (RSHIFT_EXPR, compute_mode, t2,
4260                    build_int_cst (NULL_TREE, size - 1),
4261                    NULL_RTX, 0);
4262                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4263                                     NULL_RTX);
4264                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4265                                     NULL_RTX, 0);
4266                 if (t4)
4267                   {
4268                     rtx t5;
4269                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4270                                       NULL_RTX, 0);
4271                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4272                                                             t4, t5),
4273                                               tquotient);
4274                   }
4275               }
4276           }
4277
4278         if (quotient != 0)
4279           break;
4280         delete_insns_since (last);
4281
4282         /* Try using an instruction that produces both the quotient and
4283            remainder, using truncation.  We can easily compensate the quotient
4284            or remainder to get floor rounding, once we have the remainder.
4285            Notice that we compute also the final remainder value here,
4286            and return the result right away.  */
4287         if (target == 0 || GET_MODE (target) != compute_mode)
4288           target = gen_reg_rtx (compute_mode);
4289
4290         if (rem_flag)
4291           {
4292             remainder
4293               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4294             quotient = gen_reg_rtx (compute_mode);
4295           }
4296         else
4297           {
4298             quotient
4299               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4300             remainder = gen_reg_rtx (compute_mode);
4301           }
4302
4303         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4304                                  quotient, remainder, 0))
4305           {
4306             /* This could be computed with a branch-less sequence.
4307                Save that for later.  */
4308             rtx tem;
4309             rtx label = gen_label_rtx ();
4310             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4311             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4312                                 NULL_RTX, 0, OPTAB_WIDEN);
4313             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4314             expand_dec (quotient, const1_rtx);
4315             expand_inc (remainder, op1);
4316             emit_label (label);
4317             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4318           }
4319
4320         /* No luck with division elimination or divmod.  Have to do it
4321            by conditionally adjusting op0 *and* the result.  */
4322         {
4323           rtx label1, label2, label3, label4, label5;
4324           rtx adjusted_op0;
4325           rtx tem;
4326
4327           quotient = gen_reg_rtx (compute_mode);
4328           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4329           label1 = gen_label_rtx ();
4330           label2 = gen_label_rtx ();
4331           label3 = gen_label_rtx ();
4332           label4 = gen_label_rtx ();
4333           label5 = gen_label_rtx ();
4334           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4335           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4336           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4337                               quotient, 0, OPTAB_LIB_WIDEN);
4338           if (tem != quotient)
4339             emit_move_insn (quotient, tem);
4340           emit_jump_insn (gen_jump (label5));
4341           emit_barrier ();
4342           emit_label (label1);
4343           expand_inc (adjusted_op0, const1_rtx);
4344           emit_jump_insn (gen_jump (label4));
4345           emit_barrier ();
4346           emit_label (label2);
4347           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4348           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4349                               quotient, 0, OPTAB_LIB_WIDEN);
4350           if (tem != quotient)
4351             emit_move_insn (quotient, tem);
4352           emit_jump_insn (gen_jump (label5));
4353           emit_barrier ();
4354           emit_label (label3);
4355           expand_dec (adjusted_op0, const1_rtx);
4356           emit_label (label4);
4357           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4358                               quotient, 0, OPTAB_LIB_WIDEN);
4359           if (tem != quotient)
4360             emit_move_insn (quotient, tem);
4361           expand_dec (quotient, const1_rtx);
4362           emit_label (label5);
4363         }
4364         break;
4365
4366       case CEIL_DIV_EXPR:
4367       case CEIL_MOD_EXPR:
4368         if (unsignedp)
4369           {
4370             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4371               {
4372                 rtx t1, t2, t3;
4373                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4374                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4375                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4376                                    tquotient, 1);
4377                 t2 = expand_binop (compute_mode, and_optab, op0,
4378                                    GEN_INT (d - 1),
4379                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4380                 t3 = gen_reg_rtx (compute_mode);
4381                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4382                                       compute_mode, 1, 1);
4383                 if (t3 == 0)
4384                   {
4385                     rtx lab;
4386                     lab = gen_label_rtx ();
4387                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4388                     expand_inc (t1, const1_rtx);
4389                     emit_label (lab);
4390                     quotient = t1;
4391                   }
4392                 else
4393                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4394                                                           t1, t3),
4395                                             tquotient);
4396                 break;
4397               }
4398
4399             /* Try using an instruction that produces both the quotient and
4400                remainder, using truncation.  We can easily compensate the
4401                quotient or remainder to get ceiling rounding, once we have the
4402                remainder.  Notice that we compute also the final remainder
4403                value here, and return the result right away.  */
4404             if (target == 0 || GET_MODE (target) != compute_mode)
4405               target = gen_reg_rtx (compute_mode);
4406
4407             if (rem_flag)
4408               {
4409                 remainder = (REG_P (target)
4410                              ? target : gen_reg_rtx (compute_mode));
4411                 quotient = gen_reg_rtx (compute_mode);
4412               }
4413             else
4414               {
4415                 quotient = (REG_P (target)
4416                             ? target : gen_reg_rtx (compute_mode));
4417                 remainder = gen_reg_rtx (compute_mode);
4418               }
4419
4420             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4421                                      remainder, 1))
4422               {
4423                 /* This could be computed with a branch-less sequence.
4424                    Save that for later.  */
4425                 rtx label = gen_label_rtx ();
4426                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4427                                  compute_mode, label);
4428                 expand_inc (quotient, const1_rtx);
4429                 expand_dec (remainder, op1);
4430                 emit_label (label);
4431                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4432               }
4433
4434             /* No luck with division elimination or divmod.  Have to do it
4435                by conditionally adjusting op0 *and* the result.  */
4436             {
4437               rtx label1, label2;
4438               rtx adjusted_op0, tem;
4439
4440               quotient = gen_reg_rtx (compute_mode);
4441               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4442               label1 = gen_label_rtx ();
4443               label2 = gen_label_rtx ();
4444               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4445                                compute_mode, label1);
4446               emit_move_insn  (quotient, const0_rtx);
4447               emit_jump_insn (gen_jump (label2));
4448               emit_barrier ();
4449               emit_label (label1);
4450               expand_dec (adjusted_op0, const1_rtx);
4451               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4452                                   quotient, 1, OPTAB_LIB_WIDEN);
4453               if (tem != quotient)
4454                 emit_move_insn (quotient, tem);
4455               expand_inc (quotient, const1_rtx);
4456               emit_label (label2);
4457             }
4458           }
4459         else /* signed */
4460           {
4461             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4462                 && INTVAL (op1) >= 0)
4463               {
4464                 /* This is extremely similar to the code for the unsigned case
4465                    above.  For 2.7 we should merge these variants, but for
4466                    2.6.1 I don't want to touch the code for unsigned since that
4467                    get used in C.  The signed case will only be used by other
4468                    languages (Ada).  */
4469
4470                 rtx t1, t2, t3;
4471                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4472                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4473                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4474                                    tquotient, 0);
4475                 t2 = expand_binop (compute_mode, and_optab, op0,
4476                                    GEN_INT (d - 1),
4477                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4478                 t3 = gen_reg_rtx (compute_mode);
4479                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4480                                       compute_mode, 1, 1);
4481                 if (t3 == 0)
4482                   {
4483                     rtx lab;
4484                     lab = gen_label_rtx ();
4485                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4486                     expand_inc (t1, const1_rtx);
4487                     emit_label (lab);
4488                     quotient = t1;
4489                   }
4490                 else
4491                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4492                                                           t1, t3),
4493                                             tquotient);
4494                 break;
4495               }
4496
4497             /* Try using an instruction that produces both the quotient and
4498                remainder, using truncation.  We can easily compensate the
4499                quotient or remainder to get ceiling rounding, once we have the
4500                remainder.  Notice that we compute also the final remainder
4501                value here, and return the result right away.  */
4502             if (target == 0 || GET_MODE (target) != compute_mode)
4503               target = gen_reg_rtx (compute_mode);
4504             if (rem_flag)
4505               {
4506                 remainder= (REG_P (target)
4507                             ? target : gen_reg_rtx (compute_mode));
4508                 quotient = gen_reg_rtx (compute_mode);
4509               }
4510             else
4511               {
4512                 quotient = (REG_P (target)
4513                             ? target : gen_reg_rtx (compute_mode));
4514                 remainder = gen_reg_rtx (compute_mode);
4515               }
4516
4517             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4518                                      remainder, 0))
4519               {
4520                 /* This could be computed with a branch-less sequence.
4521                    Save that for later.  */
4522                 rtx tem;
4523                 rtx label = gen_label_rtx ();
4524                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4525                                  compute_mode, label);
4526                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4527                                     NULL_RTX, 0, OPTAB_WIDEN);
4528                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4529                 expand_inc (quotient, const1_rtx);
4530                 expand_dec (remainder, op1);
4531                 emit_label (label);
4532                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4533               }
4534
4535             /* No luck with division elimination or divmod.  Have to do it
4536                by conditionally adjusting op0 *and* the result.  */
4537             {
4538               rtx label1, label2, label3, label4, label5;
4539               rtx adjusted_op0;
4540               rtx tem;
4541
4542               quotient = gen_reg_rtx (compute_mode);
4543               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4544               label1 = gen_label_rtx ();
4545               label2 = gen_label_rtx ();
4546               label3 = gen_label_rtx ();
4547               label4 = gen_label_rtx ();
4548               label5 = gen_label_rtx ();
4549               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4550               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4551                                compute_mode, label1);
4552               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4553                                   quotient, 0, OPTAB_LIB_WIDEN);
4554               if (tem != quotient)
4555                 emit_move_insn (quotient, tem);
4556               emit_jump_insn (gen_jump (label5));
4557               emit_barrier ();
4558               emit_label (label1);
4559               expand_dec (adjusted_op0, const1_rtx);
4560               emit_jump_insn (gen_jump (label4));
4561               emit_barrier ();
4562               emit_label (label2);
4563               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4564                                compute_mode, label3);
4565               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4566                                   quotient, 0, OPTAB_LIB_WIDEN);
4567               if (tem != quotient)
4568                 emit_move_insn (quotient, tem);
4569               emit_jump_insn (gen_jump (label5));
4570               emit_barrier ();
4571               emit_label (label3);
4572               expand_inc (adjusted_op0, const1_rtx);
4573               emit_label (label4);
4574               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4575                                   quotient, 0, OPTAB_LIB_WIDEN);
4576               if (tem != quotient)
4577                 emit_move_insn (quotient, tem);
4578               expand_inc (quotient, const1_rtx);
4579               emit_label (label5);
4580             }
4581           }
4582         break;
4583
4584       case EXACT_DIV_EXPR:
4585         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4586           {
4587             HOST_WIDE_INT d = INTVAL (op1);
4588             unsigned HOST_WIDE_INT ml;
4589             int pre_shift;
4590             rtx t1;
4591
4592             pre_shift = floor_log2 (d & -d);
4593             ml = invert_mod2n (d >> pre_shift, size);
4594             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4595                                build_int_cst (NULL_TREE, pre_shift),
4596                                NULL_RTX, unsignedp);
4597             quotient = expand_mult (compute_mode, t1,
4598                                     gen_int_mode (ml, compute_mode),
4599                                     NULL_RTX, 1);
4600
4601             insn = get_last_insn ();
4602             set_unique_reg_note (insn,
4603                                  REG_EQUAL,
4604                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4605                                                  compute_mode,
4606                                                  op0, op1));
4607           }
4608         break;
4609
4610       case ROUND_DIV_EXPR:
4611       case ROUND_MOD_EXPR:
4612         if (unsignedp)
4613           {
4614             rtx tem;
4615             rtx label;
4616             label = gen_label_rtx ();
4617             quotient = gen_reg_rtx (compute_mode);
4618             remainder = gen_reg_rtx (compute_mode);
4619             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4620               {
4621                 rtx tem;
4622                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4623                                          quotient, 1, OPTAB_LIB_WIDEN);
4624                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4625                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4626                                           remainder, 1, OPTAB_LIB_WIDEN);
4627               }
4628             tem = plus_constant (op1, -1);
4629             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4630                                 build_int_cst (NULL_TREE, 1),
4631                                 NULL_RTX, 1);
4632             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4633             expand_inc (quotient, const1_rtx);
4634             expand_dec (remainder, op1);
4635             emit_label (label);
4636           }
4637         else
4638           {
4639             rtx abs_rem, abs_op1, tem, mask;
4640             rtx label;
4641             label = gen_label_rtx ();
4642             quotient = gen_reg_rtx (compute_mode);
4643             remainder = gen_reg_rtx (compute_mode);
4644             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4645               {
4646                 rtx tem;
4647                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4648                                          quotient, 0, OPTAB_LIB_WIDEN);
4649                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4650                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4651                                           remainder, 0, OPTAB_LIB_WIDEN);
4652               }
4653             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4654             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4655             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4656                                 build_int_cst (NULL_TREE, 1),
4657                                 NULL_RTX, 1);
4658             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4659             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4660                                 NULL_RTX, 0, OPTAB_WIDEN);
4661             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4662                                  build_int_cst (NULL_TREE, size - 1),
4663                                  NULL_RTX, 0);
4664             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4665                                 NULL_RTX, 0, OPTAB_WIDEN);
4666             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4667                                 NULL_RTX, 0, OPTAB_WIDEN);
4668             expand_inc (quotient, tem);
4669             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4670                                 NULL_RTX, 0, OPTAB_WIDEN);
4671             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4672                                 NULL_RTX, 0, OPTAB_WIDEN);
4673             expand_dec (remainder, tem);
4674             emit_label (label);
4675           }
4676         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4677
4678       default:
4679         gcc_unreachable ();
4680       }
4681
4682   if (quotient == 0)
4683     {
4684       if (target && GET_MODE (target) != compute_mode)
4685         target = 0;
4686
4687       if (rem_flag)
4688         {
4689           /* Try to produce the remainder without producing the quotient.
4690              If we seem to have a divmod pattern that does not require widening,
4691              don't try widening here.  We should really have a WIDEN argument
4692              to expand_twoval_binop, since what we'd really like to do here is
4693              1) try a mod insn in compute_mode
4694              2) try a divmod insn in compute_mode
4695              3) try a div insn in compute_mode and multiply-subtract to get
4696                 remainder
4697              4) try the same things with widening allowed.  */
4698           remainder
4699             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4700                                  op0, op1, target,
4701                                  unsignedp,
4702                                  ((optab2->handlers[compute_mode].insn_code
4703                                    != CODE_FOR_nothing)
4704                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4705           if (remainder == 0)
4706             {
4707               /* No luck there.  Can we do remainder and divide at once
4708                  without a library call?  */
4709               remainder = gen_reg_rtx (compute_mode);
4710               if (! expand_twoval_binop ((unsignedp
4711                                           ? udivmod_optab
4712                                           : sdivmod_optab),
4713                                          op0, op1,
4714                                          NULL_RTX, remainder, unsignedp))
4715                 remainder = 0;
4716             }
4717
4718           if (remainder)
4719             return gen_lowpart (mode, remainder);
4720         }
4721
4722       /* Produce the quotient.  Try a quotient insn, but not a library call.
4723          If we have a divmod in this mode, use it in preference to widening
4724          the div (for this test we assume it will not fail). Note that optab2
4725          is set to the one of the two optabs that the call below will use.  */
4726       quotient
4727         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4728                              op0, op1, rem_flag ? NULL_RTX : target,
4729                              unsignedp,
4730                              ((optab2->handlers[compute_mode].insn_code
4731                                != CODE_FOR_nothing)
4732                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4733
4734       if (quotient == 0)
4735         {
4736           /* No luck there.  Try a quotient-and-remainder insn,
4737              keeping the quotient alone.  */
4738           quotient = gen_reg_rtx (compute_mode);
4739           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4740                                      op0, op1,
4741                                      quotient, NULL_RTX, unsignedp))
4742             {
4743               quotient = 0;
4744               if (! rem_flag)
4745                 /* Still no luck.  If we are not computing the remainder,
4746                    use a library call for the quotient.  */
4747                 quotient = sign_expand_binop (compute_mode,
4748                                               udiv_optab, sdiv_optab,
4749                                               op0, op1, target,
4750                                               unsignedp, OPTAB_LIB_WIDEN);
4751             }
4752         }
4753     }
4754
4755   if (rem_flag)
4756     {
4757       if (target && GET_MODE (target) != compute_mode)
4758         target = 0;
4759
4760       if (quotient == 0)
4761         {
4762           /* No divide instruction either.  Use library for remainder.  */
4763           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4764                                          op0, op1, target,
4765                                          unsignedp, OPTAB_LIB_WIDEN);
4766           /* No remainder function.  Try a quotient-and-remainder
4767              function, keeping the remainder.  */
4768           if (!remainder)
4769             {
4770               remainder = gen_reg_rtx (compute_mode);
4771               if (!expand_twoval_binop_libfunc
4772                   (unsignedp ? udivmod_optab : sdivmod_optab,
4773                    op0, op1,
4774                    NULL_RTX, remainder,
4775                    unsignedp ? UMOD : MOD))
4776                 remainder = NULL_RTX;
4777             }
4778         }
4779       else
4780         {
4781           /* We divided.  Now finish doing X - Y * (X / Y).  */
4782           remainder = expand_mult (compute_mode, quotient, op1,
4783                                    NULL_RTX, unsignedp);
4784           remainder = expand_binop (compute_mode, sub_optab, op0,
4785                                     remainder, target, unsignedp,
4786                                     OPTAB_LIB_WIDEN);
4787         }
4788     }
4789
4790   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4791 }
4792 \f
4793 /* Return a tree node with data type TYPE, describing the value of X.
4794    Usually this is an VAR_DECL, if there is no obvious better choice.
4795    X may be an expression, however we only support those expressions
4796    generated by loop.c.  */
4797
4798 tree
4799 make_tree (tree type, rtx x)
4800 {
4801   tree t;
4802
4803   switch (GET_CODE (x))
4804     {
4805     case CONST_INT:
4806       {
4807         HOST_WIDE_INT hi = 0;
4808
4809         if (INTVAL (x) < 0
4810             && !(TYPE_UNSIGNED (type)
4811                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
4812                      < HOST_BITS_PER_WIDE_INT)))
4813           hi = -1;
4814
4815         t = build_int_cst_wide (type, INTVAL (x), hi);
4816
4817         return t;
4818       }
4819
4820     case CONST_DOUBLE:
4821       if (GET_MODE (x) == VOIDmode)
4822         t = build_int_cst_wide (type,
4823                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4824       else
4825         {
4826           REAL_VALUE_TYPE d;
4827
4828           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4829           t = build_real (type, d);
4830         }
4831
4832       return t;
4833
4834     case CONST_VECTOR:
4835       {
4836         int i, units;
4837         rtx elt;
4838         tree t = NULL_TREE;
4839
4840         units = CONST_VECTOR_NUNITS (x);
4841
4842         /* Build a tree with vector elements.  */
4843         for (i = units - 1; i >= 0; --i)
4844           {
4845             elt = CONST_VECTOR_ELT (x, i);
4846             t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4847           }
4848
4849         return build_vector (type, t);
4850       }
4851
4852     case PLUS:
4853       return fold (build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4854                            make_tree (type, XEXP (x, 1))));
4855
4856     case MINUS:
4857       return fold (build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4858                            make_tree (type, XEXP (x, 1))));
4859
4860     case NEG:
4861       return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
4862
4863     case MULT:
4864       return fold (build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4865                            make_tree (type, XEXP (x, 1))));
4866
4867     case ASHIFT:
4868       return fold (build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4869                            make_tree (type, XEXP (x, 1))));
4870
4871     case LSHIFTRT:
4872       t = lang_hooks.types.unsigned_type (type);
4873       return fold_convert (type, build2 (RSHIFT_EXPR, t,
4874                                          make_tree (t, XEXP (x, 0)),
4875                                          make_tree (type, XEXP (x, 1))));
4876
4877     case ASHIFTRT:
4878       t = lang_hooks.types.signed_type (type);
4879       return fold_convert (type, build2 (RSHIFT_EXPR, t,
4880                                          make_tree (t, XEXP (x, 0)),
4881                                          make_tree (type, XEXP (x, 1))));
4882
4883     case DIV:
4884       if (TREE_CODE (type) != REAL_TYPE)
4885         t = lang_hooks.types.signed_type (type);
4886       else
4887         t = type;
4888
4889       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4890                                          make_tree (t, XEXP (x, 0)),
4891                                          make_tree (t, XEXP (x, 1))));
4892     case UDIV:
4893       t = lang_hooks.types.unsigned_type (type);
4894       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4895                                          make_tree (t, XEXP (x, 0)),
4896                                          make_tree (t, XEXP (x, 1))));
4897
4898     case SIGN_EXTEND:
4899     case ZERO_EXTEND:
4900       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4901                                           GET_CODE (x) == ZERO_EXTEND);
4902       return fold_convert (type, make_tree (t, XEXP (x, 0)));
4903
4904     default:
4905       t = build_decl (VAR_DECL, NULL_TREE, type);
4906
4907       /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4908          ptr_mode.  So convert.  */
4909       if (POINTER_TYPE_P (type))
4910         x = convert_memory_address (TYPE_MODE (type), x);
4911
4912       /* Note that we do *not* use SET_DECL_RTL here, because we do not
4913          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
4914       t->decl.rtl = x;
4915
4916       return t;
4917     }
4918 }
4919
4920 /* Check whether the multiplication X * MULT + ADD overflows.
4921    X, MULT and ADD must be CONST_*.
4922    MODE is the machine mode for the computation.
4923    X and MULT must have mode MODE.  ADD may have a different mode.
4924    So can X (defaults to same as MODE).
4925    UNSIGNEDP is nonzero to do unsigned multiplication.  */
4926
4927 bool
4928 const_mult_add_overflow_p (rtx x, rtx mult, rtx add,
4929                            enum machine_mode mode, int unsignedp)
4930 {
4931   tree type, mult_type, add_type, result;
4932
4933   type = lang_hooks.types.type_for_mode (mode, unsignedp);
4934
4935   /* In order to get a proper overflow indication from an unsigned
4936      type, we have to pretend that it's a sizetype.  */
4937   mult_type = type;
4938   if (unsignedp)
4939     {
4940       /* FIXME:It would be nice if we could step directly from this
4941          type to its sizetype equivalent.  */
4942       mult_type = build_distinct_type_copy (type);
4943       TYPE_IS_SIZETYPE (mult_type) = 1;
4944     }
4945
4946   add_type = (GET_MODE (add) == VOIDmode ? mult_type
4947               : lang_hooks.types.type_for_mode (GET_MODE (add), unsignedp));
4948
4949   result = fold (build2 (PLUS_EXPR, mult_type,
4950                          fold (build2 (MULT_EXPR, mult_type,
4951                                        make_tree (mult_type, x),
4952                                        make_tree (mult_type, mult))),
4953                          make_tree (add_type, add)));
4954
4955   return TREE_CONSTANT_OVERFLOW (result);
4956 }
4957
4958 /* Return an rtx representing the value of X * MULT + ADD.
4959    TARGET is a suggestion for where to store the result (an rtx).
4960    MODE is the machine mode for the computation.
4961    X and MULT must have mode MODE.  ADD may have a different mode.
4962    So can X (defaults to same as MODE).
4963    UNSIGNEDP is nonzero to do unsigned multiplication.
4964    This may emit insns.  */
4965
4966 rtx
4967 expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
4968                  int unsignedp)
4969 {
4970   tree type = lang_hooks.types.type_for_mode (mode, unsignedp);
4971   tree add_type = (GET_MODE (add) == VOIDmode
4972                    ? type: lang_hooks.types.type_for_mode (GET_MODE (add),
4973                                                            unsignedp));
4974   tree result =  fold (build2 (PLUS_EXPR, type,
4975                                fold (build2 (MULT_EXPR, type,
4976                                              make_tree (type, x),
4977                                              make_tree (type, mult))),
4978                                make_tree (add_type, add)));
4979
4980   return expand_expr (result, target, VOIDmode, 0);
4981 }
4982 \f
4983 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4984    and returning TARGET.
4985
4986    If TARGET is 0, a pseudo-register or constant is returned.  */
4987
4988 rtx
4989 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
4990 {
4991   rtx tem = 0;
4992
4993   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4994     tem = simplify_binary_operation (AND, mode, op0, op1);
4995   if (tem == 0)
4996     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4997
4998   if (target == 0)
4999     target = tem;
5000   else if (tem != target)
5001     emit_move_insn (target, tem);
5002   return target;
5003 }
5004 \f
5005 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5006    and storing in TARGET.  Normally return TARGET.
5007    Return 0 if that cannot be done.
5008
5009    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5010    it is VOIDmode, they cannot both be CONST_INT.
5011
5012    UNSIGNEDP is for the case where we have to widen the operands
5013    to perform the operation.  It says to use zero-extension.
5014
5015    NORMALIZEP is 1 if we should convert the result to be either zero
5016    or one.  NORMALIZEP is -1 if we should convert the result to be
5017    either zero or -1.  If NORMALIZEP is zero, the result will be left
5018    "raw" out of the scc insn.  */
5019
5020 rtx
5021 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5022                  enum machine_mode mode, int unsignedp, int normalizep)
5023 {
5024   rtx subtarget;
5025   enum insn_code icode;
5026   enum machine_mode compare_mode;
5027   enum machine_mode target_mode = GET_MODE (target);
5028   rtx tem;
5029   rtx last = get_last_insn ();
5030   rtx pattern, comparison;
5031
5032   if (unsignedp)
5033     code = unsigned_condition (code);
5034
5035   /* If one operand is constant, make it the second one.  Only do this
5036      if the other operand is not constant as well.  */
5037
5038   if (swap_commutative_operands_p (op0, op1))
5039     {
5040       tem = op0;
5041       op0 = op1;
5042       op1 = tem;
5043       code = swap_condition (code);
5044     }
5045
5046   if (mode == VOIDmode)
5047     mode = GET_MODE (op0);
5048
5049   /* For some comparisons with 1 and -1, we can convert this to
5050      comparisons with zero.  This will often produce more opportunities for
5051      store-flag insns.  */
5052
5053   switch (code)
5054     {
5055     case LT:
5056       if (op1 == const1_rtx)
5057         op1 = const0_rtx, code = LE;
5058       break;
5059     case LE:
5060       if (op1 == constm1_rtx)
5061         op1 = const0_rtx, code = LT;
5062       break;
5063     case GE:
5064       if (op1 == const1_rtx)
5065         op1 = const0_rtx, code = GT;
5066       break;
5067     case GT:
5068       if (op1 == constm1_rtx)
5069         op1 = const0_rtx, code = GE;
5070       break;
5071     case GEU:
5072       if (op1 == const1_rtx)
5073         op1 = const0_rtx, code = NE;
5074       break;
5075     case LTU:
5076       if (op1 == const1_rtx)
5077         op1 = const0_rtx, code = EQ;
5078       break;
5079     default:
5080       break;
5081     }
5082
5083   /* If we are comparing a double-word integer with zero or -1, we can
5084      convert the comparison into one involving a single word.  */
5085   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5086       && GET_MODE_CLASS (mode) == MODE_INT
5087       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5088     {
5089       if ((code == EQ || code == NE)
5090           && (op1 == const0_rtx || op1 == constm1_rtx))
5091         {
5092           rtx op00, op01, op0both;
5093
5094           /* Do a logical OR or AND of the two words and compare the result.  */
5095           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5096           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5097           op0both = expand_binop (word_mode,
5098                                   op1 == const0_rtx ? ior_optab : and_optab,
5099                                   op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
5100
5101           if (op0both != 0)
5102             return emit_store_flag (target, code, op0both, op1, word_mode,
5103                                     unsignedp, normalizep);
5104         }
5105       else if ((code == LT || code == GE) && op1 == const0_rtx)
5106         {
5107           rtx op0h;
5108
5109           /* If testing the sign bit, can just test on high word.  */
5110           op0h = simplify_gen_subreg (word_mode, op0, mode,
5111                                       subreg_highpart_offset (word_mode, mode));
5112           return emit_store_flag (target, code, op0h, op1, word_mode,
5113                                   unsignedp, normalizep);
5114         }
5115     }
5116
5117   /* From now on, we won't change CODE, so set ICODE now.  */
5118   icode = setcc_gen_code[(int) code];
5119
5120   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5121      complement of A (for GE) and shifting the sign bit to the low bit.  */
5122   if (op1 == const0_rtx && (code == LT || code == GE)
5123       && GET_MODE_CLASS (mode) == MODE_INT
5124       && (normalizep || STORE_FLAG_VALUE == 1
5125           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5126               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5127                   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
5128     {
5129       subtarget = target;
5130
5131       /* If the result is to be wider than OP0, it is best to convert it
5132          first.  If it is to be narrower, it is *incorrect* to convert it
5133          first.  */
5134       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5135         {
5136           op0 = convert_modes (target_mode, mode, op0, 0);
5137           mode = target_mode;
5138         }
5139
5140       if (target_mode != mode)
5141         subtarget = 0;
5142
5143       if (code == GE)
5144         op0 = expand_unop (mode, one_cmpl_optab, op0,
5145                            ((STORE_FLAG_VALUE == 1 || normalizep)
5146                             ? 0 : subtarget), 0);
5147
5148       if (STORE_FLAG_VALUE == 1 || normalizep)
5149         /* If we are supposed to produce a 0/1 value, we want to do
5150            a logical shift from the sign bit to the low-order bit; for
5151            a -1/0 value, we do an arithmetic shift.  */
5152         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5153                             size_int (GET_MODE_BITSIZE (mode) - 1),
5154                             subtarget, normalizep != -1);
5155
5156       if (mode != target_mode)
5157         op0 = convert_modes (target_mode, mode, op0, 0);
5158
5159       return op0;
5160     }
5161
5162   if (icode != CODE_FOR_nothing)
5163     {
5164       insn_operand_predicate_fn pred;
5165
5166       /* We think we may be able to do this with a scc insn.  Emit the
5167          comparison and then the scc insn.  */
5168
5169       do_pending_stack_adjust ();
5170       last = get_last_insn ();
5171
5172       comparison
5173         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
5174       if (CONSTANT_P (comparison))
5175         {
5176           switch (GET_CODE (comparison))
5177             {
5178             case CONST_INT:
5179               if (comparison == const0_rtx)
5180                 return const0_rtx;
5181               break;
5182
5183 #ifdef FLOAT_STORE_FLAG_VALUE
5184             case CONST_DOUBLE:
5185               if (comparison == CONST0_RTX (GET_MODE (comparison)))
5186                 return const0_rtx;
5187               break;
5188 #endif
5189             default:
5190               gcc_unreachable ();
5191             }
5192
5193           if (normalizep == 1)
5194             return const1_rtx;
5195           if (normalizep == -1)
5196             return constm1_rtx;
5197           return const_true_rtx;
5198         }
5199
5200       /* The code of COMPARISON may not match CODE if compare_from_rtx
5201          decided to swap its operands and reverse the original code.
5202
5203          We know that compare_from_rtx returns either a CONST_INT or
5204          a new comparison code, so it is safe to just extract the
5205          code from COMPARISON.  */
5206       code = GET_CODE (comparison);
5207
5208       /* Get a reference to the target in the proper mode for this insn.  */
5209       compare_mode = insn_data[(int) icode].operand[0].mode;
5210       subtarget = target;
5211       pred = insn_data[(int) icode].operand[0].predicate;
5212       if (optimize || ! (*pred) (subtarget, compare_mode))
5213         subtarget = gen_reg_rtx (compare_mode);
5214
5215       pattern = GEN_FCN (icode) (subtarget);
5216       if (pattern)
5217         {
5218           emit_insn (pattern);
5219
5220           /* If we are converting to a wider mode, first convert to
5221              TARGET_MODE, then normalize.  This produces better combining
5222              opportunities on machines that have a SIGN_EXTRACT when we are
5223              testing a single bit.  This mostly benefits the 68k.
5224
5225              If STORE_FLAG_VALUE does not have the sign bit set when
5226              interpreted in COMPARE_MODE, we can do this conversion as
5227              unsigned, which is usually more efficient.  */
5228           if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
5229             {
5230               convert_move (target, subtarget,
5231                             (GET_MODE_BITSIZE (compare_mode)
5232                              <= HOST_BITS_PER_WIDE_INT)
5233                             && 0 == (STORE_FLAG_VALUE
5234                                      & ((HOST_WIDE_INT) 1
5235                                         << (GET_MODE_BITSIZE (compare_mode) -1))));
5236               op0 = target;
5237               compare_mode = target_mode;
5238             }
5239           else
5240             op0 = subtarget;
5241
5242           /* If we want to keep subexpressions around, don't reuse our
5243              last target.  */
5244
5245           if (optimize)
5246             subtarget = 0;
5247
5248           /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
5249              we don't have to do anything.  */
5250           if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5251             ;
5252           /* STORE_FLAG_VALUE might be the most negative number, so write
5253              the comparison this way to avoid a compiler-time warning.  */
5254           else if (- normalizep == STORE_FLAG_VALUE)
5255             op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
5256
5257           /* We don't want to use STORE_FLAG_VALUE < 0 below since this
5258              makes it hard to use a value of just the sign bit due to
5259              ANSI integer constant typing rules.  */
5260           else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
5261                    && (STORE_FLAG_VALUE
5262                        & ((HOST_WIDE_INT) 1
5263                           << (GET_MODE_BITSIZE (compare_mode) - 1))))
5264             op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
5265                                 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
5266                                 subtarget, normalizep == 1);
5267           else
5268             {
5269               gcc_assert (STORE_FLAG_VALUE & 1);
5270
5271               op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
5272               if (normalizep == -1)
5273                 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
5274             }
5275
5276           /* If we were converting to a smaller mode, do the
5277              conversion now.  */
5278           if (target_mode != compare_mode)
5279             {
5280               convert_move (target, op0, 0);
5281               return target;
5282             }
5283           else
5284             return op0;
5285         }
5286     }
5287
5288   delete_insns_since (last);
5289
5290   /* If optimizing, use different pseudo registers for each insn, instead
5291      of reusing the same pseudo.  This leads to better CSE, but slows
5292      down the compiler, since there are more pseudos */
5293   subtarget = (!optimize
5294                && (target_mode == mode)) ? target : NULL_RTX;
5295
5296   /* If we reached here, we can't do this with a scc insn.  However, there
5297      are some comparisons that can be done directly.  For example, if
5298      this is an equality comparison of integers, we can try to exclusive-or
5299      (or subtract) the two operands and use a recursive call to try the
5300      comparison with zero.  Don't do any of these cases if branches are
5301      very cheap.  */
5302
5303   if (BRANCH_COST > 0
5304       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5305       && op1 != const0_rtx)
5306     {
5307       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5308                           OPTAB_WIDEN);
5309
5310       if (tem == 0)
5311         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5312                             OPTAB_WIDEN);
5313       if (tem != 0)
5314         tem = emit_store_flag (target, code, tem, const0_rtx,
5315                                mode, unsignedp, normalizep);
5316       if (tem == 0)
5317         delete_insns_since (last);
5318       return tem;
5319     }
5320
5321   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5322      the constant zero.  Reject all other comparisons at this point.  Only
5323      do LE and GT if branches are expensive since they are expensive on
5324      2-operand machines.  */
5325
5326   if (BRANCH_COST == 0
5327       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5328       || (code != EQ && code != NE
5329           && (BRANCH_COST <= 1 || (code != LE && code != GT))))
5330     return 0;
5331
5332   /* See what we need to return.  We can only return a 1, -1, or the
5333      sign bit.  */
5334
5335   if (normalizep == 0)
5336     {
5337       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5338         normalizep = STORE_FLAG_VALUE;
5339
5340       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5341                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5342                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5343         ;
5344       else
5345         return 0;
5346     }
5347
5348   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5349      do the necessary operation below.  */
5350
5351   tem = 0;
5352
5353   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5354      the sign bit set.  */
5355
5356   if (code == LE)
5357     {
5358       /* This is destructive, so SUBTARGET can't be OP0.  */
5359       if (rtx_equal_p (subtarget, op0))
5360         subtarget = 0;
5361
5362       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5363                           OPTAB_WIDEN);
5364       if (tem)
5365         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5366                             OPTAB_WIDEN);
5367     }
5368
5369   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5370      number of bits in the mode of OP0, minus one.  */
5371
5372   if (code == GT)
5373     {
5374       if (rtx_equal_p (subtarget, op0))
5375         subtarget = 0;
5376
5377       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5378                           size_int (GET_MODE_BITSIZE (mode) - 1),
5379                           subtarget, 0);
5380       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5381                           OPTAB_WIDEN);
5382     }
5383
5384   if (code == EQ || code == NE)
5385     {
5386       /* For EQ or NE, one way to do the comparison is to apply an operation
5387          that converts the operand into a positive number if it is nonzero
5388          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5389          for NE we negate.  This puts the result in the sign bit.  Then we
5390          normalize with a shift, if needed.
5391
5392          Two operations that can do the above actions are ABS and FFS, so try
5393          them.  If that doesn't work, and MODE is smaller than a full word,
5394          we can use zero-extension to the wider mode (an unsigned conversion)
5395          as the operation.  */
5396
5397       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5398          that is compensated by the subsequent overflow when subtracting
5399          one / negating.  */
5400
5401       if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5402         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5403       else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5404         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5405       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5406         {
5407           tem = convert_modes (word_mode, mode, op0, 1);
5408           mode = word_mode;
5409         }
5410
5411       if (tem != 0)
5412         {
5413           if (code == EQ)
5414             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5415                                 0, OPTAB_WIDEN);
5416           else
5417             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5418         }
5419
5420       /* If we couldn't do it that way, for NE we can "or" the two's complement
5421          of the value with itself.  For EQ, we take the one's complement of
5422          that "or", which is an extra insn, so we only handle EQ if branches
5423          are expensive.  */
5424
5425       if (tem == 0 && (code == NE || BRANCH_COST > 1))
5426         {
5427           if (rtx_equal_p (subtarget, op0))
5428             subtarget = 0;
5429
5430           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5431           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5432                               OPTAB_WIDEN);
5433
5434           if (tem && code == EQ)
5435             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5436         }
5437     }
5438
5439   if (tem && normalizep)
5440     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5441                         size_int (GET_MODE_BITSIZE (mode) - 1),
5442                         subtarget, normalizep == 1);
5443
5444   if (tem)
5445     {
5446       if (GET_MODE (tem) != target_mode)
5447         {
5448           convert_move (target, tem, 0);
5449           tem = target;
5450         }
5451       else if (!subtarget)
5452         {
5453           emit_move_insn (target, tem);
5454           tem = target;
5455         }
5456     }
5457   else
5458     delete_insns_since (last);
5459
5460   return tem;
5461 }
5462
5463 /* Like emit_store_flag, but always succeeds: if emit_store_flag cannot do
5464    the job, fall back to set/compare/jump/set code.  On that path a true
5465    comparison yields -1 when NORMALIZEP is -1 and 1 otherwise, and the
5466    result may be placed in a new pseudo instead of TARGET.  */
5467
5468 rtx
5469 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5470                        enum machine_mode mode, int unsignedp, int normalizep)
5471 {
5472   rtx tem, label;
5473
5474   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5475   if (tem != 0)
5476     return tem;
5477
5478   if (normalizep == 0)
5479     normalizep = 1;
5480
5481   /* Store the "true" value, then jump over the store of zero if the
5482      comparison holds.  TARGET is written before the comparison is
5483      emitted, so it must be a register that neither operand mentions.  */
5484   if (!REG_P (target)
5485       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5486     target = gen_reg_rtx (GET_MODE (target));
5487   emit_move_insn (target, normalizep == -1 ? constm1_rtx : const1_rtx);
5488   label = gen_label_rtx ();
5489   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5490                            NULL_RTX, label);
5491   emit_move_insn (target, const0_rtx);
5492   emit_label (label);
5493   return target;
5494 }
5495 \f
5496 /* Emit a possibly multi-word comparison of ARG1 with ARG2, both of mode
5497    MODE, and a conditional jump to LABEL taken when ARG1 OP ARG2 holds.
5498
5499    The algorithm is based on the code in expr.c:do_jump.
5500
5501    This is not a general comparison routine.  When the comparison has to
5502    be split, only the codes generated within expmed.c are handled (LTU,
5503    LEU, LT, GT, GE, and EQ/NE against zero); any other code aborts.  */
5504
5505 static void
5506 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5507                  rtx label)
5508 {
5509   rtx done;
5510
5511   /* Modes other than integer ones, and integer modes the target can
5512      compare and branch on directly, take a single compare-and-jump.  */
5513   if (GET_MODE_CLASS (mode) != MODE_INT
5514       || can_compare_p (op, mode, ccp_jump))
5515     {
5516       emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
5517       return;
5518     }
5519
5520   /* This integer mode is too wide to compare properly, so compare word by
5521      word.  Rely on cse to optimize constant cases.  The ordered tests all
5522      use do_jump_by_parts_greater_rtx, swapping operands or labels.  */
5523   done = gen_label_rtx ();
5524
5525   switch (op)
5526     {
5527     case LTU:
5528     case LT:
5529       do_jump_by_parts_greater_rtx (mode, op == LTU, arg2, arg1, done, label);
5530       break;
5531
5532     case LEU:
5533       do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, done);
5534       break;
5535
5536     case GT:
5537       do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, done, label);
5538       break;
5539
5540     case GE:
5541       do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, done);
5542       break;
5543
5544     case EQ:
5545     case NE:
5546       /* do_jump_by_parts_equality_rtx only compares with zero.  */
5547       gcc_assert (arg2 == const0_rtx && mode == GET_MODE (arg1));
5548       if (op == EQ)
5549         do_jump_by_parts_equality_rtx (arg1, done, label);
5550       else
5551         do_jump_by_parts_equality_rtx (arg1, label, done);
5552       break;
5553
5554     default:
5555       gcc_unreachable ();
5556     }
5557
5558   emit_label (done);
5559 }