gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 2, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING.  If not, write to the Free
  20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  21 02111-1307, USA.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "tm.h"
  28 #include "toplev.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "tm_p.h"
  32 #include "flags.h"
  33 #include "insn-config.h"
  34 #include "expr.h"
  35 #include "optabs.h"
  36 #include "real.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39
     /* Forward declarations of the file-local (static) helper functions.  */
  40 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  41                                    unsigned HOST_WIDE_INT,
  42                                    unsigned HOST_WIDE_INT, rtx);
  43 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  44                                    unsigned HOST_WIDE_INT, rtx);
  45 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  46                                     unsigned HOST_WIDE_INT,
  47                                     unsigned HOST_WIDE_INT,
  48                                     unsigned HOST_WIDE_INT, rtx, int);
  49 static rtx mask_rtx (enum machine_mode, int, int, int);
  50 static rtx lshift_value (enum machine_mode, rtx, int, int);
  51 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  52                                     unsigned HOST_WIDE_INT, int);
  53 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  54 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  55 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  56
  57 /* Test whether a value is zero or a power of two.  The argument is
     expanded more than once, so it must not have side effects.  */
  58 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
  59
  60 /* Nonzero means divides or modulus operations are relatively cheap for
  61    powers of two, so don't use branches; emit the operation instead.
  62    Usually, this will mean that the MD file will emit non-branch
  63    sequences.  Both arrays are indexed by machine mode and are set by
     init_expmed.  */
  64
  65 static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
  66 static bool smod_pow2_cheap[NUM_MACHINE_MODES];
  67
     /* If the target does not say otherwise, unaligned accesses are treated
        as slow exactly when STRICT_ALIGNMENT is nonzero.  */
  68 #ifndef SLOW_UNALIGNED_ACCESS
  69 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  70 #endif
  71
  72 /* For compilers that support multiple targets with different word sizes,
  73    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  74    is the H8/300(H) compiler.  */
  75
  76 #ifndef MAX_BITS_PER_WORD
  77 #define MAX_BITS_PER_WORD BITS_PER_WORD
  78 #endif
  79
  80 /* Reduce conditional compilation elsewhere.  When a pattern is missing,
     HAVE_foo is 0, CODE_FOR_foo is CODE_FOR_nothing and gen_foo expands to
     NULL_RTX, so the code below can test HAVE_foo in ordinary C
     conditions instead of using #ifdef.  */
  81 #ifndef HAVE_insv
  82 #define HAVE_insv       0
  83 #define CODE_FOR_insv   CODE_FOR_nothing
  84 #define gen_insv(a,b,c,d) NULL_RTX
  85 #endif
  86 #ifndef HAVE_extv
  87 #define HAVE_extv       0
  88 #define CODE_FOR_extv   CODE_FOR_nothing
  89 #define gen_extv(a,b,c,d) NULL_RTX
  90 #endif
  91 #ifndef HAVE_extzv
  92 #define HAVE_extzv      0
  93 #define CODE_FOR_extzv  CODE_FOR_nothing
  94 #define gen_extzv(a,b,c,d) NULL_RTX
  95 #endif
  96
  97 /* Cost of various pieces of RTL.  Note that some of these are indexed by
  98    shift count and some by mode.  All of them are computed by init_expmed:
     zero_cost is a single value; shift_cost, shiftadd_cost and
     shiftsub_cost are indexed by mode and then by shift count; the
     remaining arrays are indexed by mode only.  mul_widen_cost is stored
     under the wider mode of the product, mul_highpart_cost under the
     narrower operand mode.  */
  99 static int zero_cost;
 100 static int add_cost[NUM_MACHINE_MODES];
 101 static int neg_cost[NUM_MACHINE_MODES];
 102 static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 103 static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 104 static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 105 static int mul_cost[NUM_MACHINE_MODES];
 106 static int div_cost[NUM_MACHINE_MODES];
 107 static int mul_widen_cost[NUM_MACHINE_MODES];
 108 static int mul_highpart_cost[NUM_MACHINE_MODES];
 109
     /* Fill in the cost tables declared above (zero_cost, add_cost, neg_cost,
        div_cost, mul_cost, mul_widen_cost, mul_highpart_cost, shift_cost,
        shiftadd_cost, shiftsub_cost) and the sdiv_pow2_cheap and
        smod_pow2_cheap flags.  The expressions to be costed are built in
        the local structure ALL and passed to rtx_cost once for every mode
        from the narrowest MODE_INT mode through each wider mode.  */
 110 void
 111 init_expmed (void)
 112 {
       /* Storage for the expressions being costed.
          NOTE(review): each rtunion member presumably supplies room for the
          operands of the rtx_def that precedes it; confirm against the
          definition of struct rtx_def.  */
 113   struct
 114   {
 115     struct rtx_def reg;         rtunion reg_fld[2];
 116     struct rtx_def plus;        rtunion plus_fld1;
 117     struct rtx_def neg;
 118     struct rtx_def udiv;        rtunion udiv_fld1;
 119     struct rtx_def mult;        rtunion mult_fld1;
 120     struct rtx_def div;         rtunion div_fld1;
 121     struct rtx_def mod;         rtunion mod_fld1;
 122     struct rtx_def zext;
 123     struct rtx_def wide_mult;   rtunion wide_mult_fld1;
 124     struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
 125     struct rtx_def wide_trunc;
 126     struct rtx_def shift;       rtunion shift_fld1;
 127     struct rtx_def shift_mult;  rtunion shift_mult_fld1;
 128     struct rtx_def shift_add;   rtunion shift_add_fld1;
 129     struct rtx_def shift_sub;   rtunion shift_sub_fld1;
 130   } all;
 131
       /* pow2[M] is the constant 1 << M and cint[M] the constant M.  Entry 0
          of both arrays is never set; every use below starts at index 1.  */
 132   rtx pow2[MAX_BITS_PER_WORD];
 133   rtx cint[MAX_BITS_PER_WORD];
 134   int m, n;
 135   enum machine_mode mode, wider_mode;
 136
 137   zero_cost = rtx_cost (const0_rtx, 0);
 138
 139   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 140     {
 141       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 142       cint[m] = GEN_INT (m);
 143     }
 144
 145   memset (&all, 0, sizeof all);
 146
       /* Set the code and operands of each expression once.  The modes are
          filled in per iteration of the mode loop below.  */
 147   PUT_CODE (&all.reg, REG);
       /* NOTE(review): 10000 looks like an arbitrary register number chosen
          so the operand is not a real hard register; confirm.  */
 148   REGNO (&all.reg) = 10000;
 149
 150   PUT_CODE (&all.plus, PLUS);
 151   XEXP (&all.plus, 0) = &all.reg;
 152   XEXP (&all.plus, 1) = &all.reg;
 153
 154   PUT_CODE (&all.neg, NEG);
 155   XEXP (&all.neg, 0) = &all.reg;
 156
 157   PUT_CODE (&all.udiv, UDIV);
 158   XEXP (&all.udiv, 0) = &all.reg;
 159   XEXP (&all.udiv, 1) = &all.reg;
 160
 161   PUT_CODE (&all.mult, MULT);
 162   XEXP (&all.mult, 0) = &all.reg;
 163   XEXP (&all.mult, 1) = &all.reg;
 164
       /* Signed division and modulus are costed with the constant divisor
          32.  cint[32] exists only when 32 < MAX_BITS_PER_WORD.  */
 165   PUT_CODE (&all.div, DIV);
 166   XEXP (&all.div, 0) = &all.reg;
 167   XEXP (&all.div, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
 168
 169   PUT_CODE (&all.mod, MOD);
 170   XEXP (&all.mod, 0) = &all.reg;
 171   XEXP (&all.mod, 1) = XEXP (&all.div, 1);
 172
       /* wide_trunc is
          (truncate (lshiftrt (mult (zero_extend reg) (zero_extend reg)) N)),
          where N is set in the mode loop below.  */
 173   PUT_CODE (&all.zext, ZERO_EXTEND);
 174   XEXP (&all.zext, 0) = &all.reg;
 175
 176   PUT_CODE (&all.wide_mult, MULT);
 177   XEXP (&all.wide_mult, 0) = &all.zext;
 178   XEXP (&all.wide_mult, 1) = &all.zext;
 179
 180   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 181   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 182
 183   PUT_CODE (&all.wide_trunc, TRUNCATE);
 184   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 185
 186   PUT_CODE (&all.shift, ASHIFT);
 187   XEXP (&all.shift, 0) = &all.reg;
 188
       /* shift_add and shift_sub are (plus (mult reg C) reg) and
          (minus (mult reg C) reg); C is taken from pow2[] in the inner
          loop below.  */
 189   PUT_CODE (&all.shift_mult, MULT);
 190   XEXP (&all.shift_mult, 0) = &all.reg;
 191
 192   PUT_CODE (&all.shift_add, PLUS);
 193   XEXP (&all.shift_add, 0) = &all.shift_mult;
 194   XEXP (&all.shift_add, 1) = &all.reg;
 195
 196   PUT_CODE (&all.shift_sub, MINUS);
 197   XEXP (&all.shift_sub, 0) = &all.shift_mult;
 198   XEXP (&all.shift_sub, 1) = &all.reg;
 199
 200   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 201        mode != VOIDmode;
 202        mode = GET_MODE_WIDER_MODE (mode))
 203     {
 204       PUT_MODE (&all.reg, mode);
 205       PUT_MODE (&all.plus, mode);
 206       PUT_MODE (&all.neg, mode);
 207       PUT_MODE (&all.udiv, mode);
 208       PUT_MODE (&all.mult, mode);
 209       PUT_MODE (&all.div, mode);
 210       PUT_MODE (&all.mod, mode);
 211       PUT_MODE (&all.wide_trunc, mode);
 212       PUT_MODE (&all.shift, mode);
 213       PUT_MODE (&all.shift_mult, mode);
 214       PUT_MODE (&all.shift_add, mode);
 215       PUT_MODE (&all.shift_sub, mode);
 216
 217       add_cost[mode] = rtx_cost (&all.plus, SET);
 218       neg_cost[mode] = rtx_cost (&all.neg, SET);
           /* div_cost is measured on the unsigned division.  */
 219       div_cost[mode] = rtx_cost (&all.udiv, SET);
 220       mul_cost[mode] = rtx_cost (&all.mult, SET);
 221
           /* A signed divide (modulus) by 32 counts as cheap when it costs
              no more than two (four) additions in this mode.  */
 222       sdiv_pow2_cheap[mode] = (rtx_cost (&all.div, SET) <= 2 * add_cost[mode]);
 223       smod_pow2_cheap[mode] = (rtx_cost (&all.mod, SET) <= 4 * add_cost[mode]);
 224
           /* The widening and high-part multiply costs are computed only
              when a wider mode exists; otherwise the entries are left
              untouched.  */
 225       wider_mode = GET_MODE_WIDER_MODE (mode);
 226       if (wider_mode != VOIDmode)
 227         {
 228           PUT_MODE (&all.zext, wider_mode);
 229           PUT_MODE (&all.wide_mult, wider_mode);
 230           PUT_MODE (&all.wide_lshr, wider_mode);
 231           XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
 232
 233           mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
 234           mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
 235         }
 236
           /* A shift by zero is free; shift-and-add or shift-and-subtract
              with a zero shift costs the same as a plain addition.  */
 237       shift_cost[mode][0] = 0;
 238       shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
 239
           /* Shift counts are limited both by the mode's width and by the
              size of the cost arrays.  */
 240       n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
 241       for (m = 1; m < n; m++)
 242         {
 243           XEXP (&all.shift, 1) = cint[m];
 244           XEXP (&all.shift_mult, 1) = pow2[m];
 245
 246           shift_cost[mode][m] = rtx_cost (&all.shift, SET);
 247           shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
 248           shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
 249         }
 250     }
 251 }
 252
 253 /* Produce an rtx for the negation of X.
 254    MODE is the mode wanted for the result;
 255    it matters when X is a CONST_INT.  */
 256
 257 rtx
 258 negate_rtx (enum machine_mode mode, rtx x)
 259 {
 260   /* Prefer whatever simplification can produce directly.  */
 261   rtx folded = simplify_unary_operation (NEG, mode, x, mode);
 262
 263   if (folded != 0)
 264     return folded;
 265   return expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 266 }
 267
 268 /* Query the target's insv, extv or extzv pattern selected by PATTERN.
 269    The result is MAX_MACHINE_MODE when HAVE_foo is false, VOIDmode when
 270    OPNO is -1 (an availability-only query), and otherwise the mode of
 271    operand OPNO, with word_mode substituted for VOIDmode.  */
 272 enum machine_mode
 273 mode_for_extraction (enum extraction_pattern pattern, int opno)
 274 {
 275   const struct insn_data *pattern_data;
 276   enum machine_mode operand_mode;
 277
 278   switch (pattern)
 279     {
 280     case EP_insv:
 281       if (!(HAVE_insv))
 282         return MAX_MACHINE_MODE;
 283       pattern_data = &insn_data[CODE_FOR_insv];
 284       break;
 285
 286     case EP_extv:
 287       if (!(HAVE_extv))
 288         return MAX_MACHINE_MODE;
 289       pattern_data = &insn_data[CODE_FOR_extv];
 290       break;
 291
 292     case EP_extzv:
 293       if (!(HAVE_extzv))
 294         return MAX_MACHINE_MODE;
 295       pattern_data = &insn_data[CODE_FOR_extzv];
 296       break;
 297
 298     default:
 299       gcc_unreachable ();
 300     }
 301
 302   /* A caller passing -1 only wants to know that the pattern exists.  */
 303   if (opno == -1)
 304     return VOIDmode;
 305
 306   /* Callers used to replace a VOIDmode answer with word_mode
 307      themselves; that substitution is done here instead.  */
 308   operand_mode = pattern_data->operand[opno].mode;
 309   return operand_mode == VOIDmode ? word_mode : operand_mode;
 310 }
 316
 316
 317 \f
 318 /* Generate code to store value from rtx VALUE
 319    into a bit-field within structure STR_RTX
 320    containing BITSIZE bits starting at bit BITNUM.
 321    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
 322    The result is VALUE, possibly in a converted form, except that the
 323    vec_set path returns the destination instead.  */
 324
 325 /* ??? Note that there are two different ideas here for how
 326    to determine the size to count bits within, for a register.
 327    One is BITS_PER_WORD, and the other is the size of operand 3
 328    of the insv pattern.
 329
 330    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
 331    else, we use the mode of operand 3.  */
 332
 333 rtx
 334 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 335                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 336                  rtx value)
 337 {
       /* Positions are counted in bytes for memory and in words for
          registers.  */
 338   unsigned int unit
 339     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
 340   unsigned HOST_WIDE_INT offset, bitpos;
 341   rtx op0 = str_rtx;
 342   int byte_offset;
 343   rtx orig_value;
 344
 345   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 346
 347   while (GET_CODE (op0) == SUBREG)
 348     {
 349       /* The following line once was done only if WORDS_BIG_ENDIAN,
 350          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 351          meaningful at a much higher level; when structures are copied
 352          between memory and regs, the higher-numbered regs
 353          always get higher addresses.  */
 354       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
 355       op0 = SUBREG_REG (op0);
 356     }
 357
 358   /* No action is needed if the target is a register and if the field
 359      lies completely outside that register.  This can occur if the source
 360      code contains an out-of-bounds access to a small array.  */
 361   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 362     return value;
 363
 364   /* Use vec_set patterns for inserting parts of vectors whenever
 365      available.  */
 366   if (VECTOR_MODE_P (GET_MODE (op0))
 367       && !MEM_P (op0)
 368       && (vec_set_optab->handlers[GET_MODE (op0)].insn_code
 369           != CODE_FOR_nothing)
 370       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 371       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 372       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 373     {
 374       enum machine_mode outermode = GET_MODE (op0);
 375       enum machine_mode innermode = GET_MODE_INNER (outermode);
 376       int icode = (int) vec_set_optab->handlers[outermode].insn_code;
 377       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 378       rtx rtxpos = GEN_INT (pos);
 379       rtx src = value;
 380       rtx dest = op0;
 381       rtx pat, seq;
 382       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
 383       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
 384       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
 385
 386       start_sequence ();
 387
 388       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
 389         src = copy_to_mode_reg (mode1, src);
 390
           /* The element position is operand 2, so copy it in MODE2, the
              mode its predicate is checked against here and in the
              assertion below.  */
 391       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 392         rtxpos = copy_to_mode_reg (mode2, rtxpos);
 393
 394       /* We could handle this, but we should always be called with a pseudo
 395          for our targets and all insns should take them as outputs.  */
 396       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
 397                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
 398                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
 399       pat = GEN_FCN (icode) (dest, src, rtxpos);
 400       seq = get_insns ();
 401       end_sequence ();
           /* If the pattern was not generated, SEQ is simply not emitted and
              the general code below handles the store.  */
 402       if (pat)
 403         {
 404           emit_insn (seq);
 405           emit_insn (pat);
 406           return dest;
 407         }
 408     }
 409
 410   if (flag_force_mem)
 411     {
 412       int old_generating_concat_p = generating_concat_p;
 413       generating_concat_p = 0;
 414       value = force_not_mem (value);
 415       generating_concat_p = old_generating_concat_p;
 416     }
 417
 418   /* If the target is a register, overwriting the entire object, or storing
 419      a full-word or multi-word field can be done with just a SUBREG.
 420
 421      If the target is memory, storing any naturally aligned field can be
 422      done with a simple store.  For targets that support fast unaligned
 423      memory, any naturally sized, unit aligned field can be done directly.  */
 424
 425   offset = bitnum / unit;
 426   bitpos = bitnum % unit;
 427   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 428                 + (offset * UNITS_PER_WORD);
 429
 430   if (bitpos == 0
 431       && bitsize == GET_MODE_BITSIZE (fieldmode)
 432       && (!MEM_P (op0)
 433           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 434              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 435              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 436           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 437              || (offset * BITS_PER_UNIT % bitsize == 0
 438                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 439     {
 440       if (GET_MODE (op0) != fieldmode)
 441         {
 442           if (MEM_P (op0))
 443             op0 = adjust_address (op0, fieldmode, offset);
 444           else
 445             op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 446                                        byte_offset);
 447         }
 448       emit_move_insn (op0, value);
 449       return value;
 450     }
 451
 452   /* Make sure we are playing with integral modes.  Pun with subregs
 453      if we aren't.  This must come after the entire register case above,
 454      since that case is valid for any mode.  The following cases are only
 455      valid for integral modes.  */
 456   {
 457     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 458     if (imode != GET_MODE (op0))
 459       {
 460         if (MEM_P (op0))
 461           op0 = adjust_address (op0, imode, 0);
 462         else
 463           {
 464             gcc_assert (imode != BLKmode);
 465             op0 = gen_lowpart (imode, op0);
 466           }
 467       }
 468   }
 469
 470   /* We may be accessing data outside the field, which means
 471      we can alias adjacent data.  */
 472   if (MEM_P (op0))
 473     {
 474       op0 = shallow_copy_rtx (op0);
 475       set_mem_alias_set (op0, 0);
 476       set_mem_expr (op0, 0);
 477     }
 478
 479   /* If OP0 is a register, BITPOS must count within a word.
 480      But as we have it, it counts within whatever size OP0 now has.
 481      On a bigendian machine, these are not the same, so convert.  */
 482   if (BYTES_BIG_ENDIAN
 483       && !MEM_P (op0)
 484       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 485     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 486
 487   /* Storing an lsb-aligned field in a register
 488      can be done with a movestrict instruction.  */
 489
 490   if (!MEM_P (op0)
 491       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 492       && bitsize == GET_MODE_BITSIZE (fieldmode)
 493       && (movstrict_optab->handlers[fieldmode].insn_code
 494           != CODE_FOR_nothing))
 495     {
 496       int icode = movstrict_optab->handlers[fieldmode].insn_code;
 497
 498       /* Get appropriate low part of the value being stored.  */
 499       if (GET_CODE (value) == CONST_INT || REG_P (value))
 500         value = gen_lowpart (fieldmode, value);
 501       else if (!(GET_CODE (value) == SYMBOL_REF
 502                  || GET_CODE (value) == LABEL_REF
 503                  || GET_CODE (value) == CONST))
 504         value = convert_to_mode (fieldmode, value, 0);
 505
 506       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 507         value = copy_to_mode_reg (fieldmode, value);
 508
 509       if (GET_CODE (op0) == SUBREG)
 510         {
 511           /* Else we've got some float mode source being extracted into
 512              a different float mode destination -- this combination of
 513              subregs results in Severe Tire Damage.  */
 514           gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
 515                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 516                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 517           op0 = SUBREG_REG (op0);
 518         }
 519
 520       emit_insn (GEN_FCN (icode)
 521                  (gen_rtx_SUBREG (fieldmode, op0,
 522                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 523                                   + (offset * UNITS_PER_WORD)),
 524                                   value));
 525
 526       return value;
 527     }
 528
 529   /* Handle fields bigger than a word.  */
 530
 531   if (bitsize > BITS_PER_WORD)
 532     {
 533       /* Here we transfer the words of the field
 534          in the order least significant first.
 535          This is because the most significant word is the one which may
 536          be less than full.
 537          However, only do that if the value is not BLKmode.  */
 538
 539       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 540       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 541       unsigned int i;
 542
 543       /* This is the mode we must force value to, so that there will be enough
 544          subwords to extract.  Note that fieldmode will often (always?) be
 545          VOIDmode, because that is what store_field uses to indicate that this
 546          is a bit field, but passing VOIDmode to operand_subword_force will
 547          result in an abort.  */
 548       fieldmode = GET_MODE (value);
 549       if (fieldmode == VOIDmode)
 550         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 551
 552       for (i = 0; i < nwords; i++)
 553         {
 554           /* If I is 0, use the low-order word in both field and target;
 555              if I is 1, use the next to lowest word; and so on.  */
 556           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 557           unsigned int bit_offset = (backwards
 558                                      ? MAX ((int) bitsize - ((int) i + 1)
 559                                             * BITS_PER_WORD,
 560                                             0)
 561                                      : (int) i * BITS_PER_WORD);
 562
               /* Each word, or the partial final word, is stored by a
                  recursive call.  */
 563           store_bit_field (op0, MIN (BITS_PER_WORD,
 564                                      bitsize - i * BITS_PER_WORD),
 565                            bitnum + bit_offset, word_mode,
 566                            operand_subword_force (value, wordnum, fieldmode));
 567         }
 568       return value;
 569     }
 570
 571   /* From here on we can assume that the field to be stored in is
 572      a full-word (whatever type that is), since it is shorter than a word.  */
 573
 574   /* OFFSET is the number of words or bytes (UNIT says which)
 575      from STR_RTX to the first word or byte containing part of the field.  */
 576
 577   if (!MEM_P (op0))
 578     {
 579       if (offset != 0
 580           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 581         {
 582           if (!REG_P (op0))
 583             {
 584               /* Since this is a destination (lvalue), we can't copy it to a
 585                  pseudo.  We can trivially remove a SUBREG that does not
 586                  change the size of the operand.  Such a SUBREG may have been
 587                  added above.  Otherwise, abort.  */
 588               gcc_assert (GET_CODE (op0) == SUBREG
 589                           && (GET_MODE_SIZE (GET_MODE (op0))
 590                               == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
 591               op0 = SUBREG_REG (op0);
 592             }
 593           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 594                                 op0, (offset * UNITS_PER_WORD));
 595         }
 596       offset = 0;
 597     }
 598
 599   /* If VALUE has a floating-point or complex mode, access it as an
 600      integer of the corresponding size.  This can occur on a machine
 601      with 64 bit registers that uses SFmode for float.  It can also
 602      occur for unaligned float or complex fields.  */
 603   orig_value = value;
 604   if (GET_MODE (value) != VOIDmode
 605       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 606       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 607     {
 608       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 609       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 610     }
 611
 612   /* Now OFFSET is nonzero only if OP0 is memory
 613      and is therefore always measured in bytes.  */
 614
 615   if (HAVE_insv
 616       && GET_MODE (value) != BLKmode
 617       && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
 618       /* Ensure insv's size is wide enough for this field.  */
 619       && (GET_MODE_BITSIZE (op_mode) >= bitsize)
 620       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
 621             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
 622     {
 623       int xbitpos = bitpos;
 624       rtx value1;
 625       rtx xop0 = op0;
 626       rtx last = get_last_insn ();
 627       rtx pat;
 628       enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
 629       int save_volatile_ok = volatile_ok;
 630
           /* volatile_ok is overridden only temporarily; every path out of
              the block below restores SAVE_VOLATILE_OK.  */
 631       volatile_ok = 1;
 632
 633       /* If this machine's insv can only insert into a register, copy OP0
 634          into a register and save it back later.  */
 635       /* This used to check flag_force_mem, but that was a serious
 636          de-optimization now that flag_force_mem is enabled by -O2.  */
 637       if (MEM_P (op0)
 638           && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
 639                 (op0, VOIDmode)))
 640         {
 641           rtx tempreg;
 642           enum machine_mode bestmode;
 643
 644           /* Get the mode to use for inserting into this field.  If OP0 is
 645              BLKmode, get the smallest mode consistent with the alignment. If
 646              OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
 647              mode. Otherwise, use the smallest mode containing the field.  */
 648
 649           if (GET_MODE (op0) == BLKmode
 650               || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
 651             bestmode
 652               = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
 653                                MEM_VOLATILE_P (op0));
 654           else
 655             bestmode = GET_MODE (op0);
 656
 657           if (bestmode == VOIDmode
 658               || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 659                   && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
                 {
                   /* Undo the volatile_ok override before falling back to
                      the shift-and-mask code.  */
                   volatile_ok = save_volatile_ok;
 660               goto insv_loses;
                 }
 661
 662           /* Adjust address to point to the containing unit of that mode.
 663              Compute offset as multiple of this unit, counting in bytes.  */
 664           unit = GET_MODE_BITSIZE (bestmode);
 665           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 666           bitpos = bitnum % unit;
 667           op0 = adjust_address (op0, bestmode,  offset);
 668
 669           /* Fetch that unit, store the bitfield in it, then store
 670              the unit.  */
 671           tempreg = copy_to_reg (op0);
 672           store_bit_field (tempreg, bitsize, bitpos, fieldmode, orig_value);
 673           emit_move_insn (op0, tempreg);
               /* Undo the volatile_ok override before returning, so that it
                  does not leak to the caller.  */
               volatile_ok = save_volatile_ok;
 674           return value;
 675         }
 676       volatile_ok = save_volatile_ok;
 677
 678       /* Add OFFSET into OP0's address.  */
 679       if (MEM_P (xop0))
 680         xop0 = adjust_address (xop0, byte_mode, offset);
 681
 682       /* If xop0 is a register, we need it in MAXMODE
 683          to make it acceptable to the format of insv.  */
 684       if (GET_CODE (xop0) == SUBREG)
 685         /* We can't just change the mode, because this might clobber op0,
 686            and we will need the original value of op0 if insv fails.  */
 687         xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 688       if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
 689         xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
 690
 691       /* On big-endian machines, we count bits from the most significant.
 692          If the bit field insn does not, we must invert.  */
 693
 694       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 695         xbitpos = unit - bitsize - xbitpos;
 696
 697       /* We have been counting XBITPOS within UNIT.
 698          Count instead within the size of the register.  */
 699       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
 700         xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
 701
 702       unit = GET_MODE_BITSIZE (maxmode);
 703
 704       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
 705       value1 = value;
 706       if (GET_MODE (value) != maxmode)
 707         {
 708           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 709             {
 710               /* Optimization: Don't bother really extending VALUE
 711                  if it has all the bits we will actually use.  However,
 712                  if we must narrow it, be sure we do it correctly.  */
 713
 714               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
 715                 {
 716                   rtx tmp;
 717
 718                   tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
 719                   if (! tmp)
 720                     tmp = simplify_gen_subreg (maxmode,
 721                                                force_reg (GET_MODE (value),
 722                                                           value1),
 723                                                GET_MODE (value), 0);
 724                   value1 = tmp;
 725                 }
 726               else
 727                 value1 = gen_lowpart (maxmode, value1);
 728             }
 729           else if (GET_CODE (value) == CONST_INT)
 730             value1 = gen_int_mode (INTVAL (value), maxmode);
 731           else
 732             /* Parse phase is supposed to make VALUE's data type
 733                match that of the component reference, which is a type
 734                at least as wide as the field; so VALUE should have
 735                a mode that corresponds to that type.  */
 736             gcc_assert (CONSTANT_P (value));
 737         }
 738
 739       /* If this machine's insv insists on a register,
 740          get VALUE1 into a register.  */
 741       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 742              (value1, maxmode)))
 743         value1 = force_reg (maxmode, value1);
 744
 745       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 746       if (pat)
 747         emit_insn (pat);
 748       else
 749         {
               /* insv could not be generated: discard the insns emitted
                  while preparing its operands and use the generic code.  */
 750           delete_insns_since (last);
 751           store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 752         }
 753     }
 754   else
 755     insv_loses:
 756     /* Insv is not available; store using shifts and boolean ops.  */
 757     store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 758   return value;
 759 }
 760 \f
 761 /* Use shifts and boolean operations to store VALUE
 762    into a bit field of width BITSIZE
 763    in a memory location specified by OP0 except offset by OFFSET bytes.
 764      (OFFSET must be 0 if OP0 is a register.)
 765    The field starts at position BITPOS within the byte.
 766     (If OP0 is a register, it may be a full word or a narrower mode,
 767      but BITPOS still counts within a full word,
 768      which is significant on bigendian machines.)
        Nothing is returned.  The new contents of the containing datum are
        built by clearing the old field in OP0 and OR-ing in the shifted
        VALUE, and are copied back to OP0 if they were computed elsewhere.
        When OP0 is a REG or SUBREG and the field runs past the end of a
        word, or OP0 is a MEM and get_best_mode returns VOIDmode, the whole
        job is handed to store_split_bit_field instead.  */
 769
 770 static void
 771 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 772                        unsigned HOST_WIDE_INT bitsize,
 773                        unsigned HOST_WIDE_INT bitpos, rtx value)
 774 {
 775   enum machine_mode mode;
       /* Width in bits of the datum that holds the field.  Stays a full
          word for a REG or SUBREG; replaced by the width of the chosen
          mode for a MEM.  */
 776   unsigned int total_bits = BITS_PER_WORD;
 777   rtx subtarget, temp;
       /* Set only when VALUE is a CONST_INT whose low BITSIZE bits are all
          clear (ALL_ZERO) or all set (ALL_ONE).  They let the IOR,
          respectively the AND, at the end of the function be skipped.  */
 778   int all_zero = 0;
 779   int all_one = 0;
 780
 781   /* There is a case not handled here:
 782      a structure with a known alignment of just a halfword
 783      and a field split across two aligned halfwords within the structure.
 784      Or likewise a structure with a known alignment of just a byte
 785      and a field split across two bytes.
 786      Such cases are not supposed to be able to occur.  */
 787
 788   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 789     {
 790       gcc_assert (!offset);
 791       /* Special treatment for a bit field split across two registers.  */
 792       if (bitsize + bitpos > BITS_PER_WORD)
 793         {
 794           store_split_bit_field (op0, bitsize, bitpos, value);
 795           return;
 796         }
 797     }
 798   else
 799     {
 800       /* Get the proper mode to use for this field.  We want a mode that
 801          includes the entire field.  If such a mode would be larger than
 802          a word, we won't be doing the extraction the normal way.
 803          We don't want a mode bigger than the destination.  */
 804
 805       mode = GET_MODE (op0);
           /* Use word_mode as the limit when OP0's mode has zero width or
              is wider than a word.  */
 806       if (GET_MODE_BITSIZE (mode) == 0
 807           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 808         mode = word_mode;
           /* OFFSET is in bytes, so scale it to bits before adding BITPOS.  */
 809       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 810                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 811
 812       if (mode == VOIDmode)
 813         {
 814           /* The only way this should occur is if the field spans word
 815              boundaries.  */
 816           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 817                                  value);
 818           return;
 819         }
 820
 821       total_bits = GET_MODE_BITSIZE (mode);
 822
 823       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 824          be in the range 0 to total_bits-1, and put any excess bytes in
 825          OFFSET.  */
 826       if (bitpos >= total_bits)
 827         {
               /* bitpos / total_bits whole TOTAL_BITS-sized units are taken
                  out of BITPOS (as bits) and added to OFFSET (as bytes).  */
 828           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 829           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 830                      * BITS_PER_UNIT);
 831         }
 832
 833       /* Get ref to an aligned byte, halfword, or word containing the field.
 834          Adjust BITPOS to be position within a word,
 835          and OFFSET to be the offset of that word.
 836          Then alter OP0 to refer to that word.  */
 837       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 838       offset -= (offset % (total_bits / BITS_PER_UNIT));
 839       op0 = adjust_address (op0, mode, offset);
 840     }
 841
       /* From here on MODE is read back from OP0 itself.  On the REG/SUBREG
          path above MODE has not been assigned before this point.  */
 842   mode = GET_MODE (op0);
 843
 844   /* Now MODE is either some integral mode for a MEM as OP0,
 845      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 846      The bit field is contained entirely within OP0.
 847      BITPOS is the starting bit number within OP0.
 848      (OP0's mode may actually be narrower than MODE.)  */
 849
 850   if (BYTES_BIG_ENDIAN)
 851       /* BITPOS is the distance between our msb
 852          and that of the containing datum.
 853          Convert it to the distance from the lsb.  */
 854       bitpos = total_bits - bitsize - bitpos;
 855
 856   /* Now BITPOS is always the distance between our lsb
 857      and that of OP0.  */
 858
 859   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 860      we must first convert its mode to MODE.  */
 861
 862   if (GET_CODE (value) == CONST_INT)
 863     {
 864       HOST_WIDE_INT v = INTVAL (value);
 865
           /* Keep only the low BITSIZE bits of the constant.  The test
              avoids shifting 1 by the full width of HOST_WIDE_INT.  */
 866       if (bitsize < HOST_BITS_PER_WIDE_INT)
 867         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 868
           /* Record the two constants for which one of the boolean
              operations below is unnecessary.  */
 869       if (v == 0)
 870         all_zero = 1;
 871       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 872                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 873                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 874         all_one = 1;
 875
 876       value = lshift_value (mode, value, bitpos, bitsize);
 877     }
 878   else
 879     {
           /* MUST_AND is computed before VALUE is converted below, because
              it looks at the mode VALUE arrived in.  No mask is applied when
              that mode is exactly BITSIZE bits wide, or when the field ends
              at the top bit of MODE.
              NOTE(review): in the latter case the left shift by BITPOS
              presumably pushes any excess high bits out of MODE -- confirm.  */
 880       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 881                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 882
 883       if (GET_MODE (value) != mode)
 884         {
               /* A REG or SUBREG wider than MODE is narrowed with
                  gen_lowpart; everything else goes through convert_to_mode.
                  NOTE(review): the final argument 1 presumably requests an
                  unsigned conversion -- confirm against convert_to_mode.  */
 885           if ((REG_P (value) || GET_CODE (value) == SUBREG)
 886               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 887             value = gen_lowpart (mode, value);
 888           else
 889             value = convert_to_mode (mode, value, 1);
 890         }
 891
 892       if (must_and)
 893         value = expand_binop (mode, and_optab, value,
 894                               mask_rtx (mode, 0, bitsize, 0),
 895                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 896       if (bitpos > 0)
 897         value = expand_shift (LSHIFT_EXPR, mode, value,
 898                               build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
 899     }
 900
 901   /* Now clear the chosen bits in OP0,
 902      except that if VALUE is -1 we need not bother.  */
 903
       /* SUBTARGET is handed to expand_binop below (presumably as the
          suggested place for the result): OP0 itself when OP0 is a REG or
          flag_force_mem is clear, otherwise 0.  */
 904   subtarget = (REG_P (op0) || ! flag_force_mem) ? op0 : 0;
 905
 906   if (! all_one)
 907     {
 908       temp = expand_binop (mode, and_optab, op0,
 909                            mask_rtx (mode, bitpos, bitsize, 1),
 910                            subtarget, 1, OPTAB_LIB_WIDEN);
           /* The IOR below is given the AND result as its SUBTARGET.  */
 911       subtarget = temp;
 912     }
 913   else
 914     temp = op0;
 915
 916   /* Now logical-or VALUE into OP0, unless it is zero.  */
 917
 918   if (! all_zero)
 919     temp = expand_binop (mode, ior_optab, temp, value,
 920                          subtarget, 1, OPTAB_LIB_WIDEN);
       /* If the final result did not end up in OP0 itself, copy it there.  */
 921   if (op0 != temp)
 922     emit_move_insn (op0, temp);
 923 }
 924 \f
 925 /* Store a bit field that is split across multiple accessible memory objects.
 926
 927    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 928    BITSIZE is the field width; BITPOS the position of its first bit
 929    (within the word).
 930    VALUE is the value to store.
 931
 932    This does not yet handle fields wider than BITS_PER_WORD.
        The field is stored in pieces.  Each pass of the loop takes the next
        THISSIZE bits of VALUE (most significant first if BYTES_BIG_ENDIAN,
        least significant first otherwise) and stores them with
        store_fixed_bit_field.  THISSIZE is chosen so that a piece never
        runs past the end of the current UNIT.  Nothing is returned.  */
 933
 934 static void
 935 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 936                        unsigned HOST_WIDE_INT bitpos, rtx value)
 937 {
       /* Size in bits of the chunks OP0 is divided into.  */
 938   unsigned int unit;
       /* Number of bits of the field stored so far.  */
 939   unsigned int bitsdone = 0;
 940
 941   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 942      much at a time.  */
 943   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 944     unit = BITS_PER_WORD;
 945   else
 946     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
 947
 948   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 949      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 950      that VALUE might be a floating-point constant.  */
 951   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
 952     {
 953       rtx word = gen_lowpart_common (word_mode, value);
 954
           /* If gen_lowpart_common gave nothing, or gave VALUE back
              unchanged, force VALUE into a register first (in word_mode
              when VALUE has VOIDmode) and take the low part of that.  */
 955       if (word && (value != word))
 956         value = word;
 957       else
 958         value = gen_lowpart_common (word_mode,
 959                                     force_reg (GET_MODE (value) != VOIDmode
 960                                                ? GET_MODE (value)
 961                                                : word_mode, value));
 962     }
 963
 964   while (bitsdone < bitsize)
 965     {
           /* THISSIZE is the number of bits stored by this pass, PART the
              rtx holding those bits, and WORD the object they go into.
              THISPOS is the bit position of the piece within its UNIT and
              OFFSET the index of that UNIT.  */
 966       unsigned HOST_WIDE_INT thissize;
 967       rtx part, word;
 968       unsigned HOST_WIDE_INT thispos;
 969       unsigned HOST_WIDE_INT offset;
 970
 971       offset = (bitpos + bitsdone) / unit;
 972       thispos = (bitpos + bitsdone) % unit;
 973
 974       /* THISSIZE must not overrun a word boundary.  Otherwise,
 975          store_fixed_bit_field will call us again, and we will mutually
 976          recurse forever.  */
 977       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
 978       thissize = MIN (thissize, unit - thispos);
 979
 980       if (BYTES_BIG_ENDIAN)
 981         {
               /* Width used for the endianness compensation below: a word,
                  unless VALUE is a MEM with a mode other than BLKmode, in
                  which case it is the width of that mode.  */
 982           int total_bits;
 983
 984           /* We must do an endian conversion exactly the same way as it is
 985              done in extract_bit_field, so that the two calls to
 986              extract_fixed_bit_field will have comparable arguments.  */
 987           if (!MEM_P (value) || GET_MODE (value) == BLKmode)
 988             total_bits = BITS_PER_WORD;
 989           else
 990             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
 991
 992           /* Fetch successively less significant portions.  */
               /* A CONST_INT is sliced directly: shift the wanted bits down
                  to bit 0 and mask to THISSIZE bits.
                  NOTE(review): if THISSIZE could ever equal
                  HOST_BITS_PER_WIDE_INT the shift of 1 in the mask would be
                  undefined; store_fixed_bit_field guards the same expression
                  with a bitsize < HOST_BITS_PER_WIDE_INT test -- confirm it
                  cannot happen here.  */
 993           if (GET_CODE (value) == CONST_INT)
 994             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 995                              >> (bitsize - bitsdone - thissize))
 996                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 997           else
 998             /* The args are chosen so that the last part includes the
 999                lsb.  Give extract_bit_field the value it needs (with
1000                endianness compensation) to fetch the piece we want.  */
1001             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1002                                             total_bits - bitsize + bitsdone,
1003                                             NULL_RTX, 1);
1004         }
1005       else
1006         {
1007           /* Fetch successively more significant portions.  */
               /* The bits already stored are the low BITSDONE bits of VALUE,
                  so the next piece starts at bit BITSDONE.  The same
                  NOTE(review) about the width of the mask shift applies.  */
1008           if (GET_CODE (value) == CONST_INT)
1009             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1010                              >> bitsdone)
1011                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1012           else
1013             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1014                                             bitsdone, NULL_RTX, 1);
1015         }
1016
1017       /* If OP0 is a register, then handle OFFSET here.
1018
1019          When handling multiword bitfields, extract_bit_field may pass
1020          down a word_mode SUBREG of a larger REG for a bitfield that actually
1021          crosses a word boundary.  Thus, for a SUBREG, we must find
1022          the current word starting from the base register.  */
           /* NOTE(review): the comment above names extract_bit_field, but
              this is the store path; presumably store_bit_field is meant --
              confirm.  */
1023       if (GET_CODE (op0) == SUBREG)
1024         {
               /* Word index within the inner register: the SUBREG's byte
                  offset converted to words, plus OFFSET.  */
1025           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1026           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1027                                         GET_MODE (SUBREG_REG (op0)));
1028           offset = 0;
1029         }
1030       else if (REG_P (op0))
1031         {
1032           word = operand_subword_force (op0, offset, GET_MODE (op0));
1033           offset = 0;
1034         }
1035       else
1036         word = op0;
1037
1038       /* OFFSET is in UNITs, and UNIT is in bits.
1039          store_fixed_bit_field wants offset in bytes.  */
           /* OFFSET was zeroed above for a REG or SUBREG, so a zero byte
              offset is passed in those cases, as store_fixed_bit_field
              asserts.  */
1040       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1041                              thispos, part);
1042       bitsdone += thissize;
1043     }
1044 }
1045 \f
1046 /* Generate code to extract a bit-field from STR_RTX
1047    containing BITSIZE bits, starting at BITNUM,
1048    and put it in TARGET if possible (if TARGET is nonzero).
1049    Regardless of TARGET, we return the rtx for where the value is placed.
1050
1051    STR_RTX is the structure containing the field (a REG or MEM).
1052    UNSIGNEDP is nonzero if this is an unsigned bit field.
1053    MODE is the natural mode of the field value once extracted.
1054    TMODE is the mode the caller would like the value to have;
1055    but the value may be returned with type MODE instead.
1056
1057    TOTAL_SIZE is the size in bytes of the containing structure,
1058    or -1 if varying.
1059
1060    If a TARGET is specified and we can store in it at no extra cost,
1061    we do so, and return TARGET.
1062    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1063    if they are equally easy.  */
1064
1065 rtx
1066 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1067                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1068                    enum machine_mode mode, enum machine_mode tmode)
1069 {
1070   unsigned int unit
1071     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1072   unsigned HOST_WIDE_INT offset, bitpos;
1073   rtx op0 = str_rtx;
1074   rtx spec_target = target;
1075   rtx spec_target_subreg = 0;
1076   enum machine_mode int_mode;
1077   enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1078   enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1079   enum machine_mode mode1;
1080   int byte_offset;
1081
1082   if (tmode == VOIDmode)
1083     tmode = mode;
1084
1085   while (GET_CODE (op0) == SUBREG)
1086     {
1087       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1088       op0 = SUBREG_REG (op0);
1089     }
1090
1091   /* If we have an out-of-bounds access to a register, just return an
1092      uninitialised register of the required mode.  This can occur if the
1093      source code contains an out-of-bounds access to a small array.  */
1094   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1095     return gen_reg_rtx (tmode);
1096
1097   if (REG_P (op0)
1098       && mode == GET_MODE (op0)
1099       && bitnum == 0
1100       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1101     {
1102       /* We're trying to extract a full register from itself.  */
1103       return op0;
1104     }
1105
1106   /* Use vec_extract patterns for extracting parts of vectors whenever
1107      available.  */
1108   if (VECTOR_MODE_P (GET_MODE (op0))
1109       && !MEM_P (op0)
1110       && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
1111           != CODE_FOR_nothing)
1112       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1113           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1114     {
1115       enum machine_mode outermode = GET_MODE (op0);
1116       enum machine_mode innermode = GET_MODE_INNER (outermode);
1117       int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
1118       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1119       rtx rtxpos = GEN_INT (pos);
1120       rtx src = op0;
1121       rtx dest = NULL, pat, seq;
1122       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1123       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1124       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1125
1126       if (innermode == tmode || innermode == mode)
1127         dest = target;
1128
1129       if (!dest)
1130         dest = gen_reg_rtx (innermode);
1131
1132       start_sequence ();
1133
1134       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1135         dest = copy_to_mode_reg (mode0, dest);
1136
1137       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1138         src = copy_to_mode_reg (mode1, src);
1139
1140       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1141         rtxpos = copy_to_mode_reg (mode1, rtxpos);
1142
1143       /* We could handle this, but we should always be called with a pseudo
1144          for our targets and all insns should take them as outputs.  */
1145       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1146                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
1147                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1148
1149       pat = GEN_FCN (icode) (dest, src, rtxpos);
1150       seq = get_insns ();
1151       end_sequence ();
1152       if (pat)
1153         {
1154           emit_insn (seq);
1155           emit_insn (pat);
1156           return dest;
1157         }
1158     }
1159
1160   /* Make sure we are playing with integral modes.  Pun with subregs
1161      if we aren't.  */
1162   {
1163     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1164     if (imode != GET_MODE (op0))
1165       {
1166         if (MEM_P (op0))
1167           op0 = adjust_address (op0, imode, 0);
1168         else
1169           {
1170             gcc_assert (imode != BLKmode);
1171             op0 = gen_lowpart (imode, op0);
1172
1173             /* If we got a SUBREG, force it into a register since we
1174                aren't going to be able to do another SUBREG on it.  */
1175             if (GET_CODE (op0) == SUBREG)
1176               op0 = force_reg (imode, op0);
1177           }
1178       }
1179   }
1180
1181   /* We may be accessing data outside the field, which means
1182      we can alias adjacent data.  */
1183   if (MEM_P (op0))
1184     {
1185       op0 = shallow_copy_rtx (op0);
1186       set_mem_alias_set (op0, 0);
1187       set_mem_expr (op0, 0);
1188     }
1189
1190   /* Extraction of a full-word or multi-word value from a structure
1191      in a register or aligned memory can be done with just a SUBREG.
1192      A subword value in the least significant part of a register
1193      can also be extracted with a SUBREG.  For this, we need the
1194      byte offset of the value in op0.  */
1195
1196   bitpos = bitnum % unit;
1197   offset = bitnum / unit;
1198   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1199
1200   /* If OP0 is a register, BITPOS must count within a word.
1201      But as we have it, it counts within whatever size OP0 now has.
1202      On a bigendian machine, these are not the same, so convert.  */
1203   if (BYTES_BIG_ENDIAN
1204       && !MEM_P (op0)
1205       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1206     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1207
1208   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1209      If that's wrong, the solution is to test for it and set TARGET to 0
1210      if needed.  */
1211
1212   /* Only scalar integer modes can be converted via subregs.  There is an
1213      additional problem for FP modes here in that they can have a precision
1214      which is different from the size.  mode_for_size uses precision, but
1215      we want a mode based on the size, so we must avoid calling it for FP
1216      modes.  */
1217   mode1  = (SCALAR_INT_MODE_P (tmode)
1218             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1219             : mode);
1220
1221   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1222         && bitpos % BITS_PER_WORD == 0)
1223        || (mode1 != BLKmode
1224            /* ??? The big endian test here is wrong.  This is correct
1225               if the value is in a register, and if mode_for_size is not
1226               the same mode as op0.  This causes us to get unnecessarily
1227               inefficient code from the Thumb port when -mbig-endian.  */
1228            && (BYTES_BIG_ENDIAN
1229                ? bitpos + bitsize == BITS_PER_WORD
1230                : bitpos == 0)))
1231       && ((!MEM_P (op0)
1232            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1233                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1234            && GET_MODE_SIZE (mode1) != 0
1235            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1236           || (MEM_P (op0)
1237               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1238                   || (offset * BITS_PER_UNIT % bitsize == 0
1239                       && MEM_ALIGN (op0) % bitsize == 0)))))
1240     {
1241       if (mode1 != GET_MODE (op0))
1242         {
1243           if (MEM_P (op0))
1244             op0 = adjust_address (op0, mode1, offset);
1245           else
1246             {
1247               rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1248                                              byte_offset);
1249               if (sub == NULL)
1250                 goto no_subreg_mode_swap;
1251               op0 = sub;
1252             }
1253         }
1254       if (mode1 != mode)
1255         return convert_to_mode (tmode, op0, unsignedp);
1256       return op0;
1257     }
1258  no_subreg_mode_swap:
1259
1260   /* Handle fields bigger than a word.  */
1261
1262   if (bitsize > BITS_PER_WORD)
1263     {
1264       /* Here we transfer the words of the field
1265          in the order least significant first.
1266          This is because the most significant word is the one which may
1267          be less than full.  */
1268
1269       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1270       unsigned int i;
1271
1272       if (target == 0 || !REG_P (target))
1273         target = gen_reg_rtx (mode);
1274
1275       /* Indicate for flow that the entire target reg is being set.  */
1276       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1277
1278       for (i = 0; i < nwords; i++)
1279         {
1280           /* If I is 0, use the low-order word in both field and target;
1281              if I is 1, use the next to lowest word; and so on.  */
1282           /* Word number in TARGET to use.  */
1283           unsigned int wordnum
1284             = (WORDS_BIG_ENDIAN
1285                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1286                : i);
1287           /* Offset from start of field in OP0.  */
1288           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1289                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1290                                                 * (int) BITS_PER_WORD))
1291                                      : (int) i * BITS_PER_WORD);
1292           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1293           rtx result_part
1294             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1295                                            bitsize - i * BITS_PER_WORD),
1296                                  bitnum + bit_offset, 1, target_part, mode,
1297                                  word_mode);
1298
1299           gcc_assert (target_part);
1300
1301           if (result_part != target_part)
1302             emit_move_insn (target_part, result_part);
1303         }
1304
1305       if (unsignedp)
1306         {
1307           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1308              need to be zero'd out.  */
1309           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1310             {
1311               unsigned int i, total_words;
1312
1313               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1314               for (i = nwords; i < total_words; i++)
1315                 emit_move_insn
1316                   (operand_subword (target,
1317                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1318                                     1, VOIDmode),
1319                    const0_rtx);
1320             }
1321           return target;
1322         }
1323
1324       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1325       target = expand_shift (LSHIFT_EXPR, mode, target,
1326                              build_int_cst (NULL_TREE,
1327                                             GET_MODE_BITSIZE (mode) - bitsize),
1328                              NULL_RTX, 0);
1329       return expand_shift (RSHIFT_EXPR, mode, target,
1330                            build_int_cst (NULL_TREE,
1331                                           GET_MODE_BITSIZE (mode) - bitsize),
1332                            NULL_RTX, 0);
1333     }
1334
1335   /* From here on we know the desired field is smaller than a word.  */
1336
1337   /* Check if there is a correspondingly-sized integer field, so we can
1338      safely extract it as one size of integer, if necessary; then
1339      truncate or extend to the size that is wanted; then use SUBREGs or
1340      convert_to_mode to get one of the modes we really wanted.  */
1341
1342   int_mode = int_mode_for_mode (tmode);
1343   if (int_mode == BLKmode)
1344     int_mode = int_mode_for_mode (mode);
1345   /* Should probably push op0 out to memory and then do a load.  */
1346   gcc_assert (int_mode != BLKmode);
1347
1348   /* OFFSET is the number of words or bytes (UNIT says which)
1349      from STR_RTX to the first word or byte containing part of the field.  */
1350   if (!MEM_P (op0))
1351     {
1352       if (offset != 0
1353           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1354         {
1355           if (!REG_P (op0))
1356             op0 = copy_to_reg (op0);
1357           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1358                                 op0, (offset * UNITS_PER_WORD));
1359         }
1360       offset = 0;
1361     }
1362
1363   /* Now OFFSET is nonzero only for memory operands.  */
1364
1365   if (unsignedp)
1366     {
1367       if (HAVE_extzv
1368           && (GET_MODE_BITSIZE (extzv_mode) >= bitsize)
1369           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1370                 && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1371         {
1372           unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1373           rtx bitsize_rtx, bitpos_rtx;
1374           rtx last = get_last_insn ();
1375           rtx xop0 = op0;
1376           rtx xtarget = target;
1377           rtx xspec_target = spec_target;
1378           rtx xspec_target_subreg = spec_target_subreg;
1379           rtx pat;
1380           enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1381
1382           if (MEM_P (xop0))
1383             {
1384               int save_volatile_ok = volatile_ok;
1385               volatile_ok = 1;
1386
1387               /* Is the memory operand acceptable?  */
1388               if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1389                      (xop0, GET_MODE (xop0))))
1390                 {
1391                   /* No, load into a reg and extract from there.  */
1392                   enum machine_mode bestmode;
1393
1394                   /* Get the mode to use for inserting into this field.  If
1395                      OP0 is BLKmode, get the smallest mode consistent with the
1396                      alignment. If OP0 is a non-BLKmode object that is no
1397                      wider than MAXMODE, use its mode. Otherwise, use the
1398                      smallest mode containing the field.  */
1399
1400                   if (GET_MODE (xop0) == BLKmode
1401                       || (GET_MODE_SIZE (GET_MODE (op0))
1402                           > GET_MODE_SIZE (maxmode)))
1403                     bestmode = get_best_mode (bitsize, bitnum,
1404                                               MEM_ALIGN (xop0), maxmode,
1405                                               MEM_VOLATILE_P (xop0));
1406                   else
1407                     bestmode = GET_MODE (xop0);
1408
1409                   if (bestmode == VOIDmode
1410                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1411                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1412                     goto extzv_loses;
1413
1414                   /* Compute offset as multiple of this unit,
1415                      counting in bytes.  */
1416                   unit = GET_MODE_BITSIZE (bestmode);
1417                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1418                   xbitpos = bitnum % unit;
1419                   xop0 = adjust_address (xop0, bestmode, xoffset);
1420
1421                   /* Fetch it to a register in that size.  */
1422                   xop0 = force_reg (bestmode, xop0);
1423
1424                   /* XBITPOS counts within UNIT, which is what is expected.  */
1425                 }
1426               else
1427                 /* Get ref to first byte containing part of the field.  */
1428                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1429
1430               volatile_ok = save_volatile_ok;
1431             }
1432
1433           /* If op0 is a register, we need it in MAXMODE (which is usually
1434              SImode). to make it acceptable to the format of extzv.  */
1435           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1436             goto extzv_loses;
1437           if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1438             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1439
1440           /* On big-endian machines, we count bits from the most significant.
1441              If the bit field insn does not, we must invert.  */
1442           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1443             xbitpos = unit - bitsize - xbitpos;
1444
1445           /* Now convert from counting within UNIT to counting in MAXMODE.  */
1446           if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1447             xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1448
1449           unit = GET_MODE_BITSIZE (maxmode);
1450
1451           if (xtarget == 0
1452               || (flag_force_mem && MEM_P (xtarget)))
1453             xtarget = xspec_target = gen_reg_rtx (tmode);
1454
1455           if (GET_MODE (xtarget) != maxmode)
1456             {
1457               if (REG_P (xtarget))
1458                 {
1459                   int wider = (GET_MODE_SIZE (maxmode)
1460                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1461                   xtarget = gen_lowpart (maxmode, xtarget);
1462                   if (wider)
1463                     xspec_target_subreg = xtarget;
1464                 }
1465               else
1466                 xtarget = gen_reg_rtx (maxmode);
1467             }
1468
1469           /* If this machine's extzv insists on a register target,
1470              make sure we have one.  */
1471           if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1472                  (xtarget, maxmode)))
1473             xtarget = gen_reg_rtx (maxmode);
1474
1475           bitsize_rtx = GEN_INT (bitsize);
1476           bitpos_rtx = GEN_INT (xbitpos);
1477
1478           pat = gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1479           if (pat)
1480             {
1481               emit_insn (pat);
1482               target = xtarget;
1483               spec_target = xspec_target;
1484               spec_target_subreg = xspec_target_subreg;
1485             }
1486           else
1487             {
1488               delete_insns_since (last);
1489               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1490                                                 bitpos, target, 1);
1491             }
1492         }
1493       else
1494       extzv_loses:
1495         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1496                                           bitpos, target, 1);
1497     }
1498   else
1499     {
1500       if (HAVE_extv
1501           && (GET_MODE_BITSIZE (extv_mode) >= bitsize)
1502           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1503                 && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1504         {
1505           int xbitpos = bitpos, xoffset = offset;
1506           rtx bitsize_rtx, bitpos_rtx;
1507           rtx last = get_last_insn ();
1508           rtx xop0 = op0, xtarget = target;
1509           rtx xspec_target = spec_target;
1510           rtx xspec_target_subreg = spec_target_subreg;
1511           rtx pat;
1512           enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1513
1514           if (MEM_P (xop0))
1515             {
1516               /* Is the memory operand acceptable?  */
1517               if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1518                      (xop0, GET_MODE (xop0))))
1519                 {
1520                   /* No, load into a reg and extract from there.  */
1521                   enum machine_mode bestmode;
1522
1523                   /* Get the mode to use for inserting into this field.  If
1524                      OP0 is BLKmode, get the smallest mode consistent with the
1525                      alignment. If OP0 is a non-BLKmode object that is no
1526                      wider than MAXMODE, use its mode. Otherwise, use the
1527                      smallest mode containing the field.  */
1528
1529                   if (GET_MODE (xop0) == BLKmode
1530                       || (GET_MODE_SIZE (GET_MODE (op0))
1531                           > GET_MODE_SIZE (maxmode)))
1532                     bestmode = get_best_mode (bitsize, bitnum,
1533                                               MEM_ALIGN (xop0), maxmode,
1534                                               MEM_VOLATILE_P (xop0));
1535                   else
1536                     bestmode = GET_MODE (xop0);
1537
1538                   if (bestmode == VOIDmode
1539                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1540                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1541                     goto extv_loses;
1542
1543                   /* Compute offset as multiple of this unit,
1544                      counting in bytes.  */
1545                   unit = GET_MODE_BITSIZE (bestmode);
1546                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1547                   xbitpos = bitnum % unit;
1548                   xop0 = adjust_address (xop0, bestmode, xoffset);
1549
1550                   /* Fetch it to a register in that size.  */
1551                   xop0 = force_reg (bestmode, xop0);
1552
1553                   /* XBITPOS counts within UNIT, which is what is expected.  */
1554                 }
1555               else
1556                 /* Get ref to first byte containing part of the field.  */
1557                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1558             }
1559
1560           /* If op0 is a register, we need it in MAXMODE (which is usually
1561              SImode) to make it acceptable to the format of extv.  */
1562           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1563             goto extv_loses;
1564           if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1565             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1566
1567           /* On big-endian machines, we count bits from the most significant.
1568              If the bit field insn does not, we must invert.  */
1569           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1570             xbitpos = unit - bitsize - xbitpos;
1571
1572           /* XBITPOS counts within a size of UNIT.
1573              Adjust to count within a size of MAXMODE.  */
1574           if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1575             xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1576
1577           unit = GET_MODE_BITSIZE (maxmode);
1578
1579           if (xtarget == 0
1580               || (flag_force_mem && MEM_P (xtarget)))
1581             xtarget = xspec_target = gen_reg_rtx (tmode);
1582
1583           if (GET_MODE (xtarget) != maxmode)
1584             {
1585               if (REG_P (xtarget))
1586                 {
1587                   int wider = (GET_MODE_SIZE (maxmode)
1588                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1589                   xtarget = gen_lowpart (maxmode, xtarget);
1590                   if (wider)
1591                     xspec_target_subreg = xtarget;
1592                 }
1593               else
1594                 xtarget = gen_reg_rtx (maxmode);
1595             }
1596
1597           /* If this machine's extv insists on a register target,
1598              make sure we have one.  */
1599           if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1600                  (xtarget, maxmode)))
1601             xtarget = gen_reg_rtx (maxmode);
1602
1603           bitsize_rtx = GEN_INT (bitsize);
1604           bitpos_rtx = GEN_INT (xbitpos);
1605
1606           pat = gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1607           if (pat)
1608             {
1609               emit_insn (pat);
1610               target = xtarget;
1611               spec_target = xspec_target;
1612               spec_target_subreg = xspec_target_subreg;
1613             }
1614           else
1615             {
1616               delete_insns_since (last);
1617               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1618                                                 bitpos, target, 0);
1619             }
1620         }
1621       else
1622       extv_loses:
1623         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1624                                           bitpos, target, 0);
1625     }
1626   if (target == spec_target)
1627     return target;
1628   if (target == spec_target_subreg)
1629     return spec_target;
1630   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1631     {
1632       /* If the target mode is not a scalar integral, first convert to the
1633          integer mode of that size and then access it as a floating-point
1634          value via a SUBREG.  */
1635       if (!SCALAR_INT_MODE_P (tmode))
1636         {
1637           enum machine_mode smode
1638             = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1639           target = convert_to_mode (smode, target, unsignedp);
1640           target = force_reg (smode, target);
1641           return gen_lowpart (tmode, target);
1642         }
1643
1644       return convert_to_mode (tmode, target, unsignedp);
1645     }
1646   return target;
1647 }
1648 \f
1649 /* Extract a bit field using shifts and boolean operations.
1650    Returns an rtx to represent the value.
1651    OP0 addresses a register (word) or memory (byte).
1652    BITPOS says which bit within the word or byte the bit field starts in.
1653    OFFSET says how many bytes farther the bit field starts;
1654     it is 0 if OP0 is a register.
1655    BITSIZE says how many bits long the bit field is.
1656     (If OP0 is a register, it may be narrower than a full word,
1657      but BITPOS still counts within a full word,
1658      which is significant on bigendian machines.)
1659
1660    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1661    If TARGET is nonzero, attempts to store the value there
1662    and return TARGET, but this is not guaranteed.
1663    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.
        A field that cannot be fetched with one access (a register field
        running past BITS_PER_WORD, or a memory field for which get_best_mode
        returns VOIDmode) is handed to extract_split_bit_field.
        A signed field is computed in the narrowest integer mode that is wide
        enough for the field at its position, which need not be TMODE.  */
1664
1665 static rtx
1666 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1667                          unsigned HOST_WIDE_INT offset,
1668                          unsigned HOST_WIDE_INT bitsize,
1669                          unsigned HOST_WIDE_INT bitpos, rtx target,
1670                          int unsignedp)
1671 {
1672   unsigned int total_bits = BITS_PER_WORD;
1673   enum machine_mode mode;
1674
       /* For a register operand TOTAL_BITS keeps its initial value of
          BITS_PER_WORD; for memory it becomes the width of the access mode.  */
1675   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1676     {
1677       /* Special treatment for a bit field split across two registers.  */
1678       if (bitsize + bitpos > BITS_PER_WORD)
1679         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1680     }
1681   else
1682     {
1683       /* Get the proper mode to use for this field.  We want a mode that
1684          includes the entire field.  If such a mode would be larger than
1685          a word, we won't be doing the extraction the normal way.  */
1686
1687       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1688                             MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1689
1690       if (mode == VOIDmode)
1691         /* The only way this should occur is if the field spans word
1692            boundaries.  */
1693         return extract_split_bit_field (op0, bitsize,
1694                                         bitpos + offset * BITS_PER_UNIT,
1695                                         unsignedp);
1696
1697       total_bits = GET_MODE_BITSIZE (mode);
1698
1699       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1700          be in the range 0 to total_bits-1, and put any excess bytes in
1701          OFFSET.  */
1702       if (bitpos >= total_bits)
1703         {
1704           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1705           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1706                      * BITS_PER_UNIT);
1707         }
1708
1709       /* Get ref to an aligned byte, halfword, or word containing the field.
1710          Adjust BITPOS to be position within a word,
1711          and OFFSET to be the offset of that word.
1712          Then alter OP0 to refer to that word.  */
1713       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1714       offset -= (offset % (total_bits / BITS_PER_UNIT));
1715       op0 = adjust_address (op0, mode, offset);
1716     }
1717
       /* From here on MODE is the mode OP0 is actually accessed in.  */
1718   mode = GET_MODE (op0);
1719
1720   if (BYTES_BIG_ENDIAN)
1721     /* BITPOS is the distance between our msb and that of OP0.
1722        Convert it to the distance from the lsb.  */
1723     bitpos = total_bits - bitsize - bitpos;
1724
1725   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1726      We have reduced the big-endian case to the little-endian case.  */
1727
1728   if (unsignedp)
1729     {
1730       if (bitpos)
1731         {
1732           /* If the field does not already start at the lsb,
1733              shift it so it does.  */
1734           tree amount = build_int_cst (NULL_TREE, bitpos);
1735           /* Maybe propagate the target for the shift.  */
1736           /* But not if we will return it--could confuse integrate.c.  */
1737           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
               /* The shift is done in MODE, so the suggested target is dropped
                  when the result mode TMODE differs (presumably TARGET has
                  mode TMODE -- TODO confirm).  */
1738           if (tmode != mode) subtarget = 0;
1739           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1740         }
1741       /* Convert the value to the desired mode.  */
1742       if (mode != tmode)
1743         op0 = convert_to_mode (tmode, op0, 1);
1744
1745       /* Unless the msb of the field used to be the msb when we shifted,
1746          mask out the upper bits.  */
1747
1748       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1749         return expand_binop (GET_MODE (op0), and_optab, op0,
1750                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1751                              target, 1, OPTAB_LIB_WIDEN);
1752       return op0;
1753     }
1754
1755   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1756      then arithmetic-shift its lsb to the lsb of the word.  */
1757   op0 = force_reg (mode, op0);
       /* As in the unsigned case, TARGET is only offered to the shifts when no
          mode change is involved.  */
1758   if (mode != tmode)
1759     target = 0;
1760
1761   /* Find the narrowest integer mode that contains the field.  */
1762
1763   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1764        mode = GET_MODE_WIDER_MODE (mode))
1765     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1766       {
1767         op0 = convert_to_mode (mode, op0, 0);
1768         break;
1769       }
1770
       /* Shift left so the field's msb becomes the msb of MODE; nothing to do
          when it is already there.  */
1771   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1772     {
1773       tree amount
1774         = build_int_cst (NULL_TREE,
1775                          GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1776       /* Maybe propagate the target for the shift.  */
1777       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1778       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1779     }
1780
       /* Arithmetic right shift (UNSIGNEDP argument 0) by the remaining
          GET_MODE_BITSIZE (mode) - BITSIZE bits brings the field's lsb down
          to bit 0 and sign-extends it.  */
1781   return expand_shift (RSHIFT_EXPR, mode, op0,
1782                        build_int_cst (NULL_TREE,
1783                                       GET_MODE_BITSIZE (mode) - bitsize),
1784                        target, 0);
1785 }
1786 \f
1787 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1788    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1789    complement of that if COMPLEMENT.  The mask is truncated if
1790    necessary to the width of mode MODE.  The mask is zero-extended if
1791    BITSIZE+BITPOS is too small for MODE.
        The mask is assembled as two HOST_WIDE_INT halves, MASKLOW for bits
        0 .. HOST_BITS_PER_WIDE_INT-1 and MASKHIGH for the bits above, which
        are handed to immed_double_const.  All shifts are done on unsigned
        HOST_WIDE_INT values so that no negative value is shifted left, and
        the right shift that trims MASKLOW is skipped for an empty field,
        where its count could equal the width of the type.  */
1792
1793 static rtx
1794 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1795 {
1796   HOST_WIDE_INT masklow, maskhigh;
1797
       /* Low half: ones from bit BITPOS upward, if the field starts in it.  */
1798   if (bitsize == 0)
1799     masklow = 0;
1800   else if (bitpos < HOST_BITS_PER_WIDE_INT)
1801     masklow = (HOST_WIDE_INT) ((unsigned HOST_WIDE_INT) -1 << bitpos);
1802   else
1803     masklow = 0;
1804
       /* Clear the bits at and above BITPOS + BITSIZE when the field ends
          inside the low half.  MASKLOW is already zero for an empty field.  */
1805   if (bitsize != 0 && bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1806     masklow &= ((unsigned HOST_WIDE_INT) -1
1807                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1808
       /* High half: start with ones from the field's first bit in this half.  */
1809   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1810     maskhigh = -1;
1811   else
1812     maskhigh = (HOST_WIDE_INT) ((unsigned HOST_WIDE_INT) -1
                                     << (bitpos - HOST_BITS_PER_WIDE_INT));
1813
       /* Then trim the top, or drop the half entirely when the field is empty
          or ends within the low half.  */
1814   if (bitsize == 0)
1815     maskhigh = 0;
1816   else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1817     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1818                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1819   else
1820     maskhigh = 0;
1821
1822   if (complement)
1823     {
1824       maskhigh = ~maskhigh;
1825       masklow = ~masklow;
1826     }
1827
1828   return immed_double_const (masklow, maskhigh, mode);
1829 }
1830
1831 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1832    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.
        VALUE is read with INTVAL.  The shifted value is split into a LOW and
        a HIGH HOST_WIDE_INT half and turned into an rtx of mode MODE by
        immed_double_const.  The truncation mask is computed on an unsigned
        HOST_WIDE_INT so that no negative value is shifted left.  */
1833
1834 static rtx
1835 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1836 {
1837   unsigned HOST_WIDE_INT v = INTVAL (value);
1838   HOST_WIDE_INT low, high;
1839
       /* Keep only the low BITSIZE bits of V; a BITSIZE of a full
          HOST_WIDE_INT or more keeps all of them.  */
1840   if (bitsize < HOST_BITS_PER_WIDE_INT)
1841     v &= ~((unsigned HOST_WIDE_INT) -1 << bitsize);
1842
1843   if (bitpos < HOST_BITS_PER_WIDE_INT)
1844     {
           /* Bits shifted out of LOW carry into HIGH.  BITPOS == 0 is tested
              separately because the right-shift count would then equal the
              width of the type.  */
1845       low = v << bitpos;
1846       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1847     }
1848   else
1849     {
           /* The whole value lands in the high half.  */
1850       low = 0;
1851       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1852     }
1853
1854   return immed_double_const (low, high, mode);
1855 }
1856 \f
1857 /* Extract a bit field from a memory by forcing the alignment of the
1858    memory.  This is efficient only if the field spans at least 4 boundaries.
1859
1860    OP0 is the MEM.
1861    BITSIZE is the field width; BITPOS is the position of the first bit.
1862    UNSIGNEDP is true if the result should be zero-extended.
        Returns the extracted value in the mode chosen to fit BITSIZE, or NULL
        if no integer mode twice that wide exists or if one of the expansion
        steps fails; in the latter case the insns emitted so far are
        deleted.  */
1863
1864 static rtx
1865 extract_force_align_mem_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1866                                    unsigned HOST_WIDE_INT bitpos,
1867                                    int unsignedp)
1868 {
1869   enum machine_mode mode, dmode;
1870   unsigned int m_bitsize, m_size;
1871   unsigned int sign_shift_up, sign_shift_dn;
1872   rtx base, a1, a2, v1, v2, comb, shift, result, start;
1873
1874   /* Choose a mode that will fit BITSIZE.  */
1875   mode = smallest_mode_for_size (bitsize, MODE_INT);
1876   m_size = GET_MODE_SIZE (mode);
1877   m_bitsize = GET_MODE_BITSIZE (mode);
1878
1879   /* Choose a mode twice as wide.  Fail if no such mode exists.  */
1880   dmode = mode_for_size (m_bitsize * 2, MODE_INT, false);
1881   if (dmode == BLKmode)
1882     return NULL;
1883
       /* Remember the last insn so that the fail path can delete everything
          emitted after it.  */
1884   do_pending_stack_adjust ();
1885   start = get_last_insn ();
1886
1887   /* At the end, we'll need an additional shift to deal with sign/zero
1888      extension.  By default this will be a left+right shift of the
1889      appropriate size.  But we may be able to eliminate one of them.  */
1890   sign_shift_up = sign_shift_dn = m_bitsize - bitsize;
1891
1892   if (STRICT_ALIGNMENT)
1893     {
           /* BASE addresses the byte holding the first bit; BITPOS becomes the
              bit offset within that byte.  */
1894       base = plus_constant (XEXP (op0, 0), bitpos / BITS_PER_UNIT);
1895       bitpos %= BITS_PER_UNIT;
1896
1897       /* We load two values to be concatenated.  There's an edge condition
1898          that bears notice -- an aligned value at the end of a page can
1899          only load one value lest we segfault.  So the two values we load
1900          are at "base & -size" and "(base + size - 1) & -size".  If base
1901          is unaligned, the addresses will be aligned and sequential; if
1902          base is aligned, the addresses will both be equal to base.  */
1903
1904       a1 = expand_simple_binop (Pmode, AND, force_operand (base, NULL),
1905                                 GEN_INT (-(HOST_WIDE_INT)m_size),
1906                                 NULL, true, OPTAB_LIB_WIDEN);
1907       mark_reg_pointer (a1, m_bitsize);
1908       v1 = gen_rtx_MEM (mode, a1);
1909       set_mem_align (v1, m_bitsize);
1910       v1 = force_reg (mode, validize_mem (v1));
1911
1912       a2 = plus_constant (base, GET_MODE_SIZE (mode) - 1);
1913       a2 = expand_simple_binop (Pmode, AND, force_operand (a2, NULL),
1914                                 GEN_INT (-(HOST_WIDE_INT)m_size),
1915                                 NULL, true, OPTAB_LIB_WIDEN);
1916       v2 = gen_rtx_MEM (mode, a2);
1917       set_mem_align (v2, m_bitsize);
1918       v2 = force_reg (mode, validize_mem (v2));
1919
1920       /* Combine these two values into a double-word value.  */
1921       if (m_bitsize == BITS_PER_WORD)
1922         {
               /* Word-sized halves: clobber COMB, then store V1 and V2 into
                  its two MODE-sized subregs.  */
1923           comb = gen_reg_rtx (dmode);
1924           emit_insn (gen_rtx_CLOBBER (VOIDmode, comb));
1925           emit_move_insn (gen_rtx_SUBREG (mode, comb, 0), v1);
1926           emit_move_insn (gen_rtx_SUBREG (mode, comb, m_size), v2);
1927         }
1928       else
1929         {
               /* Otherwise zero-extend both halves to DMODE and compute
                  V1 | (V2 << M_BITSIZE), swapping the halves first on
                  big-endian targets.  */
1930           if (BYTES_BIG_ENDIAN)
1931             comb = v1, v1 = v2, v2 = comb;
1932           v1 = convert_modes (dmode, mode, v1, true);
1933           if (v1 == NULL)
1934             goto fail;
1935           v2 = convert_modes (dmode, mode, v2, true);
1936           v2 = expand_simple_binop (dmode, ASHIFT, v2, GEN_INT (m_bitsize),
1937                                     NULL, true, OPTAB_LIB_WIDEN);
1938           if (v2 == NULL)
1939             goto fail;
1940           comb = expand_simple_binop (dmode, IOR, v1, v2, NULL,
1941                                       true, OPTAB_LIB_WIDEN);
1942           if (comb == NULL)
1943             goto fail;
1944         }
1945
           /* The byte misalignment of BASE within an M_SIZE-byte unit, scaled
              to bits, is the run-time part of the shift count.  */
1946       shift = expand_simple_binop (Pmode, AND, base, GEN_INT (m_size - 1),
1947                                    NULL, true, OPTAB_LIB_WIDEN);
1948       shift = expand_mult (Pmode, shift, GEN_INT (BITS_PER_UNIT), NULL, 1);
1949
1950       if (bitpos != 0)
1951         {
               /* If the sub-byte offset covers the whole up-shift, shift down
                  that much less and drop the up-shift.  */
1952           if (sign_shift_up <= bitpos)
1953             bitpos -= sign_shift_up, sign_shift_up = 0;
1954           shift = expand_simple_binop (Pmode, PLUS, shift, GEN_INT (bitpos),
1955                                        NULL, true, OPTAB_LIB_WIDEN);
1956         }
1957     }
1958   else
1959     {
1960       unsigned HOST_WIDE_INT offset = bitpos / BITS_PER_UNIT;
1961       bitpos %= BITS_PER_UNIT;
1962
1963       /* When strict alignment is not required, we can just load directly
1964          from memory without masking.  If the remaining BITPOS offset is
1965          small enough, we may be able to do all operations in MODE as
1966          opposed to DMODE.  */
1967       if (bitpos + bitsize <= m_bitsize)
1968         dmode = mode;
1969       comb = adjust_address (op0, dmode, offset);
1970
           /* Same folding of the up-shift into the down-shift as in the
              strict-alignment case.  */
1971       if (sign_shift_up <= bitpos)
1972         bitpos -= sign_shift_up, sign_shift_up = 0;
1973       shift = GEN_INT (bitpos);
1974     }
1975
1976   /* Shift down the double-word such that the requested value is at bit 0.  */
1977   if (shift != const0_rtx)
1978     comb = expand_simple_binop (dmode, unsignedp ? LSHIFTRT : ASHIFTRT,
1979                                 comb, shift, NULL, unsignedp, OPTAB_LIB_WIDEN);
1980   if (comb == NULL)
1981     goto fail;
1982
1983   /* If the field exactly matches MODE, then all we need to do is return the
1984      lowpart.  Otherwise, shift to get the sign bits set properly.  */
1985   result = force_reg (mode, gen_lowpart (mode, comb));
1986
1987   if (sign_shift_up)
1988     result = expand_simple_binop (mode, ASHIFT, result,
1989                                   GEN_INT (sign_shift_up),
1990                                   NULL_RTX, 0, OPTAB_LIB_WIDEN);
1991   if (sign_shift_dn)
1992     result = expand_simple_binop (mode, unsignedp ? LSHIFTRT : ASHIFTRT,
1993                                   result, GEN_INT (sign_shift_dn),
1994                                   NULL_RTX, 0, OPTAB_LIB_WIDEN);
1995
1996   return result;
1997
       /* Undo everything emitted since START and report failure.  */
1998  fail:
1999   delete_insns_since (start);
2000   return NULL;
2001 }
2002
2003 /* Extract a bit field that is split across two words
2004    and return an RTX for the result.
2005
2006    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2007    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2008    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
        The field is fetched in pieces that each lie within one UNIT-bit unit
        (BITS_PER_WORD for a register, at most that for memory), every piece
        is extracted unsigned in word_mode, and the pieces are shifted into
        place and combined with inclusive-or.  */
2009
2010 static rtx
2011 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2012                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
2013 {
2014   unsigned int unit;
2015   unsigned int bitsdone = 0;
2016   rtx result = NULL_RTX;
2017   int first = 1;
2018
2019   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2020      much at a time.  */
2021   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2022     unit = BITS_PER_WORD;
2023   else
2024     {
2025       unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
           /* The leading 0 disables this path, so
              extract_force_align_mem_bit_field is never tried here.  */
2026       if (0 && bitsize / unit > 2)
2027         {
2028           rtx tmp = extract_force_align_mem_bit_field (op0, bitsize, bitpos,
2029                                                        unsignedp);
2030           if (tmp)
2031             return tmp;
2032         }
2033     }
2034
       /* Each iteration extracts the next THISSIZE bits of the field.  */
2035   while (bitsdone < bitsize)
2036     {
2037       unsigned HOST_WIDE_INT thissize;
2038       rtx part, word;
2039       unsigned HOST_WIDE_INT thispos;
2040       unsigned HOST_WIDE_INT offset;
2041
           /* OFFSET is the index of the unit holding the next bit, THISPOS
              the position of that bit within the unit.  */
2042       offset = (bitpos + bitsdone) / unit;
2043       thispos = (bitpos + bitsdone) % unit;
2044
2045       /* THISSIZE must not overrun a word boundary.  Otherwise,
2046          extract_fixed_bit_field will call us again, and we will mutually
2047          recurse forever.  */
2048       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2049       thissize = MIN (thissize, unit - thispos);
2050
2051       /* If OP0 is a register, then handle OFFSET here.
2052
2053          When handling multiword bitfields, extract_bit_field may pass
2054          down a word_mode SUBREG of a larger REG for a bitfield that actually
2055          crosses a word boundary.  Thus, for a SUBREG, we must find
2056          the current word starting from the base register.  */
2057       if (GET_CODE (op0) == SUBREG)
2058         {
2059           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2060           word = operand_subword_force (SUBREG_REG (op0), word_offset,
2061                                         GET_MODE (SUBREG_REG (op0)));
2062           offset = 0;
2063         }
2064       else if (REG_P (op0))
2065         {
2066           word = operand_subword_force (op0, offset, GET_MODE (op0));
2067           offset = 0;
2068         }
2069       else
2070         word = op0;
2071
2072       /* Extract the parts in bit-counting order,
2073          whose meaning is determined by BYTES_PER_UNIT.
2074          OFFSET is in UNITs, and UNIT is in bits.
2075          extract_fixed_bit_field wants offset in bytes.
              NOTE(review): "BYTES_PER_UNIT" above probably means
              BYTES_BIG_ENDIAN, which is what the shifts below test.  */
2076       part = extract_fixed_bit_field (word_mode, word,
2077                                       offset * unit / BITS_PER_UNIT,
2078                                       thissize, thispos, 0, 1);
2079       bitsdone += thissize;
2080
2081       /* Shift this part into place for the result.  */
2082       if (BYTES_BIG_ENDIAN)
2083         {
               /* Earlier parts are more significant: leave room below this
                  part for the bits still to come.  */
2084           if (bitsize != bitsdone)
2085             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2086                                  build_int_cst (NULL_TREE, bitsize - bitsdone),
2087                                  0, 1);
2088         }
2089       else
2090         {
               /* Earlier parts are less significant: place this part above
                  the bits already extracted.  */
2091           if (bitsdone != thissize)
2092             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2093                                  build_int_cst (NULL_TREE,
2094                                                 bitsdone - thissize), 0, 1);
2095         }
2096
2097       if (first)
2098         result = part;
2099       else
2100         /* Combine the parts with bitwise or.  This works
2101            because we extracted each part as an unsigned bit field.  */
2102         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2103                                OPTAB_LIB_WIDEN);
2104
2105       first = 0;
2106     }
2107
2108   /* Unsigned bit field: we are done.  */
2109   if (unsignedp)
2110     return result;
2111   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2112   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2113                          build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2114                          NULL_RTX, 0);
2115   return expand_shift (RSHIFT_EXPR, word_mode, result,
2116                        build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2117                        NULL_RTX, 0);
2118 }
2119 \f
2120 /* Emit code to add INC into TARGET.  The sum is computed in TARGET's
        mode with TARGET suggested as the destination; when expand_binop
        hands the result back somewhere else, it is copied into TARGET.  */
2121
2122 void
2123 expand_inc (rtx target, rtx inc)
2124 {
2125   enum machine_mode mode = GET_MODE (target);
2126   rtx sum
2127     = expand_binop (mode, add_optab, target, inc, target, 0, OPTAB_LIB_WIDEN);
2128   if (sum != target)
2129     emit_move_insn (target, sum);
2130 }
2131
2132 /* Emit code to subtract DEC from TARGET.  The difference is computed in
        TARGET's mode with TARGET suggested as the destination; when
        expand_binop hands the result back somewhere else, it is copied into
        TARGET.  */
2133
2134 void
2135 expand_dec (rtx target, rtx dec)
2136 {
2137   enum machine_mode mode = GET_MODE (target);
2138   rtx difference
2139     = expand_binop (mode, sub_optab, target, dec, target, 0, OPTAB_LIB_WIDEN);
2140   if (difference != target)
2141     emit_move_insn (target, difference);
2142 }
2143 \f
2144 /* Output a shift instruction for expression code CODE,
2145    with SHIFTED being the rtx for the value to shift,
2146    and AMOUNT the tree for the amount to shift by.
2147    Store the result in the rtx TARGET, if that is convenient.
2148    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2149    Return the rtx for where the value is.
        LROTATE_EXPR and RROTATE_EXPR request a rotate; LSHIFT_EXPR and
        LROTATE_EXPR act leftward, every other CODE rightward.  The operation
        is done in mode MODE.  A count that expands to const0_rtx returns
        SHIFTED unchanged.  */
2150
2151 rtx
2152 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2153               tree amount, rtx target, int unsignedp)
2154 {
2155   rtx op1, temp = 0;
2156   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2157   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2158   int try;
2159
2160   /* Previously detected shift-counts computed by NEGATE_EXPR
2161      and shifted in the other direction; but that does not work
2162      on all machines.  */
2163
2164   op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
2165
       /* When SHIFT_COUNT_TRUNCATED holds, reduce an out-of-range constant
          count modulo the width of MODE, or strip a lowpart SUBREG from a
          variable count.  */
2166   if (SHIFT_COUNT_TRUNCATED)
2167     {
2168       if (GET_CODE (op1) == CONST_INT
2169           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2170               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2171         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2172                        % GET_MODE_BITSIZE (mode));
2173       else if (GET_CODE (op1) == SUBREG
2174                && subreg_lowpart_p (op1))
2175         op1 = SUBREG_REG (op1);
2176     }
2177
2178   if (op1 == const0_rtx)
2179     return shifted;
2180
2181   /* Check whether it's cheaper to implement a left shift by a constant
2182      bit count by a sequence of additions.  */
2183   if (code == LSHIFT_EXPR
2184       && GET_CODE (op1) == CONST_INT
2185       && INTVAL (op1) > 0
2186       && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2187       && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode])
2188     {
2189       int i;
           /* Each iteration doubles the value by adding it to itself.  */
2190       for (i = 0; i < INTVAL (op1); i++)
2191         {
2192           temp = force_reg (mode, shifted);
2193           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2194                                   unsignedp, OPTAB_LIB_WIDEN);
2195         }
2196       return shifted;
2197     }
2198
       /* Try OPTAB_DIRECT, then OPTAB_WIDEN, then OPTAB_LIB_WIDEN, stopping
          as soon as one of them produces a result in TEMP.  */
2199   for (try = 0; temp == 0 && try < 3; try++)
2200     {
2201       enum optab_methods methods;
2202
2203       if (try == 0)
2204         methods = OPTAB_DIRECT;
2205       else if (try == 1)
2206         methods = OPTAB_WIDEN;
2207       else
2208         methods = OPTAB_LIB_WIDEN;
2209
2210       if (rotate)
2211         {
2212           /* Widening does not work for rotation.  */
2213           if (methods == OPTAB_WIDEN)
2214             continue;
2215           else if (methods == OPTAB_LIB_WIDEN)
2216             {
2217               /* If we have been unable to open-code this by a rotation,
2218                  do it as the IOR of two shifts.  I.e., to rotate A
2219                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2220                  where C is the bitsize of A.
2221
2222                  It is theoretically possible that the target machine might
2223                  not be able to perform either shift and hence we would
2224                  be making two libcalls rather than just the one for the
2225                  shift (similarly if IOR could not be done).  We will allow
2226                  this extremely unlikely lossage to avoid complicating the
2227                  code below.  */
2228
2229               rtx subtarget = target == shifted ? 0 : target;
2230               rtx temp1;
2231               tree type = TREE_TYPE (amount);
2232               tree new_amount = make_tree (type, op1);
2233               tree other_amount
2234                 = fold (build2 (MINUS_EXPR, type, convert
2235                                 (type, build_int_cst
2236                                  (NULL_TREE, GET_MODE_BITSIZE (mode))),
2237                                 amount));
2238
2239               shifted = force_reg (mode, shifted);
2240
                   /* Both halves are logical shifts (UNSIGNEDP argument 1),
                      expanded by recursive calls.  */
2241               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2242                                    mode, shifted, new_amount, subtarget, 1);
2243               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2244                                     mode, shifted, other_amount, 0, 1);
2245               return expand_binop (mode, ior_optab, temp, temp1, target,
2246                                    unsignedp, methods);
2247             }
2248
2249           temp = expand_binop (mode,
2250                                left ? rotl_optab : rotr_optab,
2251                                shifted, op1, target, unsignedp, methods);
2252
2253           /* If we don't have the rotate, but we are rotating by a constant
2254              that is in range, try a rotate in the opposite direction.  */
2255
2256           if (temp == 0 && GET_CODE (op1) == CONST_INT
2257               && INTVAL (op1) > 0
2258               && (unsigned int) INTVAL (op1) < GET_MODE_BITSIZE (mode))
2259             temp = expand_binop (mode,
2260                                  left ? rotr_optab : rotl_optab,
2261                                  shifted,
2262                                  GEN_INT (GET_MODE_BITSIZE (mode)
2263                                           - INTVAL (op1)),
2264                                  target, unsignedp, methods);
2265         }
2266       else if (unsignedp)
2267         temp = expand_binop (mode,
2268                              left ? ashl_optab : lshr_optab,
2269                              shifted, op1, target, unsignedp, methods);
2270
2271       /* Do arithmetic shifts.
2272          Also, if we are going to widen the operand, we can just as well
2273          use an arithmetic right-shift instead of a logical one.  */
2274       if (temp == 0 && ! rotate
2275           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2276         {
2277           enum optab_methods methods1 = methods;
2278
2279           /* If trying to widen a log shift to an arithmetic shift,
2280              don't accept an arithmetic shift of the same size.  */
2281           if (unsignedp)
2282             methods1 = OPTAB_MUST_WIDEN;
2283
2284           /* Arithmetic shift */
2285
2286           temp = expand_binop (mode,
2287                                left ? ashl_optab : ashr_optab,
2288                                shifted, op1, target, unsignedp, methods1);
2289         }
2290
2291       /* We used to try extzv here for logical right shifts, but that was
2292          only useful for one machine, the VAX, and caused poor code
2293          generation there for lshrdi3, so the code was deleted and a
2294          define_expand for lshrsi3 was added to vax.md.  */
2295     }
2296
       /* One of the three attempts is required to have produced a result.  */
2297   gcc_assert (temp);
2298   return temp;
2299 }
2300 \f
2301 enum alg_code { alg_unknown, alg_zero, alg_m, alg_shift,
2302                   alg_add_t_m2, alg_sub_t_m2,     /* total +- (m << log) */
2303                   alg_add_factor, alg_sub_factor, /* (total << log) +- total */
2304                   alg_add_t2_m, alg_sub_t2_m };   /* (total << log) +- m */
2305
2306 /* This structure holds the "cost" of a multiply sequence.  The
2307    "cost" field holds the total rtx_cost of every operator in the
2308    synthetic multiplication sequence, hence cost(a op b) is defined
2309    as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2310    The "latency" field holds the minimum possible latency of the
2311    synthetic multiply, on a hypothetical infinitely parallel CPU.
2312    This is the critical path, or the maximum height, of the expression
2313    tree, i.e. the sum of rtx_costs on the most expensive path from
2314    any leaf to the root.  Hence latency(a op b) is defined as zero for
2315    leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise.  */
2316 /* When used as a search limit, either field may become negative.  */
2317 struct mult_cost {
2318   short cost;     /* Total rtx_cost of the multiplication sequence.  */
2319   short latency;  /* Critical-path latency of the multiplication sequence.  */
2320 };
2321
2322 /* This macro is used to compare a pointer to a mult_cost against a
2323    single integer "rtx_cost" value.  This is equivalent to the macro
2324    CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}.  X and Y are evaluated repeatedly.  */
2325 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y)    \
2326                              || ((X)->cost == (Y) && (X)->latency < (Y)))
2327
2328 /* This macro is used to compare two pointers to mult_costs against
2329    each other.  The macro returns true if X is strictly cheaper than Y.
2330    Currently, the cheaper of two mult_costs is the one with the
2331    lower "cost".  If "cost"s are tied, the lower latency is cheaper.  */
2332 #define CHEAPER_MULT_COST(X,Y)  ((X)->cost < (Y)->cost          \
2333                                  || ((X)->cost == (Y)->cost     \
2334                                      && (X)->latency < (Y)->latency))
2335
2336 /* This structure records a sequence of operations.
2337    `ops' is the number of operations recorded.
2338    `cost' is their total cost.
2339    The operations are stored in `op' and the corresponding
2340    logarithms of the integer coefficients in `log' (coeff = 2**log).
2341
2342    These are the operations:
2343    alg_zero             total := 0;
2344    alg_m                total := multiplicand;
2345    alg_shift            total := total * coeff
2346    alg_add_t_m2         total := total + multiplicand * coeff;
2347    alg_sub_t_m2         total := total - multiplicand * coeff;
2348    alg_add_factor       total := total * coeff + total;
2349    alg_sub_factor       total := total * coeff - total;
2350    alg_add_t2_m         total := total * coeff + multiplicand;
2351    alg_sub_t2_m         total := total * coeff - multiplicand;
2352
2353    The first operation (op[0]) must be either alg_zero or alg_m.  */
2354
2355 struct algorithm
2356 {
2357   struct mult_cost cost;
2358   short ops;
2359   /* The size of the OP and LOG fields are not directly related to the
2360      word size, but the worst-case algorithms will be if we have few
2361      consecutive ones or zeros, i.e., a multiplier like 10101010101...
2362      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2363      in total wordsize operations.  */
2364   enum alg_code op[MAX_BITS_PER_WORD];
2365   char log[MAX_BITS_PER_WORD];
2366 };
2367
2368 /* The entry for our multiplication cache/hash table.  */
2369 struct alg_hash_entry {
2370   /* The number we are multiplying by, at the full width synth_mult uses.  */
2371   unsigned HOST_WIDE_INT t;
2372
2373   /* The mode in which we are multiplying something by T.  */
2374   enum machine_mode mode;
2375
2376   /* Last operation of the best algorithm for T; alg_unknown if unused.  */
2377   enum alg_code alg;
2378 };
2379
2380 /* The number of cache/hash entries.  */
2381 #define NUM_ALG_HASH_ENTRIES 307
2382
2383 /* Each entry of ALG_HASH caches alg_code for some integer.  This is
2384    actually a hash table.  If we have a collision, the older
2385    entry is kicked out.  */
2386 static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2387
2388 /* Indicates the type of fixup needed after a constant multiplication
2389    by VAL.  BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means
2390    that the product by -VAL should be negated, and ADD_VARIANT means that
2391    the multiplicand should be added to the product by VAL - 1.  */
2392 enum mult_variant {basic_variant, negate_variant, add_variant};
2393
2394 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2395                         const struct mult_cost *, enum machine_mode mode);
2396 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2397                                  struct algorithm *, enum mult_variant *, int);
2398 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2399                               const struct algorithm *, enum mult_variant);
2400 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2401                                                  int, rtx *, int *, int *);
2402 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2403 static rtx extract_high_half (enum machine_mode, rtx);
2404 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2405 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2406                                        int, int);
2407 /* Compute the best algorithm for multiplying by T and store it in
2408    *ALG_OUT.  The algorithm must cost less than *COST_LIMIT.
2409    If ALG_OUT->cost is not cheaper than *COST_LIMIT on return, no
2410    algorithm was found and all other fields of *ALG_OUT are undefined.
2411    MODE is the machine mode of the multiplication.  */
2412
2413 static void
2414 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2415             const struct mult_cost *cost_limit, enum machine_mode mode)
2416 {
2417   int m;
2418   struct algorithm *alg_in, *best_alg;
2419   struct mult_cost best_cost;
2420   struct mult_cost new_limit;
2421   int op_cost, op_latency;
2422   unsigned HOST_WIDE_INT q;
2423   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2424   int hash_index;
2425   bool cache_hit = false;
2426   enum alg_code cache_alg = alg_zero;
2427
2428   /* Indicate that no algorithm is yet found.  If no algorithm
2429      is found, this value will be returned and indicate failure.  */
2430   alg_out->cost.cost = cost_limit->cost + 1;
2431   alg_out->cost.latency = cost_limit->latency + 1;
2432   /* Fail at once if the limit is negative, or zero with no latency left.  */
2433   if (cost_limit->cost < 0
2434       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2435     return;
2436
2437   /* Restrict the bits of "t" to the multiplication's mode.  */
2438   t &= GET_MODE_MASK (mode);
2439
2440   /* t == 1 can be done in zero cost.  */
2441   if (t == 1)
2442     {
2443       alg_out->ops = 1;
2444       alg_out->cost.cost = 0;
2445       alg_out->cost.latency = 0;
2446       alg_out->op[0] = alg_m;
2447       return;
2448     }
2449
2450   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2451      fail now.  */
2452   if (t == 0)
2453     {
2454       if (MULT_COST_LESS (cost_limit, zero_cost))
2455         return;
2456       else
2457         {
2458           alg_out->ops = 1;
2459           alg_out->cost.cost = zero_cost;
2460           alg_out->cost.latency = zero_cost;
2461           alg_out->op[0] = alg_zero;
2462           return;
2463         }
2464     }
2465
2466   /* We'll be needing a couple extra algorithm structures now.  */
2467
2468   alg_in = alloca (sizeof (struct algorithm));
2469   best_alg = alloca (sizeof (struct algorithm));
2470   best_cost = *cost_limit;
2471
2472   /* Compute the hash index.  */
2473   hash_index = (t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES;
2474
2475   /* See if we already know what to do for T; if so, try only that.  */
2476   if (alg_hash[hash_index].t == t
2477       && alg_hash[hash_index].mode == mode
2478       && alg_hash[hash_index].alg != alg_unknown)
2479     {
2480       cache_hit = true;
2481       cache_alg = alg_hash[hash_index].alg;
2482       switch (cache_alg)
2483         {
2484         case alg_shift:
2485           goto do_alg_shift;
2486
2487         case alg_add_t_m2:
2488         case alg_sub_t_m2:
2489           goto do_alg_addsub_t_m2;
2490
2491         case alg_add_factor:
2492         case alg_sub_factor:
2493           goto do_alg_addsub_factor;
2494
2495         case alg_add_t2_m:
2496           goto do_alg_add_t2_m;
2497
2498         case alg_sub_t2_m:
2499           goto do_alg_sub_t2_m;
2500
2501         default:
2502           gcc_unreachable ();
2503         }
2504     }
2505
2506   /* If we have a group of zero bits at the low-order part of T, try
2507      multiplying by the remaining bits and then doing a shift.  */
2508
2509   if ((t & 1) == 0)
2510     {
2511     do_alg_shift:
2512       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2513       if (m < maxm)
2514         {
2515           q = t >> m;
2516           /* The function expand_shift will choose between a shift and
2517              a sequence of additions, so the observed cost is given as
2518              MIN (m * add_cost[mode], shift_cost[mode][m]).  */
2519           op_cost = m * add_cost[mode];
2520           if (shift_cost[mode][m] < op_cost)
2521             op_cost = shift_cost[mode][m];
2522           new_limit.cost = best_cost.cost - op_cost;
2523           new_limit.latency = best_cost.latency - op_cost;
2524           synth_mult (alg_in, q, &new_limit, mode);
2525
2526           alg_in->cost.cost += op_cost;
2527           alg_in->cost.latency += op_cost;
2528           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2529             {
2530               struct algorithm *x;
2531               best_cost = alg_in->cost;
2532               x = alg_in, alg_in = best_alg, best_alg = x;
2533               best_alg->log[best_alg->ops] = m;
2534               best_alg->op[best_alg->ops] = alg_shift;
2535             }
2536         }
2537       if (cache_hit)
2538         goto done;
2539     }
2540
2541   /* If we have an odd number, add or subtract one.  */
2542   if ((t & 1) != 0)
2543     {
2544       unsigned HOST_WIDE_INT w;
2545
2546     do_alg_addsub_t_m2:
2547       for (w = 1; (w & t) != 0; w <<= 1)
2548         ;
2549       /* If T was -1, then W will be zero after the loop.  This is another
2550          case where T ends with ...111.  Handling this with (T + 1) and
2551          subtract 1 produces slightly better code and results in algorithm
2552          selection much faster than treating it like the ...0111 case
2553          below.  */
2554       if (w == 0
2555           || (w > 2
2556               /* Reject the case where t is 3.
2557                  Thus we prefer addition in that case.  */
2558               && t != 3))
2559         {
2560           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2561
2562           op_cost = add_cost[mode];
2563           new_limit.cost = best_cost.cost - op_cost;
2564           new_limit.latency = best_cost.latency - op_cost;
2565           synth_mult (alg_in, t + 1, &new_limit, mode);
2566
2567           alg_in->cost.cost += op_cost;
2568           alg_in->cost.latency += op_cost;
2569           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2570             {
2571               struct algorithm *x;
2572               best_cost = alg_in->cost;
2573               x = alg_in, alg_in = best_alg, best_alg = x;
2574               best_alg->log[best_alg->ops] = 0;
2575               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2576             }
2577         }
2578       else
2579         {
2580           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2581
2582           op_cost = add_cost[mode];
2583           new_limit.cost = best_cost.cost - op_cost;
2584           new_limit.latency = best_cost.latency - op_cost;
2585           synth_mult (alg_in, t - 1, &new_limit, mode);
2586
2587           alg_in->cost.cost += op_cost;
2588           alg_in->cost.latency += op_cost;
2589           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2590             {
2591               struct algorithm *x;
2592               best_cost = alg_in->cost;
2593               x = alg_in, alg_in = best_alg, best_alg = x;
2594               best_alg->log[best_alg->ops] = 0;
2595               best_alg->op[best_alg->ops] = alg_add_t_m2;
2596             }
2597         }
2598       if (cache_hit)
2599         goto done;
2600     }
2601
2602   /* Look for factors of t of the form
2603      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2604      If we find such a factor, we can multiply by t using an algorithm that
2605      multiplies by q, shift the result by m and add/subtract it to itself.
2606
2607      We search for large factors first and loop down, even if large factors
2608      are less probable than small; if we find a large factor we will find a
2609      good sequence quickly, and therefore be able to prune (by decreasing
2610      COST_LIMIT) the search.  */
2611
2612  do_alg_addsub_factor:
2613   for (m = floor_log2 (t - 1); m >= 2; m--)
2614     {
2615       unsigned HOST_WIDE_INT d;
2616
2617       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2618       if (t % d == 0 && t > d && m < maxm
2619           && (!cache_hit || cache_alg == alg_add_factor))
2620         {
2621           /* If the target has a cheap shift-and-add instruction use
2622              that in preference to a shift insn followed by an add insn.
2623              Assume that the shift-and-add is "atomic" with a latency
2624              equal to its cost, otherwise assume that on superscalar
2625              hardware the shift may be executed concurrently with the
2626              earlier steps in the algorithm.  */
2627           op_cost = add_cost[mode] + shift_cost[mode][m];
2628           if (shiftadd_cost[mode][m] < op_cost)
2629             {
2630               op_cost = shiftadd_cost[mode][m];
2631               op_latency = op_cost;
2632             }
2633           else
2634             op_latency = add_cost[mode];
2635
2636           new_limit.cost = best_cost.cost - op_cost;
2637           new_limit.latency = best_cost.latency - op_latency;
2638           synth_mult (alg_in, t / d, &new_limit, mode);
2639
2640           alg_in->cost.cost += op_cost;
2641           alg_in->cost.latency += op_latency;
2642           if (alg_in->cost.latency < op_cost)
2643             alg_in->cost.latency = op_cost;  /* At least this step's cost.  */
2644           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2645             {
2646               struct algorithm *x;
2647               best_cost = alg_in->cost;
2648               x = alg_in, alg_in = best_alg, best_alg = x;
2649               best_alg->log[best_alg->ops] = m;
2650               best_alg->op[best_alg->ops] = alg_add_factor;
2651             }
2652           /* Other factors will have been taken care of in the recursion.  */
2653           break;
2654         }
2655
2656       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2657       if (t % d == 0 && t > d && m < maxm
2658           && (!cache_hit || cache_alg == alg_sub_factor))
2659         {
2660           /* If the target has a cheap shift-and-subtract insn use
2661              that in preference to a shift insn followed by a sub insn.
2662              Assume that the shift-and-sub is "atomic" with a latency
2663              equal to its cost, otherwise assume that on superscalar
2664              hardware the shift may be executed concurrently with the
2665              earlier steps in the algorithm.  */
2666           op_cost = add_cost[mode] + shift_cost[mode][m];
2667           if (shiftsub_cost[mode][m] < op_cost)
2668             {
2669               op_cost = shiftsub_cost[mode][m];
2670               op_latency = op_cost;
2671             }
2672           else
2673             op_latency = add_cost[mode];
2674
2675           new_limit.cost = best_cost.cost - op_cost;
2676           new_limit.latency = best_cost.latency - op_latency;
2677           synth_mult (alg_in, t / d, &new_limit, mode);
2678
2679           alg_in->cost.cost += op_cost;
2680           alg_in->cost.latency += op_latency;
2681           if (alg_in->cost.latency < op_cost)
2682             alg_in->cost.latency = op_cost;  /* At least this step's cost.  */
2683           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2684             {
2685               struct algorithm *x;
2686               best_cost = alg_in->cost;
2687               x = alg_in, alg_in = best_alg, best_alg = x;
2688               best_alg->log[best_alg->ops] = m;
2689               best_alg->op[best_alg->ops] = alg_sub_factor;
2690             }
2691           break;
2692         }
2693     }
2694   if (cache_hit)
2695     goto done;
2696
2697   /* Try shift-and-add (load effective address) instructions,
2698      i.e. do a*3, a*5, a*9.  */
2699   if ((t & 1) != 0)
2700     {
2701     do_alg_add_t2_m:
2702       q = t - 1;
2703       q = q & -q;
2704       m = exact_log2 (q);
2705       if (m >= 0 && m < maxm)
2706         {
2707           op_cost = shiftadd_cost[mode][m];
2708           new_limit.cost = best_cost.cost - op_cost;
2709           new_limit.latency = best_cost.latency - op_cost;
2710           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2711
2712           alg_in->cost.cost += op_cost;
2713           alg_in->cost.latency += op_cost;
2714           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2715             {
2716               struct algorithm *x;
2717               best_cost = alg_in->cost;
2718               x = alg_in, alg_in = best_alg, best_alg = x;
2719               best_alg->log[best_alg->ops] = m;
2720               best_alg->op[best_alg->ops] = alg_add_t2_m;
2721             }
2722         }
2723       if (cache_hit)
2724         goto done;
2725
2726     do_alg_sub_t2_m:
2727       q = t + 1;
2728       q = q & -q;
2729       m = exact_log2 (q);
2730       if (m >= 0 && m < maxm)
2731         {
2732           op_cost = shiftsub_cost[mode][m];
2733           new_limit.cost = best_cost.cost - op_cost;
2734           new_limit.latency = best_cost.latency - op_cost;
2735           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2736
2737           alg_in->cost.cost += op_cost;
2738           alg_in->cost.latency += op_cost;
2739           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2740             {
2741               struct algorithm *x;
2742               best_cost = alg_in->cost;
2743               x = alg_in, alg_in = best_alg, best_alg = x;
2744               best_alg->log[best_alg->ops] = m;
2745               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2746             }
2747         }
2748       if (cache_hit)
2749         goto done;
2750     }
2751
2752  done:
2753   /* If best_cost has not decreased, we have not found any algorithm.  */
2754   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2755     return;
2756
2757   /* Cache the result, i.e. the last operation of the best sequence.  */
2758   if (!cache_hit)
2759     {
2760       alg_hash[hash_index].t = t;
2761       alg_hash[hash_index].mode = mode;
2762       alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2763     }
2764
2765   /* If we are getting a too long sequence for `struct algorithm'
2766      to record, make this search fail.  */
2767   if (best_alg->ops == MAX_BITS_PER_WORD)
2768     return;
2769
2770   /* Copy the algorithm from temporary space to the space at alg_out.
2771      We avoid using structure assignment because the majority of
2772      best_alg is normally undefined, and this is a critical function.  */
2773   alg_out->ops = best_alg->ops + 1;
2774   alg_out->cost = best_cost;
2775   memcpy (alg_out->op, best_alg->op,
2776           alg_out->ops * sizeof *alg_out->op);
2777   memcpy (alg_out->log, best_alg->log,
2778           alg_out->ops * sizeof *alg_out->log);
2779 }
2780 \f
2781 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2782    Try three variations:
2783
2784        - a shift/add sequence based on VAL itself
2785        - a shift/add sequence based on -VAL, followed by a negation
2786        - a shift/add sequence based on VAL - 1, followed by an addition.
2787
2788    Return true if the cheapest of these costs less than MULT_COST,
2789    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2790
2791 static bool
2792 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2793                      struct algorithm *alg, enum mult_variant *variant,
2794                      int mult_cost)
2795 {
2796   struct algorithm alg2;
2797   struct mult_cost limit;
2798   int op_cost;
2799
2800   *variant = basic_variant;
2801   limit.cost = mult_cost;
2802   limit.latency = mult_cost;
2803   synth_mult (alg, val, &limit, mode);
2804   /* Try -VAL plus a negation; the limit is lowered by the negation's cost.  */
2805   /* This works only if the inverted value actually fits in an
2806      `unsigned int' */
2807   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2808     {
2809       op_cost = neg_cost[mode];
2810       if (MULT_COST_LESS (&alg->cost, mult_cost))
2811         {
2812           limit.cost = alg->cost.cost - op_cost;
2813           limit.latency = alg->cost.latency - op_cost;
2814         }
2815       else
2816         {
2817           limit.cost = mult_cost - op_cost;
2818           limit.latency = mult_cost - op_cost;
2819         }
2820
2821       synth_mult (&alg2, -val, &limit, mode);
2822       alg2.cost.cost += op_cost;
2823       alg2.cost.latency += op_cost;
2824       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2825         *alg = alg2, *variant = negate_variant;
2826     }
2827   /* Try VAL - 1 plus an addition; the limit is lowered by the add's cost.  */
2828   /* This proves very useful for division-by-constant.  */
2829   op_cost = add_cost[mode];
2830   if (MULT_COST_LESS (&alg->cost, mult_cost))
2831     {
2832       limit.cost = alg->cost.cost - op_cost;
2833       limit.latency = alg->cost.latency - op_cost;
2834     }
2835   else
2836     {
2837       limit.cost = mult_cost - op_cost;
2838       limit.latency = mult_cost - op_cost;
2839     }
2840
2841   synth_mult (&alg2, val - 1, &limit, mode);
2842   alg2.cost.cost += op_cost;
2843   alg2.cost.latency += op_cost;
2844   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2845     *alg = alg2, *variant = add_variant;
2846
2847   return MULT_COST_LESS (&alg->cost, mult_cost);
2848 }
2849
2850 /* A subroutine of expand_mult, used for constant multiplications.
2851    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2852    convenient.  Use the shift/add sequence described by ALG and apply
2853    the final fixup specified by VARIANT.  Return the rtx for the product.  */
2854
2855 static rtx
2856 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2857                    rtx target, const struct algorithm *alg,
2858                    enum mult_variant variant)
2859 {
2860   HOST_WIDE_INT val_so_far;
2861   rtx insn, accum, tem;
2862   int opno;
2863   enum machine_mode nmode;
2864
2865   /* Avoid referencing memory over and over.
2866      For speed, but also for correctness when mem is volatile.  */
2867   if (MEM_P (op0))
2868     op0 = force_reg (mode, op0);
2869
2870   /* ACCUM starts out either as OP0 or as a zero, depending on
2871      the first operation.  */
2872
2873   if (alg->op[0] == alg_zero)
2874     {
2875       accum = copy_to_mode_reg (mode, const0_rtx);
2876       val_so_far = 0;
2877     }
2878   else if (alg->op[0] == alg_m)
2879     {
2880       accum = copy_to_mode_reg (mode, op0);
2881       val_so_far = 1;
2882     }
2883   else
2884     gcc_unreachable ();
2885   /* Replay the recorded steps; VAL_SO_FAR tracks the multiplier reached.  */
2886   for (opno = 1; opno < alg->ops; opno++)
2887     {
2888       int log = alg->log[opno];
2889       rtx shift_subtarget = optimize ? 0 : accum;
2890       rtx add_target
2891         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2892            && !optimize)
2893           ? target : 0;
2894       rtx accum_target = optimize ? 0 : accum;
2895
2896       switch (alg->op[opno])
2897         {
2898         case alg_shift:
2899           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2900                                 build_int_cst (NULL_TREE, log),
2901                                 NULL_RTX, 0);
2902           val_so_far <<= log;
2903           break;
2904
2905         case alg_add_t_m2:
2906           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2907                               build_int_cst (NULL_TREE, log),
2908                               NULL_RTX, 0);
2909           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2910                                  add_target ? add_target : accum_target);
2911           val_so_far += (HOST_WIDE_INT) 1 << log;
2912           break;
2913
2914         case alg_sub_t_m2:
2915           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2916                               build_int_cst (NULL_TREE, log),
2917                               NULL_RTX, 0);
2918           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2919                                  add_target ? add_target : accum_target);
2920           val_so_far -= (HOST_WIDE_INT) 1 << log;
2921           break;
2922
2923         case alg_add_t2_m:
2924           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2925                                 build_int_cst (NULL_TREE, log),
2926                                 shift_subtarget,
2927                                 0);
2928           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2929                                  add_target ? add_target : accum_target);
2930           val_so_far = (val_so_far << log) + 1;
2931           break;
2932
2933         case alg_sub_t2_m:
2934           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2935                                 build_int_cst (NULL_TREE, log),
2936                                 shift_subtarget, 0);
2937           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2938                                  add_target ? add_target : accum_target);
2939           val_so_far = (val_so_far << log) - 1;
2940           break;
2941
2942         case alg_add_factor:
2943           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2944                               build_int_cst (NULL_TREE, log),
2945                               NULL_RTX, 0);
2946           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2947                                  add_target ? add_target : accum_target);
2948           val_so_far += val_so_far << log;
2949           break;
2950
2951         case alg_sub_factor:
2952           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2953                               build_int_cst (NULL_TREE, log),
2954                               NULL_RTX, 0);
2955           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2956                                  (add_target
2957                                   ? add_target : (optimize ? 0 : tem)));
2958           val_so_far = (val_so_far << log) - val_so_far;
2959           break;
2960
2961         default:
2962           gcc_unreachable ();
2963         }
2964
2965       /* Write a REG_EQUAL note on the last insn so that we can cse
2966          multiplication sequences.  Note that if ACCUM is a SUBREG,
2967          we've set the inner register and must properly indicate
2968          that.  */
2969
2970       tem = op0, nmode = mode;
2971       if (GET_CODE (accum) == SUBREG)
2972         {
2973           nmode = GET_MODE (SUBREG_REG (accum));
2974           tem = gen_lowpart (nmode, op0);
2975         }
2976
2977       insn = get_last_insn ();
2978       set_unique_reg_note (insn, REG_EQUAL,
2979                            gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
2980     }
2981   /* Apply the final fixup requested by VARIANT.  */
2982   if (variant == negate_variant)
2983     {
2984       val_so_far = -val_so_far;
2985       accum = expand_unop (mode, neg_optab, accum, target, 0);
2986     }
2987   else if (variant == add_variant)
2988     {
2989       val_so_far = val_so_far + 1;
2990       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2991     }
2992
2993   /* Compare only the bits of val and val_so_far that are significant
2994      in the result mode, to avoid sign-/zero-extension confusion.  */
2995   val &= GET_MODE_MASK (mode);
2996   val_so_far &= GET_MODE_MASK (mode);
2997   gcc_assert (val == val_so_far);
2998
2999   return accum;
3000 }
3001
3002 /* Perform a multiplication and return an rtx for the result.
3003    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3004    TARGET is a suggestion for where to store the result (an rtx).
3005    UNSIGNEDP is passed on to expand_shift and expand_binop.
3006    We check specially for a constant integer as OP1.
3007    If you want this check for OP0 as well, then before calling
3008    you should swap the two operands if OP0 would be constant.  */
3009
3010 rtx
3011 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3012              int unsignedp)
3013 {
3014   rtx const_op1 = op1;
3015   enum mult_variant variant;
3016   struct algorithm algorithm;
3017
3018   /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3019      less than or equal in size to `unsigned int' this doesn't matter.
3020      If the mode is larger than `unsigned int', then synth_mult works only
3021      if the constant value exactly fits in an `unsigned int' without any
3022      truncation.  This means that multiplying by negative values does
3023      not work; results are off by 2^32 on a 32 bit machine.  */
3024
3025   /* If we are multiplying in DImode, it may still be a win
3026      to try to work with shifts and adds.  */
3027   if (GET_CODE (op1) == CONST_DOUBLE
3028       && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
3029       && HOST_BITS_PER_INT >= BITS_PER_WORD
3030       && CONST_DOUBLE_HIGH (op1) == 0)
3031     const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
3032   else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
3033            && GET_CODE (op1) == CONST_INT
3034            && INTVAL (op1) < 0)
3035     const_op1 = 0;
3036
3037   /* We used to test optimize here, on the grounds that it's better to
3038      produce a smaller program when -O is not used.
3039      But this causes such a terrible slowdown sometimes
3040      that it seems better to use synth_mult always.  */
3041
3042   if (const_op1 && GET_CODE (const_op1) == CONST_INT
3043       && (unsignedp || !flag_trapv))
3044     {
3045       HOST_WIDE_INT coeff = INTVAL (const_op1);
3046       int mult_cost;
3047
3048       /* Special case powers of two.  */
3049       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3050         {
3051           if (coeff == 0)
3052             return const0_rtx;
3053           if (coeff == 1)
3054             return op0;
3055           return expand_shift (LSHIFT_EXPR, mode, op0,
3056                                build_int_cst (NULL_TREE, floor_log2 (coeff)),
3057                                target, unsignedp);
3058         }
3059       /* Use a shift/add sequence only if it beats the multiply's rtx_cost.  */
3060       mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
3061       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3062                                mult_cost))
3063         return expand_mult_const (mode, op0, coeff, target,
3064                                   &algorithm, variant);
3065     }
3066   /* Put a CONST_DOUBLE operand second.  */
3067   if (GET_CODE (op0) == CONST_DOUBLE)
3068     {
3069       rtx temp = op0;
3070       op0 = op1;
3071       op1 = temp;
3072     }
3073
3074   /* Expand x*2.0 as x+x.  */
3075   if (GET_CODE (op1) == CONST_DOUBLE
3076       && GET_MODE_CLASS (mode) == MODE_FLOAT)
3077     {
3078       REAL_VALUE_TYPE d;
3079       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3080
3081       if (REAL_VALUES_EQUAL (d, dconst2))
3082         {
3083           op0 = force_reg (GET_MODE (op0), op0);
3084           return expand_binop (mode, add_optab, op0, op0,
3085                                target, unsignedp, OPTAB_LIB_WIDEN);
3086         }
3087     }
3088
3089   /* This used to use umul_optab if unsigned, but for non-widening multiply
3090      there is no difference between signed and unsigned.  */
3091   op0 = expand_binop (mode,
3092                       ! unsignedp
3093                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3094                       ? smulv_optab : smul_optab,
3095                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3096   gcc_assert (op0);
3097   return op0;
3098 }
3099 \f
3100 /* Return the smallest n such that 2**n >= X, i.e. floor_log2 (X - 1) + 1.  */
3101
3102 int
3103 ceil_log2 (unsigned HOST_WIDE_INT x)
3104 {
3105   return floor_log2 (x - 1) + 1;  /* NOTE(review): X == 0 wraps X - 1; presumably never passed -- confirm.  */
3106 }
3107
3108 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3109    replace division by D, and put the least significant N bits of the result
3110    in *MULTIPLIER_PTR and return the most significant bit.
3111
3112    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3113    needed precision is in PRECISION (should be <= N).
3114
3115    PRECISION should be as small as possible so this function can choose
3116    multiplier more freely.
3117
3118    The rounded-up logarithm of D is placed in *LGUP_PTR.  A shift count that
3119    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3120
3121    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3122    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3123
3124 static
3125 unsigned HOST_WIDE_INT
3126 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3127                    rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3128 {
3129   HOST_WIDE_INT mhigh_hi, mlow_hi;
3130   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3131   int lgup, post_shift;
3132   int pow, pow2;
3133   unsigned HOST_WIDE_INT nl, dummy1;
3134   HOST_WIDE_INT nh, dummy2;
3135
3136   /* lgup = ceil(log2(divisor)); */
3137   lgup = ceil_log2 (d);
3138
3139   gcc_assert (lgup <= n);
3140
3141   pow = n + lgup;  /* Exponent of the mlow numerator.  */
3142   pow2 = n + lgup - precision;  /* Exponent of the extra term in the mhigh numerator.  */
3143
3144   /* We could handle this with some effort, but this case is much
3145      better handled directly with a scc insn, so rely on caller using
3146      that.  */
3147   gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3148
3149   /* mlow = 2^(N + lgup)/d, with the numerator held as the double word NH:NL.  */
3150  if (pow >= HOST_BITS_PER_WIDE_INT)
3151     {
3152       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3153       nl = 0;
3154     }
3155   else
3156     {
3157       nh = 0;
3158       nl = (unsigned HOST_WIDE_INT) 1 << pow;
3159     }
3160   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3161                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3162
3163   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3164   if (pow2 >= HOST_BITS_PER_WIDE_INT)
3165     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3166   else
3167     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3168   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3169                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3170
3171   gcc_assert (!mhigh_hi || nh - d < d);
3172   gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3173   /* Assert that mlow < mhigh.  */
3174   gcc_assert (mlow_hi < mhigh_hi
3175               || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3176
3177   /* If precision == N, then mlow, mhigh exceed 2^N
3178      (but they do not exceed 2^(N+1)).  */
3179
3180   /* Reduce to lowest terms: halve mlow and mhigh, decrementing the post shift, while the halved mlow stays below the halved mhigh.  */
3181   for (post_shift = lgup; post_shift > 0; post_shift--)
3182     {
3183       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3184       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3185       if (ml_lo >= mh_lo)
3186         break;
3187
3188       mlow_hi = 0;
3189       mlow_lo = ml_lo;
3190       mhigh_hi = 0;
3191       mhigh_lo = mh_lo;
3192     }
3193
3194   *post_shift_ptr = post_shift;
3195   *lgup_ptr = lgup;
3196   if (n < HOST_BITS_PER_WIDE_INT)
3197     {
3198       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3199       *multiplier_ptr = GEN_INT (mhigh_lo & mask);  /* Low N bits of mhigh.  */
3200       return mhigh_lo >= mask;
3201     }
3202   else
3203     {
3204       *multiplier_ptr = GEN_INT (mhigh_lo);  /* N is the full host word width here.  */
3205       return mhigh_hi;
3206     }
3207 }
3208
3209 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3210    congruent to 1 (mod 2**N).  */
3211
3212 static unsigned HOST_WIDE_INT
3213 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3214 {
3215   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3216
3217   /* The algorithm notes that the choice y = x satisfies
3218      x*y == 1 mod 2^3, since x is assumed odd.
3219      Each iteration doubles the number of bits of significance in y.  */
3220
3221   unsigned HOST_WIDE_INT mask;
3222   unsigned HOST_WIDE_INT y = x;
3223   int nbit = 3;  /* Number of low bits of Y known to be correct so far.  */
3224
3225   mask = (n == HOST_BITS_PER_WIDE_INT
3226           ? ~(unsigned HOST_WIDE_INT) 0
3227           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);  /* Low N bits; the full-width case avoids a shift by the type width.  */
3228
3229   while (nbit < n)
3230     {
3231       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3232       nbit *= 2;
3233     }
3234   return y;
3235 }
3236
3237 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3238    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3239    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3240    to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
3241    become signed.
3242
3243    The result is put in TARGET if that is convenient.
3244
3245    MODE is the mode of operation.  */
3246
3247 rtx
3248 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3249                              rtx op1, rtx target, int unsignedp)
3250 {
3251   rtx tem;
3252   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;  /* Corrections are added for unsigned, subtracted for signed.  */
3253
3254   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3255                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3256                       NULL_RTX, 0);  /* Presumably an arithmetic shift (unsignedp == 0) -- confirm.  */
3257   tem = expand_and (mode, tem, op1, NULL_RTX);  /* TEM = (OP0 >> (bitsize - 1)) & OP1.  */
3258   adj_operand
3259     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3260                      adj_operand);
3261
3262   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3263                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3264                       NULL_RTX, 0);
3265   tem = expand_and (mode, tem, op0, NULL_RTX);  /* TEM = (OP1 >> (bitsize - 1)) & OP0.  */
3266   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3267                           target);
3268
3269   return target;
3270 }
3271
3272 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP; the shift below treats OP as a GET_MODE_WIDER_MODE (MODE) value.  */
3273
3274 static rtx
3275 extract_high_half (enum machine_mode mode, rtx op)
3276 {
3277   enum machine_mode wider_mode;
3278
3279   if (mode == word_mode)
3280     return gen_highpart (mode, op);  /* Word-sized MODE: take the high part directly, no shift.  */
3281
3282   wider_mode = GET_MODE_WIDER_MODE (mode);
3283   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3284                      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);  /* Shift the high half down by the bit size of MODE.  */
3285   return convert_modes (mode, wider_mode, op, 0);
3286 }
3287
3288 /* Like expand_mult_highpart, but only consider using a multiplication
3289    optab.  OP1 is an rtx for the constant operand.  Return 0 if none of the strategies below both fits under MAX_COST and expands.  */
3290
3291 static rtx
3292 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3293                             rtx target, int unsignedp, int max_cost)
3294 {
3295   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);  /* OP1's value as a constant of MODE.  */
3296   enum machine_mode wider_mode;
3297   optab moptab;
3298   rtx tem;
3299   int size;
3300
3301   wider_mode = GET_MODE_WIDER_MODE (mode);
3302   size = GET_MODE_BITSIZE (mode);
3303
3304   /* Firstly, try using a multiplication insn that only generates the needed
3305      high part of the product, and in the sign flavor of unsignedp.  */
3306   if (mul_highpart_cost[mode] < max_cost)
3307     {
3308       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3309       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3310                           unsignedp, OPTAB_DIRECT);
3311       if (tem)
3312         return tem;
3313     }
3314
3315   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3316      Need to adjust the result after the multiplication.  */
3317   if (size - 1 < BITS_PER_WORD
3318       && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
3319           + 4 * add_cost[mode] < max_cost))
3320     {
3321       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3322       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3323                           unsignedp, OPTAB_DIRECT);
3324       if (tem)
3325         /* We used the wrong signedness.  Adjust the result.  */
3326         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3327                                             tem, unsignedp);
3328     }
3329
3330   /* Try widening multiplication.  */
3331   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3332   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3333       && mul_widen_cost[wider_mode] < max_cost)
3334     {
3335       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3336                           unsignedp, OPTAB_WIDEN);
3337       if (tem)
3338         return extract_high_half (mode, tem);
3339     }
3340
3341   /* Try widening the mode and perform a non-widening multiplication.  */
3342   if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3343       && size - 1 < BITS_PER_WORD
3344       && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
3345     {
3346       rtx insns, wop0, wop1;
3347
3348       /* We need to widen the operands, for example to ensure the
3349          constant multiplier is correctly sign or zero extended.
3350          Use a sequence to clean-up any instructions emitted by
3351          the conversions if things don't work out.  */
3352       start_sequence ();
3353       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3354       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3355       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3356                           unsignedp, OPTAB_WIDEN);
3357       insns = get_insns ();
3358       end_sequence ();
3359
3360       if (tem)
3361         {
3362           emit_insn (insns);  /* Only now commit the insns collected in the sequence.  */
3363           return extract_high_half (mode, tem);
3364         }
3365     }
3366
3367   /* Try widening multiplication of opposite signedness, and adjust.  */
3368   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3369   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3370       && size - 1 < BITS_PER_WORD
3371       && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
3372           + 4 * add_cost[mode] < max_cost))
3373     {
3374       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3375                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3376       if (tem != 0)
3377         {
3378           tem = extract_high_half (mode, tem);
3379           /* We used the wrong signedness.  Adjust the result.  */
3380           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3381                                               target, unsignedp);
3382         }
3383     }
3384
3385   return 0;  /* No strategy was both cheap enough and expandable.  */
3386 }
3387
3388 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3389    putting the high half of the result in TARGET if that is convenient,
3390    and return where the result is.  If the operation can not be performed,
3391    0 is returned.
3392
3393    MODE is the mode of operation and result.
3394
3395    UNSIGNEDP nonzero means unsigned multiply.
3396
3397    MAX_COST is the total allowed cost for the expanded RTL.  */
3398
3399 static rtx
3400 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3401                       rtx target, int unsignedp, int max_cost)
3402 {
3403   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3404   unsigned HOST_WIDE_INT cnst1;
3405   int extra_cost;
3406   bool sign_adjust = false;
3407   enum mult_variant variant;
3408   struct algorithm alg;
3409   rtx tem;
3410
3411   /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
3412   gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3413
3414   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);  /* OP1's value masked to the bits of MODE.  */
3415
3416   /* We can't optimize modes wider than BITS_PER_WORD.
3417      ??? We might be able to perform double-word arithmetic if
3418      mode == word_mode, however all the cost calculations in
3419      synth_mult etc. assume single-word operations.  */
3420   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3421     return expand_mult_highpart_optab (mode, op0, op1, target,
3422                                        unsignedp, max_cost);
3423
3424   extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3425
3426   /* Check whether we try to multiply by a negative constant.  */
3427   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3428     {
3429       sign_adjust = true;
3430       extra_cost += add_cost[mode];
3431     }
3432
3433   /* See whether shift/add multiplication is cheap enough.  */
3434   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3435                            max_cost - extra_cost))
3436     {
3437       /* See whether the specialized multiplication optabs are
3438          cheaper than the shift/add version.  */
3439       tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3440                                         alg.cost.cost + extra_cost);
3441       if (tem)
3442         return tem;
3443
3444       tem = convert_to_mode (wider_mode, op0, unsignedp);
3445       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3446       tem = extract_high_half (mode, tem);
3447
3448       /* Adjust result for signedness: CNST1 was used as a nonnegative value, so subtract OP0 when the constant is really negative.  */
3449       if (sign_adjust)
3450         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3451
3452       return tem;
3453     }
3454   return expand_mult_highpart_optab (mode, op0, op1, target,
3455                                      unsignedp, max_cost);
3456 }
3457
3458
3459 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  Return an rtx holding the result.  */
3460
3461 static rtx
3462 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3463 {
3464   unsigned HOST_WIDE_INT masklow, maskhigh;
3465   rtx result, temp, shift, label;
3466   int logd;
3467
3468   logd = floor_log2 (d);
3469   result = gen_reg_rtx (mode);
3470
3471   /* Avoid conditional branches when they're expensive.  */
3472   if (BRANCH_COST >= 2
3473       && !optimize_size)
3474     {
3475       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3476                                       mode, 0, -1);
3477       if (signmask)  /* A zero result means no store-flag sequence was produced; fall through to the branching code.  */
3478         {
3479           signmask = force_reg (mode, signmask);
3480           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3481           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3482
3483           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3484              which instruction sequence to use.  If logical right shifts
3485              are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3486              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3487
3488           temp = gen_rtx_LSHIFTRT (mode, result, shift);  /* Built only for the rtx_cost query below; TEMP is overwritten in both arms.  */
3489           if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing
3490               || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3491             {
3492               temp = expand_binop (mode, xor_optab, op0, signmask,
3493                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3494               temp = expand_binop (mode, sub_optab, temp, signmask,
3495                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3496               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3497                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3498               temp = expand_binop (mode, xor_optab, temp, signmask,
3499                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3500               temp = expand_binop (mode, sub_optab, temp, signmask,
3501                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3502             }
3503           else
3504             {
3505               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3506                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3507               signmask = force_reg (mode, signmask);
3508
3509               temp = expand_binop (mode, add_optab, op0, signmask,
3510                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3511               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3512                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3513               temp = expand_binop (mode, sub_optab, temp, signmask,
3514                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3515             }
3516           return temp;
3517         }
3518     }
3519
3520   /* Mask contains the mode's signbit and the significant bits of the
3521      modulus.  By including the signbit in the operation, many targets
3522      can avoid an explicit compare operation in the following comparison
3523      against zero.  */
3524
3525   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3526   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3527     {
3528       masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3529       maskhigh = -1;
3530     }
3531   else
3532     maskhigh = (HOST_WIDE_INT) -1
3533                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3534
3535   temp = expand_binop (mode, and_optab, op0,
3536                        immed_double_const (masklow, maskhigh, mode),
3537                        result, 1, OPTAB_LIB_WIDEN);
3538   if (temp != result)
3539     emit_move_insn (result, temp);
3540
3541   label = gen_label_rtx ();
3542   do_cmp_and_jump (result, const0_rtx, GE, mode, label);  /* Skip the fix-up below when RESULT >= 0.  */
3543
3544   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3545                        0, OPTAB_LIB_WIDEN);  /* Fix-up for RESULT < 0: ((RESULT - 1) | (-1 << logd)) + 1.  */
3546   masklow = (HOST_WIDE_INT) -1 << logd;
3547   maskhigh = -1;
3548   temp = expand_binop (mode, ior_optab, temp,
3549                        immed_double_const (masklow, maskhigh, mode),
3550                        result, 1, OPTAB_LIB_WIDEN);
3551   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3552                        0, OPTAB_LIB_WIDEN);
3553   if (temp != result)
3554     emit_move_insn (result, temp);
3555   emit_label (label);
3556   return result;
3557 }
3558
3559 /* Expand signed division of OP0 by a power of two D in mode MODE.
3560    This routine is only called for positive values of D.  Return an rtx holding the quotient.  */
3561
3562 static rtx
3563 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3564 {
3565   rtx temp, label;
3566   tree shift;
3567   int logd;
3568
3569   logd = floor_log2 (d);
3570   shift = build_int_cst (NULL_TREE, logd);  /* Shift count for the final right shift in every variant.  */
3571
3572   if (d == 2 && BRANCH_COST >= 1)
3573     {
3574       temp = gen_reg_rtx (mode);
3575       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);  /* NOTE(review): result not checked for 0, unlike expand_smod_pow2 -- confirm it cannot fail here.  */
3576       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3577                            0, OPTAB_LIB_WIDEN);  /* OP0 + (OP0 < 0) -- presumably the flag is normalized to 1; confirm.  */
3578       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3579     }
3580
3581 #ifdef HAVE_conditional_move
3582   if (BRANCH_COST >= 2)
3583     {
3584       rtx temp2;
3585
3586       /* ??? emit_conditional_move forces a stack adjustment via
3587          compare_from_rtx so, if the sequence is discarded, it will
3588          be lost.  Do it now instead.  */
3589       do_pending_stack_adjust ();
3590
3591       start_sequence ();
3592       temp2 = copy_to_mode_reg (mode, op0);
3593       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3594                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3595       temp = force_reg (mode, temp);
3596
3597       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3598       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3599                                      mode, temp, temp2, mode, 0);
3600       if (temp2)
3601         {
3602           rtx seq = get_insns ();
3603           end_sequence ();
3604           emit_insn (seq);
3605           return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3606         }
3607       end_sequence ();  /* Conditional move failed: drop the sequence and try the variants below.  */
3608     }
3609 #endif
3610
3611   if (BRANCH_COST >= 2)
3612     {
3613       int ushift = GET_MODE_BITSIZE (mode) - logd;
3614
3615       temp = gen_reg_rtx (mode);
3616       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);  /* NOTE(review): result not checked for 0 here either -- confirm.  */
3617       if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
3618         temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3619                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3620       else
3621         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3622                              build_int_cst (NULL_TREE, ushift),
3623                              NULL_RTX, 1);
3624       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3625                            0, OPTAB_LIB_WIDEN);
3626       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3627     }
3628
3629   label = gen_label_rtx ();
3630   temp = copy_to_mode_reg (mode, op0);
3631   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);  /* Skip the D - 1 bias when the copy of OP0 is >= 0.  */
3632   expand_inc (temp, GEN_INT (d - 1));
3633   emit_label (label);
3634   return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3635 }
3636 \f
3637 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3638    if that is convenient, and returning where the result is.
3639    You may request either the quotient or the remainder as the result;
3640    specify REM_FLAG nonzero to get the remainder.
3641
3642    CODE is the expression code for which kind of division this is;
3643    it controls how rounding is done.  MODE is the machine mode to use.
3644    UNSIGNEDP nonzero means do unsigned division.  */
3645
3646 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3647    and then correct it by or'ing in missing high bits
3648    if result of ANDI is nonzero.
3649    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3650    This could optimize to a bfexts instruction.
3651    But C doesn't use these operations, so their optimizations are
3652    left for later.  */
3653 /* ??? For modulo, we don't actually need the highpart of the first product,
3654    the low part will do nicely.  And for small divisors, the second multiply
3655    can also be a low-part only multiply or even be completely left out.
3656    E.g. to calculate the remainder of a division by 3 with a 32 bit
3657    multiply, multiply with 0x55555556 and extract the upper two bits;
3658    the result is exact for inputs up to 0x1fffffff.
3659    The input range can be reduced by using cross-sum rules.
3660    For odd divisors >= 3, the following table gives right shift counts
3661    so that if a number is shifted by an integer multiple of the given
3662    amount, the remainder stays the same:
3663    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3664    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3665    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3666    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3667    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3668
3669    Cross-sum rules for even numbers can be derived by leaving as many bits
3670    to the right alone as the divisor has zeros to the right.
3671    E.g. if x is an unsigned 32 bit number:
3672    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3673    */
3674
3675 rtx
3676 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3677                rtx op0, rtx op1, rtx target, int unsignedp)
3678 {
3679   enum machine_mode compute_mode;
3680   rtx tquotient;
3681   rtx quotient = 0, remainder = 0;
3682   rtx last;
3683   int size;
3684   rtx insn, set;
3685   optab optab1, optab2;
3686   int op1_is_constant, op1_is_pow2 = 0;
3687   int max_cost, extra_cost;
3688   static HOST_WIDE_INT last_div_const = 0;
3689   static HOST_WIDE_INT ext_op1;
3690
3691   op1_is_constant = GET_CODE (op1) == CONST_INT;
3692   if (op1_is_constant)
3693     {
3694       ext_op1 = INTVAL (op1);
3695       if (unsignedp)
3696         ext_op1 &= GET_MODE_MASK (mode);
3697       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3698                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3699     }
3700
3701   /*
3702      This is the structure of expand_divmod:
3703
3704      First comes code to fix up the operands so we can perform the operations
3705      correctly and efficiently.
3706
3707      Second comes a switch statement with code specific for each rounding mode.
3708      For some special operands this code emits all RTL for the desired
3709      operation, for other cases, it generates only a quotient and stores it in
3710      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3711      to indicate that it has not done anything.
3712
3713      Last comes code that finishes the operation.  If QUOTIENT is set and
3714      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3715      QUOTIENT is not set, it is computed using trunc rounding.
3716
3717      We try to generate special code for division and remainder when OP1 is a
3718      constant.  If |OP1| = 2**n we can use shifts and some other fast
3719      operations.  For other values of OP1, we compute a carefully selected
3720      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3721      by m.
3722
3723      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3724      half of the product.  Different strategies for generating the product are
3725      implemented in expand_mult_highpart.
3726
3727      If what we actually want is the remainder, we generate that by another
3728      by-constant multiplication and a subtraction.  */
3729
3730   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3731      code below will malfunction if we are, so check here and handle
3732      the special case if so.  */
3733   if (op1 == const1_rtx)
3734     return rem_flag ? const0_rtx : op0;
3735
3736     /* When dividing by -1, we could get an overflow.
3737      negv_optab can handle overflows.  */
3738   if (! unsignedp && op1 == constm1_rtx)
3739     {
3740       if (rem_flag)
3741         return const0_rtx;
3742       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3743                           ? negv_optab : neg_optab, op0, target, 0);
3744     }
3745
3746   if (target
3747       /* Don't use the function value register as a target
3748          since we have to read it as well as write it,
3749          and function-inlining gets confused by this.  */
3750       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3751           /* Don't clobber an operand while doing a multi-step calculation.  */
3752           || ((rem_flag || op1_is_constant)
3753               && (reg_mentioned_p (target, op0)
3754                   || (MEM_P (op0) && MEM_P (target))))
3755           || reg_mentioned_p (target, op1)
3756           || (MEM_P (op1) && MEM_P (target))))
3757     target = 0;
3758
3759   /* Get the mode in which to perform this computation.  Normally it will
3760      be MODE, but sometimes we can't do the desired operation in MODE.
3761      If so, pick a wider mode in which we can do the operation.  Convert
3762      to that mode at the start to avoid repeated conversions.
3763
3764      First see what operations we need.  These depend on the expression
3765      we are evaluating.  (We assume that divxx3 insns exist under the
3766      same conditions that modxx3 insns and that these insns don't normally
3767      fail.  If these assumptions are not correct, we may generate less
3768      efficient code in some cases.)
3769
3770      Then see if we find a mode in which we can open-code that operation
3771      (either a division, modulus, or shift).  Finally, check for the smallest
3772      mode for which we can do the operation with a library call.  */
3773
3774   /* We might want to refine this now that we have division-by-constant
3775      optimization.  Since expand_mult_highpart tries so many variants, it is
3776      not straightforward to generalize this.  Maybe we should make an array
3777      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3778
3779   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3780             ? (unsignedp ? lshr_optab : ashr_optab)
3781             : (unsignedp ? udiv_optab : sdiv_optab));
3782   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3783             ? optab1
3784             : (unsignedp ? udivmod_optab : sdivmod_optab));
3785
3786   for (compute_mode = mode; compute_mode != VOIDmode;
3787        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3788     if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
3789         || optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
3790       break;
3791
3792   if (compute_mode == VOIDmode)
3793     for (compute_mode = mode; compute_mode != VOIDmode;
3794          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3795       if (optab1->handlers[compute_mode].libfunc
3796           || optab2->handlers[compute_mode].libfunc)
3797         break;
3798
3799   /* If we still couldn't find a mode, use MODE, but we'll probably abort
3800      in expand_binop.  */
3801   if (compute_mode == VOIDmode)
3802     compute_mode = mode;
3803
3804   if (target && GET_MODE (target) == compute_mode)
3805     tquotient = target;
3806   else
3807     tquotient = gen_reg_rtx (compute_mode);
3808
3809   size = GET_MODE_BITSIZE (compute_mode);
3810 #if 0
3811   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3812      (mode), and thereby get better code when OP1 is a constant.  Do that
3813      later.  It will require going over all usages of SIZE below.  */
3814   size = GET_MODE_BITSIZE (mode);
3815 #endif
3816
3817   /* Only deduct something for a REM if the last divide done was
3818      for a different constant.   Then set the constant of the last
3819      divide.  */
3820   max_cost = div_cost[compute_mode]
3821     - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3822                       && INTVAL (op1) == last_div_const)
3823        ? mul_cost[compute_mode] + add_cost[compute_mode]
3824        : 0);
3825
3826   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3827
3828   /* Now convert to the best mode to use.  */
3829   if (compute_mode != mode)
3830     {
3831       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3832       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3833
3834       /* convert_modes may have placed op1 into a register, so we
3835          must recompute the following.  */
3836       op1_is_constant = GET_CODE (op1) == CONST_INT;
3837       op1_is_pow2 = (op1_is_constant
3838                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3839                           || (! unsignedp
3840                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3841     }
3842
3843   /* If one of the operands is a volatile MEM, copy it into a register.  */
3844
3845   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3846     op0 = force_reg (compute_mode, op0);
3847   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3848     op1 = force_reg (compute_mode, op1);
3849
3850   /* If we need the remainder or if OP1 is constant, we need to
3851      put OP0 in a register in case it has any queued subexpressions.  */
3852   if (rem_flag || op1_is_constant)
3853     op0 = force_reg (compute_mode, op0);
3854
3855   last = get_last_insn ();
3856
3857   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3858   if (unsignedp)
3859     {
3860       if (code == FLOOR_DIV_EXPR)
3861         code = TRUNC_DIV_EXPR;
3862       if (code == FLOOR_MOD_EXPR)
3863         code = TRUNC_MOD_EXPR;
3864       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3865         code = TRUNC_DIV_EXPR;
3866     }
3867
3868   if (op1 != const0_rtx)
3869     switch (code)
3870       {
3871       case TRUNC_MOD_EXPR:
3872       case TRUNC_DIV_EXPR:
3873         if (op1_is_constant)
3874           {
3875             if (unsignedp)
3876               {
3877                 unsigned HOST_WIDE_INT mh;
3878                 int pre_shift, post_shift;
3879                 int dummy;
3880                 rtx ml;
3881                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3882                                             & GET_MODE_MASK (compute_mode));
3883
3884                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3885                   {
3886                     pre_shift = floor_log2 (d);
3887                     if (rem_flag)
3888                       {
3889                         remainder
3890                           = expand_binop (compute_mode, and_optab, op0,
3891                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3892                                           remainder, 1,
3893                                           OPTAB_LIB_WIDEN);
3894                         if (remainder)
3895                           return gen_lowpart (mode, remainder);
3896                       }
3897                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3898                                              build_int_cst (NULL_TREE,
3899                                                             pre_shift),
3900                                              tquotient, 1);
3901                   }
3902                 else if (size <= HOST_BITS_PER_WIDE_INT)
3903                   {
3904                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3905                       {
3906                         /* Most significant bit of divisor is set; emit an scc
3907                            insn.  */
3908                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3909                                                     compute_mode, 1, 1);
3910                         if (quotient == 0)
3911                           goto fail1;
3912                       }
3913                     else
3914                       {
3915                         /* Find a suitable multiplier and right shift count
3916                            instead of multiplying with D.  */
3917
3918                         mh = choose_multiplier (d, size, size,
3919                                                 &ml, &post_shift, &dummy);
3920
3921                         /* If the suggested multiplier is more than SIZE bits,
3922                            we can do better for even divisors, using an
3923                            initial right shift.  */
3924                         if (mh != 0 && (d & 1) == 0)
3925                           {
3926                             pre_shift = floor_log2 (d & -d);
3927                             mh = choose_multiplier (d >> pre_shift, size,
3928                                                     size - pre_shift,
3929                                                     &ml, &post_shift, &dummy);
3930                             gcc_assert (!mh);
3931                           }
3932                         else
3933                           pre_shift = 0;
3934
3935                         if (mh != 0)
3936                           {
3937                             rtx t1, t2, t3, t4;
3938
3939                             if (post_shift - 1 >= BITS_PER_WORD)
3940                               goto fail1;
3941
3942                             extra_cost
3943                               = (shift_cost[compute_mode][post_shift - 1]
3944                                  + shift_cost[compute_mode][1]
3945                                  + 2 * add_cost[compute_mode]);
3946                             t1 = expand_mult_highpart (compute_mode, op0, ml,
3947                                                        NULL_RTX, 1,
3948                                                        max_cost - extra_cost);
3949                             if (t1 == 0)
3950                               goto fail1;
3951                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
3952                                                                op0, t1),
3953                                                 NULL_RTX);
3954                             t3 = expand_shift
3955                               (RSHIFT_EXPR, compute_mode, t2,
3956                                build_int_cst (NULL_TREE, 1),
3957                                NULL_RTX,1);
3958                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
3959                                                               t1, t3),
3960                                                 NULL_RTX);
3961                             quotient = expand_shift
3962                               (RSHIFT_EXPR, compute_mode, t4,
3963                                build_int_cst (NULL_TREE, post_shift - 1),
3964                                tquotient, 1);
3965                           }
3966                         else
3967                           {
3968                             rtx t1, t2;
3969
3970                             if (pre_shift >= BITS_PER_WORD
3971                                 || post_shift >= BITS_PER_WORD)
3972                               goto fail1;
3973
3974                             t1 = expand_shift
3975                               (RSHIFT_EXPR, compute_mode, op0,
3976                                build_int_cst (NULL_TREE, pre_shift),
3977                                NULL_RTX, 1);
3978                             extra_cost
3979                               = (shift_cost[compute_mode][pre_shift]
3980                                  + shift_cost[compute_mode][post_shift]);
3981                             t2 = expand_mult_highpart (compute_mode, t1, ml,
3982                                                        NULL_RTX, 1,
3983                                                        max_cost - extra_cost);
3984                             if (t2 == 0)
3985                               goto fail1;
3986                             quotient = expand_shift
3987                               (RSHIFT_EXPR, compute_mode, t2,
3988                                build_int_cst (NULL_TREE, post_shift),
3989                                tquotient, 1);
3990                           }
3991                       }
3992                   }
3993                 else            /* Too wide mode to use tricky code */
3994                   break;
3995
3996                 insn = get_last_insn ();
3997                 if (insn != last
3998                     && (set = single_set (insn)) != 0
3999                     && SET_DEST (set) == quotient)
4000                   set_unique_reg_note (insn,
4001                                        REG_EQUAL,
4002                                        gen_rtx_UDIV (compute_mode, op0, op1));
4003               }
4004             else                /* TRUNC_DIV, signed */
4005               {
4006                 unsigned HOST_WIDE_INT ml;
4007                 int lgup, post_shift;
4008                 rtx mlr;
4009                 HOST_WIDE_INT d = INTVAL (op1);
4010                 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
4011
4012                 /* n rem d = n rem -d */
4013                 if (rem_flag && d < 0)
4014                   {
4015                     d = abs_d;
4016                     op1 = gen_int_mode (abs_d, compute_mode);
4017                   }
4018
4019                 if (d == 1)
4020                   quotient = op0;
4021                 else if (d == -1)
4022                   quotient = expand_unop (compute_mode, neg_optab, op0,
4023                                           tquotient, 0);
4024                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4025                   {
4026                     /* This case is not handled correctly below.  */
4027                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4028                                                 compute_mode, 1, 1);
4029                     if (quotient == 0)
4030                       goto fail1;
4031                   }
4032                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4033                          && (rem_flag ? smod_pow2_cheap[compute_mode]
4034                                       : sdiv_pow2_cheap[compute_mode])
4035                          /* We assume that cheap metric is true if the
4036                             optab has an expander for this mode.  */
4037                          && (((rem_flag ? smod_optab : sdiv_optab)
4038                               ->handlers[compute_mode].insn_code
4039                               != CODE_FOR_nothing)
4040                              || (sdivmod_optab->handlers[compute_mode]
4041                                  .insn_code != CODE_FOR_nothing)))
4042                   ;
4043                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4044                   {
4045                     if (rem_flag)
4046                       {
4047                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4048                         if (remainder)
4049                           return gen_lowpart (mode, remainder);
4050                       }
4051
4052                     if (sdiv_pow2_cheap[compute_mode]
4053                         && ((sdiv_optab->handlers[compute_mode].insn_code
4054                              != CODE_FOR_nothing)
4055                             || (sdivmod_optab->handlers[compute_mode].insn_code
4056                                 != CODE_FOR_nothing)))
4057                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4058                                                 compute_mode, op0,
4059                                                 gen_int_mode (abs_d,
4060                                                               compute_mode),
4061                                                 NULL_RTX, 0);
4062                     else
4063                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4064
4065                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4066                        negate the quotient.  */
4067                     if (d < 0)
4068                       {
4069                         insn = get_last_insn ();
4070                         if (insn != last
4071                             && (set = single_set (insn)) != 0
4072                             && SET_DEST (set) == quotient
4073                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4074                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4075                           set_unique_reg_note (insn,
4076                                                REG_EQUAL,
4077                                                gen_rtx_DIV (compute_mode,
4078                                                             op0,
4079                                                             GEN_INT
4080                                                             (trunc_int_for_mode
4081                                                              (abs_d,
4082                                                               compute_mode))));
4083
4084                         quotient = expand_unop (compute_mode, neg_optab,
4085                                                 quotient, quotient, 0);
4086                       }
4087                   }
4088                 else if (size <= HOST_BITS_PER_WIDE_INT)
4089                   {
4090                     choose_multiplier (abs_d, size, size - 1,
4091                                        &mlr, &post_shift, &lgup);
4092                     ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4093                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4094                       {
4095                         rtx t1, t2, t3;
4096
4097                         if (post_shift >= BITS_PER_WORD
4098                             || size - 1 >= BITS_PER_WORD)
4099                           goto fail1;
4100
4101                         extra_cost = (shift_cost[compute_mode][post_shift]
4102                                       + shift_cost[compute_mode][size - 1]
4103                                       + add_cost[compute_mode]);
4104                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4105                                                    NULL_RTX, 0,
4106                                                    max_cost - extra_cost);
4107                         if (t1 == 0)
4108                           goto fail1;
4109                         t2 = expand_shift
4110                           (RSHIFT_EXPR, compute_mode, t1,
4111                            build_int_cst (NULL_TREE, post_shift),
4112                            NULL_RTX, 0);
4113                         t3 = expand_shift
4114                           (RSHIFT_EXPR, compute_mode, op0,
4115                            build_int_cst (NULL_TREE, size - 1),
4116                            NULL_RTX, 0);
4117                         if (d < 0)
4118                           quotient
4119                             = force_operand (gen_rtx_MINUS (compute_mode,
4120                                                             t3, t2),
4121                                              tquotient);
4122                         else
4123                           quotient
4124                             = force_operand (gen_rtx_MINUS (compute_mode,
4125                                                             t2, t3),
4126                                              tquotient);
4127                       }
4128                     else
4129                       {
4130                         rtx t1, t2, t3, t4;
4131
4132                         if (post_shift >= BITS_PER_WORD
4133                             || size - 1 >= BITS_PER_WORD)
4134                           goto fail1;
4135
4136                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4137                         mlr = gen_int_mode (ml, compute_mode);
4138                         extra_cost = (shift_cost[compute_mode][post_shift]
4139                                       + shift_cost[compute_mode][size - 1]
4140                                       + 2 * add_cost[compute_mode]);
4141                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4142                                                    NULL_RTX, 0,
4143                                                    max_cost - extra_cost);
4144                         if (t1 == 0)
4145                           goto fail1;
4146                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4147                                                           t1, op0),
4148                                             NULL_RTX);
4149                         t3 = expand_shift
4150                           (RSHIFT_EXPR, compute_mode, t2,
4151                            build_int_cst (NULL_TREE, post_shift),
4152                            NULL_RTX, 0);
4153                         t4 = expand_shift
4154                           (RSHIFT_EXPR, compute_mode, op0,
4155                            build_int_cst (NULL_TREE, size - 1),
4156                            NULL_RTX, 0);
4157                         if (d < 0)
4158                           quotient
4159                             = force_operand (gen_rtx_MINUS (compute_mode,
4160                                                             t4, t3),
4161                                              tquotient);
4162                         else
4163                           quotient
4164                             = force_operand (gen_rtx_MINUS (compute_mode,
4165                                                             t3, t4),
4166                                              tquotient);
4167                       }
4168                   }
4169                 else            /* Too wide mode to use tricky code */
4170                   break;
4171
4172                 insn = get_last_insn ();
4173                 if (insn != last
4174                     && (set = single_set (insn)) != 0
4175                     && SET_DEST (set) == quotient)
4176                   set_unique_reg_note (insn,
4177                                        REG_EQUAL,
4178                                        gen_rtx_DIV (compute_mode, op0, op1));
4179               }
4180             break;
4181           }
4182       fail1:
4183         delete_insns_since (last);
4184         break;
4185
4186       case FLOOR_DIV_EXPR:
4187       case FLOOR_MOD_EXPR:
4188       /* We will come here only for signed operations.  */
4189         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4190           {
4191             unsigned HOST_WIDE_INT mh;
4192             int pre_shift, lgup, post_shift;
4193             HOST_WIDE_INT d = INTVAL (op1);
4194             rtx ml;
4195
4196             if (d > 0)
4197               {
4198                 /* We could just as easily deal with negative constants here,
4199                    but it does not seem worth the trouble for GCC 2.6.  */
4200                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4201                   {
4202                     pre_shift = floor_log2 (d);
4203                     if (rem_flag)
4204                       {
4205                         remainder = expand_binop (compute_mode, and_optab, op0,
4206                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4207                                                   remainder, 0, OPTAB_LIB_WIDEN);
4208                         if (remainder)
4209                           return gen_lowpart (mode, remainder);
4210                       }
4211                     quotient = expand_shift
4212                       (RSHIFT_EXPR, compute_mode, op0,
4213                        build_int_cst (NULL_TREE, pre_shift),
4214                        tquotient, 0);
4215                   }
4216                 else
4217                   {
4218                     rtx t1, t2, t3, t4;
4219
4220                     mh = choose_multiplier (d, size, size - 1,
4221                                             &ml, &post_shift, &lgup);
4222                     gcc_assert (!mh);
4223
4224                     if (post_shift < BITS_PER_WORD
4225                         && size - 1 < BITS_PER_WORD)
4226                       {
4227                         t1 = expand_shift
4228                           (RSHIFT_EXPR, compute_mode, op0,
4229                            build_int_cst (NULL_TREE, size - 1),
4230                            NULL_RTX, 0);
4231                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4232                                            NULL_RTX, 0, OPTAB_WIDEN);
4233                         extra_cost = (shift_cost[compute_mode][post_shift]
4234                                       + shift_cost[compute_mode][size - 1]
4235                                       + 2 * add_cost[compute_mode]);
4236                         t3 = expand_mult_highpart (compute_mode, t2, ml,
4237                                                    NULL_RTX, 1,
4238                                                    max_cost - extra_cost);
4239                         if (t3 != 0)
4240                           {
4241                             t4 = expand_shift
4242                               (RSHIFT_EXPR, compute_mode, t3,
4243                                build_int_cst (NULL_TREE, post_shift),
4244                                NULL_RTX, 1);
4245                             quotient = expand_binop (compute_mode, xor_optab,
4246                                                      t4, t1, tquotient, 0,
4247                                                      OPTAB_WIDEN);
4248                           }
4249                       }
4250                   }
4251               }
4252             else
4253               {
4254                 rtx nsign, t1, t2, t3, t4;
4255                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4256                                                   op0, constm1_rtx), NULL_RTX);
4257                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4258                                    0, OPTAB_WIDEN);
4259                 nsign = expand_shift
4260                   (RSHIFT_EXPR, compute_mode, t2,
4261                    build_int_cst (NULL_TREE, size - 1),
4262                    NULL_RTX, 0);
4263                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4264                                     NULL_RTX);
4265                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4266                                     NULL_RTX, 0);
4267                 if (t4)
4268                   {
4269                     rtx t5;
4270                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4271                                       NULL_RTX, 0);
4272                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4273                                                             t4, t5),
4274                                               tquotient);
4275                   }
4276               }
4277           }
4278
4279         if (quotient != 0)
4280           break;
4281         delete_insns_since (last);
4282
4283         /* Try using an instruction that produces both the quotient and
4284            remainder, using truncation.  We can easily compensate the quotient
4285            or remainder to get floor rounding, once we have the remainder.
4286            Notice that we compute also the final remainder value here,
4287            and return the result right away.  */
4288         if (target == 0 || GET_MODE (target) != compute_mode)
4289           target = gen_reg_rtx (compute_mode);
4290
4291         if (rem_flag)
4292           {
4293             remainder
4294               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4295             quotient = gen_reg_rtx (compute_mode);
4296           }
4297         else
4298           {
4299             quotient
4300               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4301             remainder = gen_reg_rtx (compute_mode);
4302           }
4303
4304         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4305                                  quotient, remainder, 0))
4306           {
4307             /* This could be computed with a branch-less sequence.
4308                Save that for later.  */
4309             rtx tem;
4310             rtx label = gen_label_rtx ();
4311             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4312             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4313                                 NULL_RTX, 0, OPTAB_WIDEN);
4314             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4315             expand_dec (quotient, const1_rtx);
4316             expand_inc (remainder, op1);
4317             emit_label (label);
4318             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4319           }
4320
4321         /* No luck with division elimination or divmod.  Have to do it
4322            by conditionally adjusting op0 *and* the result.  */
4323         {
4324           rtx label1, label2, label3, label4, label5;
4325           rtx adjusted_op0;
4326           rtx tem;
4327
4328           quotient = gen_reg_rtx (compute_mode);
4329           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4330           label1 = gen_label_rtx ();
4331           label2 = gen_label_rtx ();
4332           label3 = gen_label_rtx ();
4333           label4 = gen_label_rtx ();
4334           label5 = gen_label_rtx ();
4335           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4336           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4337           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4338                               quotient, 0, OPTAB_LIB_WIDEN);
4339           if (tem != quotient)
4340             emit_move_insn (quotient, tem);
4341           emit_jump_insn (gen_jump (label5));
4342           emit_barrier ();
4343           emit_label (label1);
4344           expand_inc (adjusted_op0, const1_rtx);
4345           emit_jump_insn (gen_jump (label4));
4346           emit_barrier ();
4347           emit_label (label2);
4348           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4349           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4350                               quotient, 0, OPTAB_LIB_WIDEN);
4351           if (tem != quotient)
4352             emit_move_insn (quotient, tem);
4353           emit_jump_insn (gen_jump (label5));
4354           emit_barrier ();
4355           emit_label (label3);
4356           expand_dec (adjusted_op0, const1_rtx);
4357           emit_label (label4);
4358           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4359                               quotient, 0, OPTAB_LIB_WIDEN);
4360           if (tem != quotient)
4361             emit_move_insn (quotient, tem);
4362           expand_dec (quotient, const1_rtx);
4363           emit_label (label5);
4364         }
4365         break;
4366
4367       case CEIL_DIV_EXPR:
4368       case CEIL_MOD_EXPR:
4369         if (unsignedp)
4370           {
4371             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4372               {
4373                 rtx t1, t2, t3;
4374                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4375                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4376                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4377                                    tquotient, 1);
4378                 t2 = expand_binop (compute_mode, and_optab, op0,
4379                                    GEN_INT (d - 1),
4380                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4381                 t3 = gen_reg_rtx (compute_mode);
4382                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4383                                       compute_mode, 1, 1);
4384                 if (t3 == 0)
4385                   {
4386                     rtx lab;
4387                     lab = gen_label_rtx ();
4388                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4389                     expand_inc (t1, const1_rtx);
4390                     emit_label (lab);
4391                     quotient = t1;
4392                   }
4393                 else
4394                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4395                                                           t1, t3),
4396                                             tquotient);
4397                 break;
4398               }
4399
4400             /* Try using an instruction that produces both the quotient and
4401                remainder, using truncation.  We can easily compensate the
4402                quotient or remainder to get ceiling rounding, once we have the
4403                remainder.  Notice that we compute also the final remainder
4404                value here, and return the result right away.  */
4405             if (target == 0 || GET_MODE (target) != compute_mode)
4406               target = gen_reg_rtx (compute_mode);
4407
4408             if (rem_flag)
4409               {
4410                 remainder = (REG_P (target)
4411                              ? target : gen_reg_rtx (compute_mode));
4412                 quotient = gen_reg_rtx (compute_mode);
4413               }
4414             else
4415               {
4416                 quotient = (REG_P (target)
4417                             ? target : gen_reg_rtx (compute_mode));
4418                 remainder = gen_reg_rtx (compute_mode);
4419               }
4420
4421             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4422                                      remainder, 1))
4423               {
4424                 /* This could be computed with a branch-less sequence.
4425                    Save that for later.  */
4426                 rtx label = gen_label_rtx ();
4427                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4428                                  compute_mode, label);
4429                 expand_inc (quotient, const1_rtx);
4430                 expand_dec (remainder, op1);
4431                 emit_label (label);
4432                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4433               }
4434
4435             /* No luck with division elimination or divmod.  Have to do it
4436                by conditionally adjusting op0 *and* the result.  */
4437             {
4438               rtx label1, label2;
4439               rtx adjusted_op0, tem;
4440
4441               quotient = gen_reg_rtx (compute_mode);
4442               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4443               label1 = gen_label_rtx ();
4444               label2 = gen_label_rtx ();
4445               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4446                                compute_mode, label1);
4447               emit_move_insn  (quotient, const0_rtx);
4448               emit_jump_insn (gen_jump (label2));
4449               emit_barrier ();
4450               emit_label (label1);
4451               expand_dec (adjusted_op0, const1_rtx);
4452               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4453                                   quotient, 1, OPTAB_LIB_WIDEN);
4454               if (tem != quotient)
4455                 emit_move_insn (quotient, tem);
4456               expand_inc (quotient, const1_rtx);
4457               emit_label (label2);
4458             }
4459           }
4460         else /* signed */
4461           {
4462             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4463                 && INTVAL (op1) >= 0)
4464               {
4465                 /* This is extremely similar to the code for the unsigned case
4466                    above.  For 2.7 we should merge these variants, but for
4467                    2.6.1 I don't want to touch the code for unsigned since that
4468                    get used in C.  The signed case will only be used by other
4469                    languages (Ada).  */
4470
4471                 rtx t1, t2, t3;
4472                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4473                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4474                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4475                                    tquotient, 0);
4476                 t2 = expand_binop (compute_mode, and_optab, op0,
4477                                    GEN_INT (d - 1),
4478                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4479                 t3 = gen_reg_rtx (compute_mode);
4480                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4481                                       compute_mode, 1, 1);
4482                 if (t3 == 0)
4483                   {
4484                     rtx lab;
4485                     lab = gen_label_rtx ();
4486                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4487                     expand_inc (t1, const1_rtx);
4488                     emit_label (lab);
4489                     quotient = t1;
4490                   }
4491                 else
4492                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4493                                                           t1, t3),
4494                                             tquotient);
4495                 break;
4496               }
4497
4498             /* Try using an instruction that produces both the quotient and
4499                remainder, using truncation.  We can easily compensate the
4500                quotient or remainder to get ceiling rounding, once we have the
4501                remainder.  Notice that we compute also the final remainder
4502                value here, and return the result right away.  */
4503             if (target == 0 || GET_MODE (target) != compute_mode)
4504               target = gen_reg_rtx (compute_mode);
4505             if (rem_flag)
4506               {
4507                 remainder= (REG_P (target)
4508                             ? target : gen_reg_rtx (compute_mode));
4509                 quotient = gen_reg_rtx (compute_mode);
4510               }
4511             else
4512               {
4513                 quotient = (REG_P (target)
4514                             ? target : gen_reg_rtx (compute_mode));
4515                 remainder = gen_reg_rtx (compute_mode);
4516               }
4517
4518             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4519                                      remainder, 0))
4520               {
4521                 /* This could be computed with a branch-less sequence.
4522                    Save that for later.  */
4523                 rtx tem;
4524                 rtx label = gen_label_rtx ();
4525                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4526                                  compute_mode, label);
4527                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4528                                     NULL_RTX, 0, OPTAB_WIDEN);
4529                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4530                 expand_inc (quotient, const1_rtx);
4531                 expand_dec (remainder, op1);
4532                 emit_label (label);
4533                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4534               }
4535
4536             /* No luck with division elimination or divmod.  Have to do it
4537                by conditionally adjusting op0 *and* the result.  */
4538             {
4539               rtx label1, label2, label3, label4, label5;
4540               rtx adjusted_op0;
4541               rtx tem;
4542
4543               quotient = gen_reg_rtx (compute_mode);
4544               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4545               label1 = gen_label_rtx ();
4546               label2 = gen_label_rtx ();
4547               label3 = gen_label_rtx ();
4548               label4 = gen_label_rtx ();
4549               label5 = gen_label_rtx ();
4550               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4551               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4552                                compute_mode, label1);
4553               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4554                                   quotient, 0, OPTAB_LIB_WIDEN);
4555               if (tem != quotient)
4556                 emit_move_insn (quotient, tem);
4557               emit_jump_insn (gen_jump (label5));
4558               emit_barrier ();
4559               emit_label (label1);
4560               expand_dec (adjusted_op0, const1_rtx);
4561               emit_jump_insn (gen_jump (label4));
4562               emit_barrier ();
4563               emit_label (label2);
4564               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4565                                compute_mode, label3);
4566               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4567                                   quotient, 0, OPTAB_LIB_WIDEN);
4568               if (tem != quotient)
4569                 emit_move_insn (quotient, tem);
4570               emit_jump_insn (gen_jump (label5));
4571               emit_barrier ();
4572               emit_label (label3);
4573               expand_inc (adjusted_op0, const1_rtx);
4574               emit_label (label4);
4575               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4576                                   quotient, 0, OPTAB_LIB_WIDEN);
4577               if (tem != quotient)
4578                 emit_move_insn (quotient, tem);
4579               expand_inc (quotient, const1_rtx);
4580               emit_label (label5);
4581             }
4582           }
4583         break;
4584
4585       case EXACT_DIV_EXPR:
4586         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4587           {
4588             HOST_WIDE_INT d = INTVAL (op1);
4589             unsigned HOST_WIDE_INT ml;
4590             int pre_shift;
4591             rtx t1;
4592
4593             pre_shift = floor_log2 (d & -d);
4594             ml = invert_mod2n (d >> pre_shift, size);
4595             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4596                                build_int_cst (NULL_TREE, pre_shift),
4597                                NULL_RTX, unsignedp);
4598             quotient = expand_mult (compute_mode, t1,
4599                                     gen_int_mode (ml, compute_mode),
4600                                     NULL_RTX, 1);
4601
4602             insn = get_last_insn ();
4603             set_unique_reg_note (insn,
4604                                  REG_EQUAL,
4605                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4606                                                  compute_mode,
4607                                                  op0, op1));
4608           }
4609         break;
4610
4611       case ROUND_DIV_EXPR:
4612       case ROUND_MOD_EXPR:
4613         if (unsignedp)
4614           {
4615             rtx tem;
4616             rtx label;
4617             label = gen_label_rtx ();
4618             quotient = gen_reg_rtx (compute_mode);
4619             remainder = gen_reg_rtx (compute_mode);
4620             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4621               {
4622                 rtx tem;
4623                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4624                                          quotient, 1, OPTAB_LIB_WIDEN);
4625                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4626                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4627                                           remainder, 1, OPTAB_LIB_WIDEN);
4628               }
4629             tem = plus_constant (op1, -1);
4630             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4631                                 build_int_cst (NULL_TREE, 1),
4632                                 NULL_RTX, 1);
4633             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4634             expand_inc (quotient, const1_rtx);
4635             expand_dec (remainder, op1);
4636             emit_label (label);
4637           }
4638         else
4639           {
4640             rtx abs_rem, abs_op1, tem, mask;
4641             rtx label;
4642             label = gen_label_rtx ();
4643             quotient = gen_reg_rtx (compute_mode);
4644             remainder = gen_reg_rtx (compute_mode);
4645             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4646               {
4647                 rtx tem;
4648                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4649                                          quotient, 0, OPTAB_LIB_WIDEN);
4650                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4651                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4652                                           remainder, 0, OPTAB_LIB_WIDEN);
4653               }
4654             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4655             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4656             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4657                                 build_int_cst (NULL_TREE, 1),
4658                                 NULL_RTX, 1);
4659             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4660             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4661                                 NULL_RTX, 0, OPTAB_WIDEN);
4662             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4663                                  build_int_cst (NULL_TREE, size - 1),
4664                                  NULL_RTX, 0);
4665             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4666                                 NULL_RTX, 0, OPTAB_WIDEN);
4667             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4668                                 NULL_RTX, 0, OPTAB_WIDEN);
4669             expand_inc (quotient, tem);
4670             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4671                                 NULL_RTX, 0, OPTAB_WIDEN);
4672             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4673                                 NULL_RTX, 0, OPTAB_WIDEN);
4674             expand_dec (remainder, tem);
4675             emit_label (label);
4676           }
4677         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4678
4679       default:
4680         gcc_unreachable ();
4681       }
4682
4683   if (quotient == 0)
4684     {
4685       if (target && GET_MODE (target) != compute_mode)
4686         target = 0;
4687
4688       if (rem_flag)
4689         {
4690           /* Try to produce the remainder without producing the quotient.
4691              If we seem to have a divmod pattern that does not require widening,
4692              don't try widening here.  We should really have a WIDEN argument
4693              to expand_twoval_binop, since what we'd really like to do here is
4694              1) try a mod insn in compute_mode
4695              2) try a divmod insn in compute_mode
4696              3) try a div insn in compute_mode and multiply-subtract to get
4697                 remainder
4698              4) try the same things with widening allowed.  */
4699           remainder
4700             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4701                                  op0, op1, target,
4702                                  unsignedp,
4703                                  ((optab2->handlers[compute_mode].insn_code
4704                                    != CODE_FOR_nothing)
4705                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4706           if (remainder == 0)
4707             {
4708               /* No luck there.  Can we do remainder and divide at once
4709                  without a library call?  */
4710               remainder = gen_reg_rtx (compute_mode);
4711               if (! expand_twoval_binop ((unsignedp
4712                                           ? udivmod_optab
4713                                           : sdivmod_optab),
4714                                          op0, op1,
4715                                          NULL_RTX, remainder, unsignedp))
4716                 remainder = 0;
4717             }
4718
4719           if (remainder)
4720             return gen_lowpart (mode, remainder);
4721         }
4722
4723       /* Produce the quotient.  Try a quotient insn, but not a library call.
4724          If we have a divmod in this mode, use it in preference to widening
4725          the div (for this test we assume it will not fail). Note that optab2
4726          is set to the one of the two optabs that the call below will use.  */
4727       quotient
4728         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4729                              op0, op1, rem_flag ? NULL_RTX : target,
4730                              unsignedp,
4731                              ((optab2->handlers[compute_mode].insn_code
4732                                != CODE_FOR_nothing)
4733                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4734
4735       if (quotient == 0)
4736         {
4737           /* No luck there.  Try a quotient-and-remainder insn,
4738              keeping the quotient alone.  */
4739           quotient = gen_reg_rtx (compute_mode);
4740           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4741                                      op0, op1,
4742                                      quotient, NULL_RTX, unsignedp))
4743             {
4744               quotient = 0;
4745               if (! rem_flag)
4746                 /* Still no luck.  If we are not computing the remainder,
4747                    use a library call for the quotient.  */
4748                 quotient = sign_expand_binop (compute_mode,
4749                                               udiv_optab, sdiv_optab,
4750                                               op0, op1, target,
4751                                               unsignedp, OPTAB_LIB_WIDEN);
4752             }
4753         }
4754     }
4755
4756   if (rem_flag)
4757     {
4758       if (target && GET_MODE (target) != compute_mode)
4759         target = 0;
4760
4761       if (quotient == 0)
4762         {
4763           /* No divide instruction either.  Use library for remainder.  */
4764           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4765                                          op0, op1, target,
4766                                          unsignedp, OPTAB_LIB_WIDEN);
4767           /* No remainder function.  Try a quotient-and-remainder
4768              function, keeping the remainder.  */
4769           if (!remainder)
4770             {
4771               remainder = gen_reg_rtx (compute_mode);
4772               if (!expand_twoval_binop_libfunc
4773                   (unsignedp ? udivmod_optab : sdivmod_optab,
4774                    op0, op1,
4775                    NULL_RTX, remainder,
4776                    unsignedp ? UMOD : MOD))
4777                 remainder = NULL_RTX;
4778             }
4779         }
4780       else
4781         {
4782           /* We divided.  Now finish doing X - Y * (X / Y).  */
4783           remainder = expand_mult (compute_mode, quotient, op1,
4784                                    NULL_RTX, unsignedp);
4785           remainder = expand_binop (compute_mode, sub_optab, op0,
4786                                     remainder, target, unsignedp,
4787                                     OPTAB_LIB_WIDEN);
4788         }
4789     }
4790
4791   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4792 }
4793 \f
4794 /* Return a tree node with data type TYPE, describing the value of X.
4795    Constants become constant nodes and the arithmetic rtx codes handled
4796    below (only those generated by loop.c are supported) become folded
4797    expressions; anything else becomes an anonymous VAR_DECL with rtl X.  */
4798
4799 tree
4800 make_tree (tree type, rtx x)
4801 {
4802   tree t;
4803
4804   switch (GET_CODE (x))
4805     {
4806     case CONST_INT:
4807       {
4808         HOST_WIDE_INT hi = 0;
4809
4810         /* Sign-extend a negative value into the high word, unless TYPE is
4811            unsigned and narrower than a HOST_WIDE_INT.  */
4812         if (INTVAL (x) < 0
4813             && !(TYPE_UNSIGNED (type)
4814                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
4815                      < HOST_BITS_PER_WIDE_INT)))
4816           hi = -1;
4817
4818         return build_int_cst_wide (type, INTVAL (x), hi);
4819       }
4820
4821     case CONST_DOUBLE:
4822       /* A VOIDmode CONST_DOUBLE is built as an integer constant from its
4823          low and high words; any other mode is built as a real constant.  */
4824       if (GET_MODE (x) == VOIDmode)
4825         t = build_int_cst_wide (type,
4826                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4827       else
4828         {
4829           REAL_VALUE_TYPE d;
4830
4831           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4832           t = build_real (type, d);
4833         }
4834
4835       return t;
4836
4837     case CONST_VECTOR:
4838       {
4839         tree elt_type = TREE_TYPE (type);
4840         int i, units = CONST_VECTOR_NUNITS (x);
4841
4842         /* Each element gets the vector's element type, not the vector type
4843            itself.  Walking backwards while consing onto the front leaves
4844            the list in element order.  */
4845         t = NULL_TREE;
4846         for (i = units - 1; i >= 0; --i)
4847           t = tree_cons (NULL_TREE,
4848                          make_tree (elt_type, CONST_VECTOR_ELT (x, i)), t);
4849
4850         return build_vector (type, t);
4851       }
4852
4853     case PLUS:
4854       return fold (build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4855                            make_tree (type, XEXP (x, 1))));
4856
4857     case MINUS:
4858       return fold (build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4859                            make_tree (type, XEXP (x, 1))));
4860
4861     case NEG:
4862       return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
4863
4864     case MULT:
4865       return fold (build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4866                            make_tree (type, XEXP (x, 1))));
4867
4868     case ASHIFT:
4869       return fold (build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4870                            make_tree (type, XEXP (x, 1))));
4871
4872     case LSHIFTRT:
4873       /* Shift in the type the unsigned_type hook gives for TYPE.  */
4874       t = lang_hooks.types.unsigned_type (type);
4875       return fold (convert (type, build2 (RSHIFT_EXPR, t,
4876                                           make_tree (t, XEXP (x, 0)),
4877                                           make_tree (type, XEXP (x, 1)))));
4878
4879     case ASHIFTRT:
4880       /* Shift in the type the signed_type hook gives for TYPE.  */
4881       t = lang_hooks.types.signed_type (type);
4882       return fold (convert (type, build2 (RSHIFT_EXPR, t,
4883                                           make_tree (t, XEXP (x, 0)),
4884                                           make_tree (type, XEXP (x, 1)))));
4885
4886     case DIV:
4887       /* Divide in the signed_type hook's type, except for a REAL_TYPE.  */
4888       if (TREE_CODE (type) != REAL_TYPE)
4889         t = lang_hooks.types.signed_type (type);
4890       else
4891         t = type;
4892       return fold (convert (type, build2 (TRUNC_DIV_EXPR, t,
4893                                           make_tree (t, XEXP (x, 0)),
4894                                           make_tree (t, XEXP (x, 1)))));
4895
4896     case UDIV:
4897       /* Divide in the type the unsigned_type hook gives for TYPE.  */
4898       t = lang_hooks.types.unsigned_type (type);
4899       return fold (convert (type, build2 (TRUNC_DIV_EXPR, t,
4900                                           make_tree (t, XEXP (x, 0)),
4901                                           make_tree (t, XEXP (x, 1)))));
4902
4903     case SIGN_EXTEND:
4904     case ZERO_EXTEND:
4905       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4906                                           GET_CODE (x) == ZERO_EXTEND);
4907       return fold (convert (type, make_tree (t, XEXP (x, 0))));
4908
4909     default:
4910       t = build_decl (VAR_DECL, NULL_TREE, type);
4911
4912       /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4913          ptr_mode.  So convert.  */
4914       if (POINTER_TYPE_P (type))
4915         x = convert_memory_address (TYPE_MODE (type), x);
4916
4917       /* Note that we do *not* use SET_DECL_RTL here, because we do not
4918          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
4919       t->decl.rtl = x;
4920
4921       return t;
4922     }
4923 }
4924
4925 /* Report whether the constant computation X * MULT + ADD overflows,
4926    as indicated by TREE_CONSTANT_OVERFLOW on the folded result.
4927    X, MULT and ADD must be CONST_*.  MODE is the machine mode for the
4928    computation; X and MULT are taken to be in MODE, while ADD may have
4929    a different mode (a VOIDmode ADD is treated like the product).
4930    UNSIGNEDP is nonzero to do unsigned multiplication.  */
4931
4932 bool
4933 const_mult_add_overflow_p (rtx x, rtx mult, rtx add,
4934                            enum machine_mode mode, int unsignedp)
4935 {
4936   tree prod_type = lang_hooks.types.type_for_mode (mode, unsignedp);
4937   tree addend_type, x_tree, mult_tree, product, sum;
4938
4939   /* An unsigned type gives a proper overflow indication only if we
4940      pretend that it is a sizetype, so work on a flagged copy.
4941      FIXME: It would be nice if we could step directly from this
4942      type to its sizetype equivalent.  */
4943   if (unsignedp)
4944     {
4945       prod_type = build_distinct_type_copy (prod_type);
4946       TYPE_IS_SIZETYPE (prod_type) = 1;
4947     }
4948
4949   if (GET_MODE (add) == VOIDmode)
4950     addend_type = prod_type;
4951   else
4952     addend_type = lang_hooks.types.type_for_mode (GET_MODE (add), unsignedp);
4953
4954   x_tree = make_tree (prod_type, x);
4955   mult_tree = make_tree (prod_type, mult);
4956   product = fold (build2 (MULT_EXPR, prod_type, x_tree, mult_tree));
4957   sum = fold (build2 (PLUS_EXPR, prod_type, product,
4958                       make_tree (addend_type, add)));
4959
4960   return TREE_CONSTANT_OVERFLOW (sum);
4961 }
4962
4963 /* Expand X * MULT + ADD and return an rtx holding its value.
4964    TARGET, an rtx, suggests where the result should be stored.
4965    MODE is the machine mode in which the computation is carried out;
4966    X and MULT are taken to be in MODE, while ADD may be in another mode
4967    (an ADD without a mode is treated as being in MODE).
4968    UNSIGNEDP is nonzero if the arithmetic is unsigned.
4969    Insns may be emitted as a side effect.  */
4970
4971 rtx
4972 expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
4973                  int unsignedp)
4974 {
4975   tree type = lang_hooks.types.type_for_mode (mode, unsignedp);
4976   tree add_type = type;
4977   tree product, sum;
4978
4979   if (GET_MODE (add) != VOIDmode)
4980     add_type = lang_hooks.types.type_for_mode (GET_MODE (add), unsignedp);
4981
4982   product = fold (build2 (MULT_EXPR, type,
4983                           make_tree (type, x), make_tree (type, mult)));
4984   sum = fold (build2 (PLUS_EXPR, type, product, make_tree (add_type, add)));
4985   return expand_expr (sum, target, VOIDmode, 0);
4986 }
4987 \f
4988 /* Produce the bitwise AND of OP0 and OP1 in mode MODE.
4989    A nonzero TARGET receives the value and is what gets returned.
4990
4991    With TARGET equal to 0, a pseudo-register or constant is returned.  */
4992
4993 rtx
4994 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
4995 {
4996   rtx res = 0;
4997
4998   /* With both operands VOIDmode, try simplify_binary_operation first.  */
4999   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5000     res = simplify_binary_operation (AND, mode, op0, op1);
5001   if (!res)
5002     res = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5003   if (!target)
5004     return res;
5005   if (res != target)
5006     emit_move_insn (target, res);
5007   return target;
5008 }
5009 \f
5010 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5011    and storing in TARGET.  Normally return TARGET.
5012    Return 0 if that cannot be done.
5013
5014    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5015    it is VOIDmode, they cannot both be CONST_INT.
5016
5017    UNSIGNEDP is for the case where we have to widen the operands
5018    to perform the operation.  It says to use zero-extension.
5019
5020    NORMALIZEP is 1 if we should convert the result to be either zero
5021    or one.  NORMALIZEP is -1 if we should convert the result to be
5022    either zero or -1.  If NORMALIZEP is zero, the result will be left
5023    "raw" out of the scc insn.  */
5024
5025 rtx
5026 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5027                  enum machine_mode mode, int unsignedp, int normalizep)
5028 {
5029   rtx subtarget;
5030   enum insn_code icode;
5031   enum machine_mode compare_mode;
5032   enum machine_mode target_mode = GET_MODE (target);
5033   rtx tem;
5034   rtx last = get_last_insn ();
5035   rtx pattern, comparison;
5036
5037   if (unsignedp)
5038     code = unsigned_condition (code);
5039
5040   /* If one operand is constant, make it the second one.  Only do this
5041      if the other operand is not constant as well.  */
5042
5043   if (swap_commutative_operands_p (op0, op1))
5044     {
5045       tem = op0;
5046       op0 = op1;
5047       op1 = tem;
5048       code = swap_condition (code);
5049     }
5050
5051   if (mode == VOIDmode)
5052     mode = GET_MODE (op0);
5053
5054   /* For some comparisons with 1 and -1, we can convert this to
5055      comparisons with zero.  This will often produce more opportunities for
5056      store-flag insns.  */
5057
5058   switch (code)
5059     {
5060     case LT:
5061       if (op1 == const1_rtx)
5062         op1 = const0_rtx, code = LE;
5063       break;
5064     case LE:
5065       if (op1 == constm1_rtx)
5066         op1 = const0_rtx, code = LT;
5067       break;
5068     case GE:
5069       if (op1 == const1_rtx)
5070         op1 = const0_rtx, code = GT;
5071       break;
5072     case GT:
5073       if (op1 == constm1_rtx)
5074         op1 = const0_rtx, code = GE;
5075       break;
5076     case GEU:
5077       if (op1 == const1_rtx)
5078         op1 = const0_rtx, code = NE;
5079       break;
5080     case LTU:
5081       if (op1 == const1_rtx)
5082         op1 = const0_rtx, code = EQ;
5083       break;
5084     default:
5085       break;
5086     }
5087
5088   /* If we are comparing a double-word integer with zero or -1, we can
5089      convert the comparison into one involving a single word.  */
5090   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5091       && GET_MODE_CLASS (mode) == MODE_INT
5092       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5093     {
5094       if ((code == EQ || code == NE)
5095           && (op1 == const0_rtx || op1 == constm1_rtx))
5096         {
5097           rtx op00, op01, op0both;
5098
5099           /* Do a logical OR or AND of the two words and compare the result.  */
5100           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5101           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5102           op0both = expand_binop (word_mode,
5103                                   op1 == const0_rtx ? ior_optab : and_optab,
5104                                   op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
5105
5106           if (op0both != 0)
5107             return emit_store_flag (target, code, op0both, op1, word_mode,
5108                                     unsignedp, normalizep);
5109         }
5110       else if ((code == LT || code == GE) && op1 == const0_rtx)
5111         {
5112           rtx op0h;
5113
5114           /* If testing the sign bit, can just test on high word.  */
5115           op0h = simplify_gen_subreg (word_mode, op0, mode,
5116                                       subreg_highpart_offset (word_mode, mode));
5117           return emit_store_flag (target, code, op0h, op1, word_mode,
5118                                   unsignedp, normalizep);
5119         }
5120     }
5121
5122   /* From now on, we won't change CODE, so set ICODE now.  */
5123   icode = setcc_gen_code[(int) code];
5124
5125   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5126      complement of A (for GE) and shifting the sign bit to the low bit.  */
5127   if (op1 == const0_rtx && (code == LT || code == GE)
5128       && GET_MODE_CLASS (mode) == MODE_INT
5129       && (normalizep || STORE_FLAG_VALUE == 1
5130           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5131               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5132                   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
5133     {
5134       subtarget = target;
5135
5136       /* If the result is to be wider than OP0, it is best to convert it
5137          first.  If it is to be narrower, it is *incorrect* to convert it
5138          first.  */
5139       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5140         {
5141           op0 = convert_modes (target_mode, mode, op0, 0);
5142           mode = target_mode;
5143         }
5144
5145       if (target_mode != mode)
5146         subtarget = 0;
5147
5148       if (code == GE)
5149         op0 = expand_unop (mode, one_cmpl_optab, op0,
5150                            ((STORE_FLAG_VALUE == 1 || normalizep)
5151                             ? 0 : subtarget), 0);
5152
5153       if (STORE_FLAG_VALUE == 1 || normalizep)
5154         /* If we are supposed to produce a 0/1 value, we want to do
5155            a logical shift from the sign bit to the low-order bit; for
5156            a -1/0 value, we do an arithmetic shift.  */
5157         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5158                             size_int (GET_MODE_BITSIZE (mode) - 1),
5159                             subtarget, normalizep != -1);
5160
5161       if (mode != target_mode)
5162         op0 = convert_modes (target_mode, mode, op0, 0);
5163
5164       return op0;
5165     }
5166
5167   if (icode != CODE_FOR_nothing)
5168     {
5169       insn_operand_predicate_fn pred;
5170
5171       /* We think we may be able to do this with a scc insn.  Emit the
5172          comparison and then the scc insn.  */
5173
5174       do_pending_stack_adjust ();
5175       last = get_last_insn ();
5176
5177       comparison
5178         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
5179       if (CONSTANT_P (comparison))
5180         {
5181           switch (GET_CODE (comparison))
5182             {
5183             case CONST_INT:
5184               if (comparison == const0_rtx)
5185                 return const0_rtx;
5186               break;
5187
5188 #ifdef FLOAT_STORE_FLAG_VALUE
5189             case CONST_DOUBLE:
5190               if (comparison == CONST0_RTX (GET_MODE (comparison)))
5191                 return const0_rtx;
5192               break;
5193 #endif
5194             default:
5195               gcc_unreachable ();
5196             }
5197
5198           if (normalizep == 1)
5199             return const1_rtx;
5200           if (normalizep == -1)
5201             return constm1_rtx;
5202           return const_true_rtx;
5203         }
5204
5205       /* The code of COMPARISON may not match CODE if compare_from_rtx
5206          decided to swap its operands and reverse the original code.
5207
5208          We know that compare_from_rtx returns either a CONST_INT or
5209          a new comparison code, so it is safe to just extract the
5210          code from COMPARISON.  */
5211       code = GET_CODE (comparison);
5212
5213       /* Get a reference to the target in the proper mode for this insn.  */
5214       compare_mode = insn_data[(int) icode].operand[0].mode;
5215       subtarget = target;
5216       pred = insn_data[(int) icode].operand[0].predicate;
5217       if (optimize || ! (*pred) (subtarget, compare_mode))
5218         subtarget = gen_reg_rtx (compare_mode);
5219
5220       pattern = GEN_FCN (icode) (subtarget);
5221       if (pattern)
5222         {
5223           emit_insn (pattern);
5224
5225           /* If we are converting to a wider mode, first convert to
5226              TARGET_MODE, then normalize.  This produces better combining
5227              opportunities on machines that have a SIGN_EXTRACT when we are
5228              testing a single bit.  This mostly benefits the 68k.
5229
5230              If STORE_FLAG_VALUE does not have the sign bit set when
5231              interpreted in COMPARE_MODE, we can do this conversion as
5232              unsigned, which is usually more efficient.  */
5233           if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
5234             {
5235               convert_move (target, subtarget,
5236                             (GET_MODE_BITSIZE (compare_mode)
5237                              <= HOST_BITS_PER_WIDE_INT)
5238                             && 0 == (STORE_FLAG_VALUE
5239                                      & ((HOST_WIDE_INT) 1
5240                                         << (GET_MODE_BITSIZE (compare_mode) -1))));
5241               op0 = target;
5242               compare_mode = target_mode;
5243             }
5244           else
5245             op0 = subtarget;
5246
5247           /* If we want to keep subexpressions around, don't reuse our
5248              last target.  */
5249
5250           if (optimize)
5251             subtarget = 0;
5252
5253           /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
5254              we don't have to do anything.  */
5255           if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5256             ;
5257           /* STORE_FLAG_VALUE might be the most negative number, so write
5258              the comparison this way to avoid a compiler-time warning.  */
5259           else if (- normalizep == STORE_FLAG_VALUE)
5260             op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
5261
5262           /* We don't want to use STORE_FLAG_VALUE < 0 below since this
5263              makes it hard to use a value of just the sign bit due to
5264              ANSI integer constant typing rules.  */
5265           else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
5266                    && (STORE_FLAG_VALUE
5267                        & ((HOST_WIDE_INT) 1
5268                           << (GET_MODE_BITSIZE (compare_mode) - 1))))
5269             op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
5270                                 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
5271                                 subtarget, normalizep == 1);
5272           else
5273             {
5274               gcc_assert (STORE_FLAG_VALUE & 1);
5275
5276               op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
5277               if (normalizep == -1)
5278                 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
5279             }
5280
5281           /* If we were converting to a smaller mode, do the
5282              conversion now.  */
5283           if (target_mode != compare_mode)
5284             {
5285               convert_move (target, op0, 0);
5286               return target;
5287             }
5288           else
5289             return op0;
5290         }
5291     }
5292
5293   delete_insns_since (last);
5294
5295   /* If optimizing, use different pseudo registers for each insn, instead
5296      of reusing the same pseudo.  This leads to better CSE, but slows
5297      down the compiler, since there are more pseudos */
5298   subtarget = (!optimize
5299                && (target_mode == mode)) ? target : NULL_RTX;
5300
5301   /* If we reached here, we can't do this with a scc insn.  However, there
5302      are some comparisons that can be done directly.  For example, if
5303      this is an equality comparison of integers, we can try to exclusive-or
5304      (or subtract) the two operands and use a recursive call to try the
5305      comparison with zero.  Don't do any of these cases if branches are
5306      very cheap.  */
5307
5308   if (BRANCH_COST > 0
5309       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5310       && op1 != const0_rtx)
5311     {
5312       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5313                           OPTAB_WIDEN);
5314
5315       if (tem == 0)
5316         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5317                             OPTAB_WIDEN);
5318       if (tem != 0)
5319         tem = emit_store_flag (target, code, tem, const0_rtx,
5320                                mode, unsignedp, normalizep);
5321       if (tem == 0)
5322         delete_insns_since (last);
5323       return tem;
5324     }
5325
5326   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5327      the constant zero.  Reject all other comparisons at this point.  Only
5328      do LE and GT if branches are expensive since they are expensive on
5329      2-operand machines.  */
5330
5331   if (BRANCH_COST == 0
5332       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5333       || (code != EQ && code != NE
5334           && (BRANCH_COST <= 1 || (code != LE && code != GT))))
5335     return 0;
5336
5337   /* See what we need to return.  We can only return a 1, -1, or the
5338      sign bit.  */
5339
5340   if (normalizep == 0)
5341     {
5342       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5343         normalizep = STORE_FLAG_VALUE;
5344
5345       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5346                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5347                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5348         ;
5349       else
5350         return 0;
5351     }
5352
5353   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5354      do the necessary operation below.  */
5355
5356   tem = 0;
5357
5358   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5359      the sign bit set.  */
5360
5361   if (code == LE)
5362     {
5363       /* This is destructive, so SUBTARGET can't be OP0.  */
5364       if (rtx_equal_p (subtarget, op0))
5365         subtarget = 0;
5366
5367       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5368                           OPTAB_WIDEN);
5369       if (tem)
5370         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5371                             OPTAB_WIDEN);
5372     }
5373
5374   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5375      number of bits in the mode of OP0, minus one.  */
5376
5377   if (code == GT)
5378     {
5379       if (rtx_equal_p (subtarget, op0))
5380         subtarget = 0;
5381
5382       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5383                           size_int (GET_MODE_BITSIZE (mode) - 1),
5384                           subtarget, 0);
5385       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5386                           OPTAB_WIDEN);
5387     }
5388
5389   if (code == EQ || code == NE)
5390     {
5391       /* For EQ or NE, one way to do the comparison is to apply an operation
5392          that converts the operand into a positive number if it is nonzero
5393          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5394          for NE we negate.  This puts the result in the sign bit.  Then we
5395          normalize with a shift, if needed.
5396
5397          Two operations that can do the above actions are ABS and FFS, so try
5398          them.  If that doesn't work, and MODE is smaller than a full word,
5399          we can use zero-extension to the wider mode (an unsigned conversion)
5400          as the operation.  */
5401
5402       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5403          that is compensated by the subsequent overflow when subtracting
5404          one / negating.  */
5405
5406       if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5407         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5408       else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5409         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5410       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5411         {
5412           tem = convert_modes (word_mode, mode, op0, 1);
5413           mode = word_mode;
5414         }
5415
5416       if (tem != 0)
5417         {
5418           if (code == EQ)
5419             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5420                                 0, OPTAB_WIDEN);
5421           else
5422             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5423         }
5424
5425       /* If we couldn't do it that way, for NE we can "or" the two's complement
5426          of the value with itself.  For EQ, we take the one's complement of
5427          that "or", which is an extra insn, so we only handle EQ if branches
5428          are expensive.  */
5429
5430       if (tem == 0 && (code == NE || BRANCH_COST > 1))
5431         {
5432           if (rtx_equal_p (subtarget, op0))
5433             subtarget = 0;
5434
5435           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5436           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5437                               OPTAB_WIDEN);
5438
5439           if (tem && code == EQ)
5440             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5441         }
5442     }
5443
5444   if (tem && normalizep)
5445     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5446                         size_int (GET_MODE_BITSIZE (mode) - 1),
5447                         subtarget, normalizep == 1);
5448
5449   if (tem)
5450     {
5451       if (GET_MODE (tem) != target_mode)
5452         {
5453           convert_move (target, tem, 0);
5454           tem = target;
5455         }
5456       else if (!subtarget)
5457         {
5458           emit_move_insn (target, tem);
5459           tem = target;
5460         }
5461     }
5462   else
5463     delete_insns_since (last);
5464
5465   return tem;
5466 }
5467
5468 /* Like emit_store_flag, but always succeeds.  */
5469
5470 rtx
5471 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5472                        enum machine_mode mode, int unsignedp, int normalizep)
5473 {
5474   rtx tem, label;
5475
5476   /* First see if emit_store_flag can do the job.  */
5477   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5478   if (tem != 0)
5479     return tem;
5480
5481   if (normalizep == 0)
5482     normalizep = 1;
5483
5484   /* If this failed, we have to do this with set/compare/jump/set code.  */
5485
5486   if (!REG_P (target)
5487       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5488     target = gen_reg_rtx (GET_MODE (target));
5489
5490   emit_move_insn (target, const1_rtx);
5491   label = gen_label_rtx ();
5492   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5493                            NULL_RTX, label);
5494
5495   emit_move_insn (target, const0_rtx);
5496   emit_label (label);
5497
5498   return target;
5499 }
5500 \f
5501 /* Perform possibly multi-word comparison and conditional jump to LABEL
5502    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
5503
5504    The algorithm is based on the code in expr.c:do_jump.
5505
5506    Note that this does not perform a general comparison.  Only variants
5507    generated within expmed.c are correctly handled, others abort (but could
5508    be handled if needed).  */
5509
5510 static void
5511 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5512                  rtx label)
5513 {
5514   /* If this mode is an integer too wide to compare properly,
5515      compare word by word.  Rely on cse to optimize constant cases.  */
5516
5517   if (GET_MODE_CLASS (mode) == MODE_INT
5518       && ! can_compare_p (op, mode, ccp_jump))
5519     {
5520       rtx label2 = gen_label_rtx ();
5521
5522       switch (op)
5523         {
5524         case LTU:
5525           do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
5526           break;
5527
5528         case LEU:
5529           do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
5530           break;
5531
5532         case LT:
5533           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
5534           break;
5535
5536         case GT:
5537           do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
5538           break;
5539
5540         case GE:
5541           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
5542           break;
5543
5544           /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
5545              that's the only equality operations we do */
5546         case EQ:
5547           gcc_assert (arg2 == const0_rtx && mode == GET_MODE(arg1));
5548           do_jump_by_parts_equality_rtx (arg1, label2, label);
5549           break;
5550
5551         case NE:
5552           gcc_assert (arg2 == const0_rtx && mode == GET_MODE(arg1));
5553           do_jump_by_parts_equality_rtx (arg1, label, label2);
5554           break;
5555
5556         default:
5557           gcc_unreachable ();
5558         }
5559
5560       emit_label (label2);
5561     }
5562   else
5563     emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
5564 }