gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 2, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING.  If not, write to the Free
  20 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  21 02110-1301, USA.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "tm.h"
  28 #include "toplev.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "tm_p.h"
  32 #include "flags.h"
  33 #include "insn-config.h"
  34 #include "expr.h"
  35 #include "optabs.h"
  36 #include "real.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39
  40 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  41                                    unsigned HOST_WIDE_INT,
  42                                    unsigned HOST_WIDE_INT, rtx);
  43 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  44                                    unsigned HOST_WIDE_INT, rtx);
  45 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  46                                     unsigned HOST_WIDE_INT,
  47                                     unsigned HOST_WIDE_INT,
  48                                     unsigned HOST_WIDE_INT, rtx, int);
  49 static rtx mask_rtx (enum machine_mode, int, int, int);
  50 static rtx lshift_value (enum machine_mode, rtx, int, int);
  51 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  52                                     unsigned HOST_WIDE_INT, int);
  53 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  54 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  55 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  56
  57 /* Test whether a value is zero of a power of two.  */
  58 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
  59
  60 /* Nonzero means divides or modulus operations are relatively cheap for
  61    powers of two, so don't use branches; emit the operation instead.
  62    Usually, this will mean that the MD file will emit non-branch
  63    sequences.  */
  64
  65 static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
  66 static bool smod_pow2_cheap[NUM_MACHINE_MODES];
  67
  68 #ifndef SLOW_UNALIGNED_ACCESS
  69 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  70 #endif
  71
  72 /* For compilers that support multiple targets with different word sizes,
  73    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  74    is the H8/300(H) compiler.  */
  75
  76 #ifndef MAX_BITS_PER_WORD
  77 #define MAX_BITS_PER_WORD BITS_PER_WORD
  78 #endif
  79
  80 /* Reduce conditional compilation elsewhere.  */
  81 #ifndef HAVE_insv
  82 #define HAVE_insv       0
  83 #define CODE_FOR_insv   CODE_FOR_nothing
  84 #define gen_insv(a,b,c,d) NULL_RTX
  85 #endif
  86 #ifndef HAVE_extv
  87 #define HAVE_extv       0
  88 #define CODE_FOR_extv   CODE_FOR_nothing
  89 #define gen_extv(a,b,c,d) NULL_RTX
  90 #endif
  91 #ifndef HAVE_extzv
  92 #define HAVE_extzv      0
  93 #define CODE_FOR_extzv  CODE_FOR_nothing
  94 #define gen_extzv(a,b,c,d) NULL_RTX
  95 #endif
  96
  97 /* Cost of various pieces of RTL.  Note that some of these are indexed by
  98    shift count and some by mode.  */
  99 static int zero_cost;
 100 static int add_cost[NUM_MACHINE_MODES];
 101 static int neg_cost[NUM_MACHINE_MODES];
 102 static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 103 static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 104 static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 105 static int mul_cost[NUM_MACHINE_MODES];
 106 static int div_cost[NUM_MACHINE_MODES];
 107 static int mul_widen_cost[NUM_MACHINE_MODES];
 108 static int mul_highpart_cost[NUM_MACHINE_MODES];
 109
 110 void
 111 init_expmed (void)
 112 {
 113   struct
 114   {
 115     struct rtx_def reg;         rtunion reg_fld[2];
 116     struct rtx_def plus;        rtunion plus_fld1;
 117     struct rtx_def neg;
 118     struct rtx_def udiv;        rtunion udiv_fld1;
 119     struct rtx_def mult;        rtunion mult_fld1;
 120     struct rtx_def div;         rtunion div_fld1;
 121     struct rtx_def mod;         rtunion mod_fld1;
 122     struct rtx_def zext;
 123     struct rtx_def wide_mult;   rtunion wide_mult_fld1;
 124     struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
 125     struct rtx_def wide_trunc;
 126     struct rtx_def shift;       rtunion shift_fld1;
 127     struct rtx_def shift_mult;  rtunion shift_mult_fld1;
 128     struct rtx_def shift_add;   rtunion shift_add_fld1;
 129     struct rtx_def shift_sub;   rtunion shift_sub_fld1;
 130   } all;
 131
 132   rtx pow2[MAX_BITS_PER_WORD];
 133   rtx cint[MAX_BITS_PER_WORD];
 134   int m, n;
 135   enum machine_mode mode, wider_mode;
 136
 137   zero_cost = rtx_cost (const0_rtx, 0);
 138
 139   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 140     {
 141       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 142       cint[m] = GEN_INT (m);
 143     }
 144
 145   memset (&all, 0, sizeof all);
 146
 147   PUT_CODE (&all.reg, REG);
 148   /* Avoid using hard regs in ways which may be unsupported.  */
 149   REGNO (&all.reg) = LAST_VIRTUAL_REGISTER + 1;
 150
 151   PUT_CODE (&all.plus, PLUS);
 152   XEXP (&all.plus, 0) = &all.reg;
 153   XEXP (&all.plus, 1) = &all.reg;
 154
 155   PUT_CODE (&all.neg, NEG);
 156   XEXP (&all.neg, 0) = &all.reg;
 157
 158   PUT_CODE (&all.udiv, UDIV);
 159   XEXP (&all.udiv, 0) = &all.reg;
 160   XEXP (&all.udiv, 1) = &all.reg;
 161
 162   PUT_CODE (&all.mult, MULT);
 163   XEXP (&all.mult, 0) = &all.reg;
 164   XEXP (&all.mult, 1) = &all.reg;
 165
 166   PUT_CODE (&all.div, DIV);
 167   XEXP (&all.div, 0) = &all.reg;
 168   XEXP (&all.div, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
 169
 170   PUT_CODE (&all.mod, MOD);
 171   XEXP (&all.mod, 0) = &all.reg;
 172   XEXP (&all.mod, 1) = XEXP (&all.div, 1);
 173
 174   PUT_CODE (&all.zext, ZERO_EXTEND);
 175   XEXP (&all.zext, 0) = &all.reg;
 176
 177   PUT_CODE (&all.wide_mult, MULT);
 178   XEXP (&all.wide_mult, 0) = &all.zext;
 179   XEXP (&all.wide_mult, 1) = &all.zext;
 180
 181   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 182   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 183
 184   PUT_CODE (&all.wide_trunc, TRUNCATE);
 185   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 186
 187   PUT_CODE (&all.shift, ASHIFT);
 188   XEXP (&all.shift, 0) = &all.reg;
 189
 190   PUT_CODE (&all.shift_mult, MULT);
 191   XEXP (&all.shift_mult, 0) = &all.reg;
 192
 193   PUT_CODE (&all.shift_add, PLUS);
 194   XEXP (&all.shift_add, 0) = &all.shift_mult;
 195   XEXP (&all.shift_add, 1) = &all.reg;
 196
 197   PUT_CODE (&all.shift_sub, MINUS);
 198   XEXP (&all.shift_sub, 0) = &all.shift_mult;
 199   XEXP (&all.shift_sub, 1) = &all.reg;
 200
 201   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 202        mode != VOIDmode;
 203        mode = GET_MODE_WIDER_MODE (mode))
 204     {
 205       PUT_MODE (&all.reg, mode);
 206       PUT_MODE (&all.plus, mode);
 207       PUT_MODE (&all.neg, mode);
 208       PUT_MODE (&all.udiv, mode);
 209       PUT_MODE (&all.mult, mode);
 210       PUT_MODE (&all.div, mode);
 211       PUT_MODE (&all.mod, mode);
 212       PUT_MODE (&all.wide_trunc, mode);
 213       PUT_MODE (&all.shift, mode);
 214       PUT_MODE (&all.shift_mult, mode);
 215       PUT_MODE (&all.shift_add, mode);
 216       PUT_MODE (&all.shift_sub, mode);
 217
 218       add_cost[mode] = rtx_cost (&all.plus, SET);
 219       neg_cost[mode] = rtx_cost (&all.neg, SET);
 220       div_cost[mode] = rtx_cost (&all.udiv, SET);
 221       mul_cost[mode] = rtx_cost (&all.mult, SET);
 222
 223       sdiv_pow2_cheap[mode] = (rtx_cost (&all.div, SET) <= 2 * add_cost[mode]);
 224       smod_pow2_cheap[mode] = (rtx_cost (&all.mod, SET) <= 4 * add_cost[mode]);
 225
 226       wider_mode = GET_MODE_WIDER_MODE (mode);
 227       if (wider_mode != VOIDmode)
 228         {
 229           PUT_MODE (&all.zext, wider_mode);
 230           PUT_MODE (&all.wide_mult, wider_mode);
 231           PUT_MODE (&all.wide_lshr, wider_mode);
 232           XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
 233
 234           mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
 235           mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
 236         }
 237
 238       shift_cost[mode][0] = 0;
 239       shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
 240
 241       n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
 242       for (m = 1; m < n; m++)
 243         {
 244           XEXP (&all.shift, 1) = cint[m];
 245           XEXP (&all.shift_mult, 1) = pow2[m];
 246
 247           shift_cost[mode][m] = rtx_cost (&all.shift, SET);
 248           shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
 249           shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
 250         }
 251     }
 252 }
 253
 254 /* Return an rtx representing minus the value of X.
 255    MODE is the intended mode of the result,
 256    useful if X is a CONST_INT.  */
 257
 258 rtx
 259 negate_rtx (enum machine_mode mode, rtx x)
 260 {
 261   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 262
 263   if (result == 0)
 264     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 265
 266   return result;
 267 }
 268
 269 /* Report on the availability of insv/extv/extzv and the desired mode
 270    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 271    is false; else the mode of the specified operand.  If OPNO is -1,
 272    all the caller cares about is whether the insn is available.  */
 273 enum machine_mode
 274 mode_for_extraction (enum extraction_pattern pattern, int opno)
 275 {
 276   const struct insn_data *data;
 277
 278   switch (pattern)
 279     {
 280     case EP_insv:
 281       if (HAVE_insv)
 282         {
 283           data = &insn_data[CODE_FOR_insv];
 284           break;
 285         }
 286       return MAX_MACHINE_MODE;
 287
 288     case EP_extv:
 289       if (HAVE_extv)
 290         {
 291           data = &insn_data[CODE_FOR_extv];
 292           break;
 293         }
 294       return MAX_MACHINE_MODE;
 295
 296     case EP_extzv:
 297       if (HAVE_extzv)
 298         {
 299           data = &insn_data[CODE_FOR_extzv];
 300           break;
 301         }
 302       return MAX_MACHINE_MODE;
 303
 304     default:
 305       gcc_unreachable ();
 306     }
 307
 308   if (opno == -1)
 309     return VOIDmode;
 310
 311   /* Everyone who uses this function used to follow it with
 312      if (result == VOIDmode) result = word_mode; */
 313   if (data->operand[opno].mode == VOIDmode)
 314     return word_mode;
 315   return data->operand[opno].mode;
 316 }
 317
 318 \f
 319 /* Generate code to store value from rtx VALUE
 320    into a bit-field within structure STR_RTX
 321    containing BITSIZE bits starting at bit BITNUM.
 322    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
 323    ALIGN is the alignment that STR_RTX is known to have.
 324    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
 325
 326 /* ??? Note that there are two different ideas here for how
 327    to determine the size to count bits within, for a register.
 328    One is BITS_PER_WORD, and the other is the size of operand 3
 329    of the insv pattern.
 330
 331    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
 332    else, we use the mode of operand 3.  */
 333
 334 rtx
 335 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 336                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 337                  rtx value)
 338 {
 339   unsigned int unit
 340     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
 341   unsigned HOST_WIDE_INT offset, bitpos;
 342   rtx op0 = str_rtx;
 343   int byte_offset;
 344   rtx orig_value;
 345
 346   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 347
 348   while (GET_CODE (op0) == SUBREG)
 349     {
 350       /* The following line once was done only if WORDS_BIG_ENDIAN,
 351          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 352          meaningful at a much higher level; when structures are copied
 353          between memory and regs, the higher-numbered regs
 354          always get higher addresses.  */
 355       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
 356       op0 = SUBREG_REG (op0);
 357     }
 358
 359   /* No action is needed if the target is a register and if the field
 360      lies completely outside that register.  This can occur if the source
 361      code contains an out-of-bounds access to a small array.  */
 362   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 363     return value;
 364
 365   /* Use vec_set patterns for inserting parts of vectors whenever
 366      available.  */
 367   if (VECTOR_MODE_P (GET_MODE (op0))
 368       && !MEM_P (op0)
 369       && (vec_set_optab->handlers[GET_MODE (op0)].insn_code
 370           != CODE_FOR_nothing)
 371       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 372       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 373       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 374     {
 375       enum machine_mode outermode = GET_MODE (op0);
 376       enum machine_mode innermode = GET_MODE_INNER (outermode);
 377       int icode = (int) vec_set_optab->handlers[outermode].insn_code;
 378       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 379       rtx rtxpos = GEN_INT (pos);
 380       rtx src = value;
 381       rtx dest = op0;
 382       rtx pat, seq;
 383       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
 384       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
 385       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
 386
 387       start_sequence ();
 388
 389       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
 390         src = copy_to_mode_reg (mode1, src);
 391
 392       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 393         rtxpos = copy_to_mode_reg (mode1, rtxpos);
 394
 395       /* We could handle this, but we should always be called with a pseudo
 396          for our targets and all insns should take them as outputs.  */
 397       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
 398                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
 399                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
 400       pat = GEN_FCN (icode) (dest, src, rtxpos);
 401       seq = get_insns ();
 402       end_sequence ();
 403       if (pat)
 404         {
 405           emit_insn (seq);
 406           emit_insn (pat);
 407           return dest;
 408         }
 409     }
 410
 411   /* If the target is a register, overwriting the entire object, or storing
 412      a full-word or multi-word field can be done with just a SUBREG.
 413
 414      If the target is memory, storing any naturally aligned field can be
 415      done with a simple store.  For targets that support fast unaligned
 416      memory, any naturally sized, unit aligned field can be done directly.  */
 417
 418   offset = bitnum / unit;
 419   bitpos = bitnum % unit;
 420   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 421                 + (offset * UNITS_PER_WORD);
 422
 423   if (bitpos == 0
 424       && bitsize == GET_MODE_BITSIZE (fieldmode)
 425       && (!MEM_P (op0)
 426           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 427              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 428              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 429           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 430              || (offset * BITS_PER_UNIT % bitsize == 0
 431                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 432     {
 433       if (MEM_P (op0))
 434         op0 = adjust_address (op0, fieldmode, offset);
 435       else if (GET_MODE (op0) != fieldmode)
 436         op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 437                                    byte_offset);
 438       emit_move_insn (op0, value);
 439       return value;
 440     }
 441
 442   /* Make sure we are playing with integral modes.  Pun with subregs
 443      if we aren't.  This must come after the entire register case above,
 444      since that case is valid for any mode.  The following cases are only
 445      valid for integral modes.  */
 446   {
 447     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 448     if (imode != GET_MODE (op0))
 449       {
 450         if (MEM_P (op0))
 451           op0 = adjust_address (op0, imode, 0);
 452         else
 453           {
 454             gcc_assert (imode != BLKmode);
 455             op0 = gen_lowpart (imode, op0);
 456           }
 457       }
 458   }
 459
 460   /* We may be accessing data outside the field, which means
 461      we can alias adjacent data.  */
 462   if (MEM_P (op0))
 463     {
 464       op0 = shallow_copy_rtx (op0);
 465       set_mem_alias_set (op0, 0);
 466       set_mem_expr (op0, 0);
 467     }
 468
 469   /* If OP0 is a register, BITPOS must count within a word.
 470      But as we have it, it counts within whatever size OP0 now has.
 471      On a bigendian machine, these are not the same, so convert.  */
 472   if (BYTES_BIG_ENDIAN
 473       && !MEM_P (op0)
 474       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 475     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 476
 477   /* Storing an lsb-aligned field in a register
 478      can be done with a movestrict instruction.  */
 479
 480   if (!MEM_P (op0)
 481       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 482       && bitsize == GET_MODE_BITSIZE (fieldmode)
 483       && (movstrict_optab->handlers[fieldmode].insn_code
 484           != CODE_FOR_nothing))
 485     {
 486       int icode = movstrict_optab->handlers[fieldmode].insn_code;
 487
 488       /* Get appropriate low part of the value being stored.  */
 489       if (GET_CODE (value) == CONST_INT || REG_P (value))
 490         value = gen_lowpart (fieldmode, value);
 491       else if (!(GET_CODE (value) == SYMBOL_REF
 492                  || GET_CODE (value) == LABEL_REF
 493                  || GET_CODE (value) == CONST))
 494         value = convert_to_mode (fieldmode, value, 0);
 495
 496       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 497         value = copy_to_mode_reg (fieldmode, value);
 498
 499       if (GET_CODE (op0) == SUBREG)
 500         {
 501           /* Else we've got some float mode source being extracted into
 502              a different float mode destination -- this combination of
 503              subregs results in Severe Tire Damage.  */
 504           gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
 505                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 506                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 507           op0 = SUBREG_REG (op0);
 508         }
 509
 510       emit_insn (GEN_FCN (icode)
 511                  (gen_rtx_SUBREG (fieldmode, op0,
 512                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 513                                   + (offset * UNITS_PER_WORD)),
 514                                   value));
 515
 516       return value;
 517     }
 518
 519   /* Handle fields bigger than a word.  */
 520
 521   if (bitsize > BITS_PER_WORD)
 522     {
 523       /* Here we transfer the words of the field
 524          in the order least significant first.
 525          This is because the most significant word is the one which may
 526          be less than full.
 527          However, only do that if the value is not BLKmode.  */
 528
 529       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 530       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 531       unsigned int i;
 532
 533       /* This is the mode we must force value to, so that there will be enough
 534          subwords to extract.  Note that fieldmode will often (always?) be
 535          VOIDmode, because that is what store_field uses to indicate that this
 536          is a bit field, but passing VOIDmode to operand_subword_force
 537          is not allowed.  */
 538       fieldmode = GET_MODE (value);
 539       if (fieldmode == VOIDmode)
 540         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 541
 542       for (i = 0; i < nwords; i++)
 543         {
 544           /* If I is 0, use the low-order word in both field and target;
 545              if I is 1, use the next to lowest word; and so on.  */
 546           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 547           unsigned int bit_offset = (backwards
 548                                      ? MAX ((int) bitsize - ((int) i + 1)
 549                                             * BITS_PER_WORD,
 550                                             0)
 551                                      : (int) i * BITS_PER_WORD);
 552
 553           store_bit_field (op0, MIN (BITS_PER_WORD,
 554                                      bitsize - i * BITS_PER_WORD),
 555                            bitnum + bit_offset, word_mode,
 556                            operand_subword_force (value, wordnum, fieldmode));
 557         }
 558       return value;
 559     }
 560
 561   /* From here on we can assume that the field to be stored in is
 562      a full-word (whatever type that is), since it is shorter than a word.  */
 563
 564   /* OFFSET is the number of words or bytes (UNIT says which)
 565      from STR_RTX to the first word or byte containing part of the field.  */
 566
 567   if (!MEM_P (op0))
 568     {
 569       if (offset != 0
 570           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 571         {
 572           if (!REG_P (op0))
 573             {
 574               /* Since this is a destination (lvalue), we can't copy
 575                  it to a pseudo.  We can remove a SUBREG that does not
 576                  change the size of the operand.  Such a SUBREG may
 577                  have been added above.  */
 578               gcc_assert (GET_CODE (op0) == SUBREG
 579                           && (GET_MODE_SIZE (GET_MODE (op0))
 580                               == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
 581               op0 = SUBREG_REG (op0);
 582             }
 583           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 584                                 op0, (offset * UNITS_PER_WORD));
 585         }
 586       offset = 0;
 587     }
 588
 589   /* If VALUE has a floating-point or complex mode, access it as an
 590      integer of the corresponding size.  This can occur on a machine
 591      with 64 bit registers that uses SFmode for float.  It can also
 592      occur for unaligned float or complex fields.  */
 593   orig_value = value;
 594   if (GET_MODE (value) != VOIDmode
 595       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 596       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 597     {
 598       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 599       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 600     }
 601
 602   /* Now OFFSET is nonzero only if OP0 is memory
 603      and is therefore always measured in bytes.  */
 604
 605   if (HAVE_insv
 606       && GET_MODE (value) != BLKmode
 607       && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
 608       && bitsize > 0
 609       && GET_MODE_BITSIZE (op_mode) >= bitsize
 610       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
 611             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
 612     {
 613       int xbitpos = bitpos;
 614       rtx value1;
 615       rtx xop0 = op0;
 616       rtx last = get_last_insn ();
 617       rtx pat;
 618       enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
 619       int save_volatile_ok = volatile_ok;
 620
 621       volatile_ok = 1;
 622
 623       /* If this machine's insv can only insert into a register, copy OP0
 624          into a register and save it back later.  */
 625       if (MEM_P (op0)
 626           && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
 627                 (op0, VOIDmode)))
 628         {
 629           rtx tempreg;
 630           enum machine_mode bestmode;
 631
 632           /* Get the mode to use for inserting into this field.  If OP0 is
 633              BLKmode, get the smallest mode consistent with the alignment. If
 634              OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
 635              mode. Otherwise, use the smallest mode containing the field.  */
 636
 637           if (GET_MODE (op0) == BLKmode
 638               || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
 639             bestmode
 640               = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
 641                                MEM_VOLATILE_P (op0));
 642           else
 643             bestmode = GET_MODE (op0);
 644
 645           if (bestmode == VOIDmode
 646               || GET_MODE_SIZE (bestmode) < GET_MODE_SIZE (fieldmode)
 647               || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 648                   && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 649             goto insv_loses;
 650
 651           /* Adjust address to point to the containing unit of that mode.
 652              Compute offset as multiple of this unit, counting in bytes.  */
 653           unit = GET_MODE_BITSIZE (bestmode);
 654           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 655           bitpos = bitnum % unit;
 656           op0 = adjust_address (op0, bestmode,  offset);
 657
 658           /* Fetch that unit, store the bitfield in it, then store
 659              the unit.  */
 660           tempreg = copy_to_reg (op0);
 661           store_bit_field (tempreg, bitsize, bitpos, fieldmode, orig_value);
 662           emit_move_insn (op0, tempreg);
 663           return value;
 664         }
 665       volatile_ok = save_volatile_ok;
 666
 667       /* Add OFFSET into OP0's address.  */
 668       if (MEM_P (xop0))
 669         xop0 = adjust_address (xop0, byte_mode, offset);
 670
 671       /* If xop0 is a register, we need it in MAXMODE
 672          to make it acceptable to the format of insv.  */
 673       if (GET_CODE (xop0) == SUBREG)
 674         /* We can't just change the mode, because this might clobber op0,
 675            and we will need the original value of op0 if insv fails.  */
 676         xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 677       if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
 678         xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
 679
 680       /* On big-endian machines, we count bits from the most significant.
 681          If the bit field insn does not, we must invert.  */
 682
 683       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 684         xbitpos = unit - bitsize - xbitpos;
 685
 686       /* We have been counting XBITPOS within UNIT.
 687          Count instead within the size of the register.  */
 688       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
 689         xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
 690
 691       unit = GET_MODE_BITSIZE (maxmode);
 692
 693       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
 694       value1 = value;
 695       if (GET_MODE (value) != maxmode)
 696         {
 697           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 698             {
 699               /* Optimization: Don't bother really extending VALUE
 700                  if it has all the bits we will actually use.  However,
 701                  if we must narrow it, be sure we do it correctly.  */
 702
 703               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
 704                 {
 705                   rtx tmp;
 706
 707                   tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
 708                   if (! tmp)
 709                     tmp = simplify_gen_subreg (maxmode,
 710                                                force_reg (GET_MODE (value),
 711                                                           value1),
 712                                                GET_MODE (value), 0);
 713                   value1 = tmp;
 714                 }
 715               else
 716                 value1 = gen_lowpart (maxmode, value1);
 717             }
 718           else if (GET_CODE (value) == CONST_INT)
 719             value1 = gen_int_mode (INTVAL (value), maxmode);
 720           else
 721             /* Parse phase is supposed to make VALUE's data type
 722                match that of the component reference, which is a type
 723                at least as wide as the field; so VALUE should have
 724                a mode that corresponds to that type.  */
 725             gcc_assert (CONSTANT_P (value));
 726         }
 727
 728       /* If this machine's insv insists on a register,
 729          get VALUE1 into a register.  */
 730       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 731              (value1, maxmode)))
 732         value1 = force_reg (maxmode, value1);
 733
 734       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 735       if (pat)
 736         emit_insn (pat);
 737       else
 738         {
 739           delete_insns_since (last);
 740           store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 741         }
 742     }
 743   else
 744     insv_loses:
 745     /* Insv is not available; store using shifts and boolean ops.  */
 746     store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 747   return value;
 748 }
 749 \f
 750 /* Use shifts and boolean operations to store VALUE
 751    into a bit field of width BITSIZE
 752    in a memory location specified by OP0 except offset by OFFSET bytes.
 753      (OFFSET must be 0 if OP0 is a register.)
 754    The field starts at position BITPOS within the byte.
 755     (If OP0 is a register, it may be a full word or a narrower mode,
 756      but BITPOS still counts within a full word,
 757      which is significant on bigendian machines.)  */
 758
 759 static void
 760 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 761                        unsigned HOST_WIDE_INT bitsize,
 762                        unsigned HOST_WIDE_INT bitpos, rtx value)
 763 {
 764   enum machine_mode mode;
 765   unsigned int total_bits = BITS_PER_WORD;
 766   rtx subtarget, temp;
 767   int all_zero = 0;
 768   int all_one = 0;
 769
 770   /* There is a case not handled here:
 771      a structure with a known alignment of just a halfword
 772      and a field split across two aligned halfwords within the structure.
 773      Or likewise a structure with a known alignment of just a byte
 774      and a field split across two bytes.
 775      Such cases are not supposed to be able to occur.  */
 776
 777   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 778     {
 779       gcc_assert (!offset);
 780       /* Special treatment for a bit field split across two registers.  */
 781       if (bitsize + bitpos > BITS_PER_WORD)
 782         {
 783           store_split_bit_field (op0, bitsize, bitpos, value);
 784           return;
 785         }
 786     }
 787   else
 788     {
 789       /* Get the proper mode to use for this field.  We want a mode that
 790          includes the entire field.  If such a mode would be larger than
 791          a word, we won't be doing the extraction the normal way.
 792          We don't want a mode bigger than the destination.  */
 793
 794       mode = GET_MODE (op0);
 795       if (GET_MODE_BITSIZE (mode) == 0
 796           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 797         mode = word_mode;
 798       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 799                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 800
 801       if (mode == VOIDmode)
 802         {
 803           /* The only way this should occur is if the field spans word
 804              boundaries.  */
 805           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 806                                  value);
 807           return;
 808         }
 809
 810       total_bits = GET_MODE_BITSIZE (mode);
 811
 812       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 813          be in the range 0 to total_bits-1, and put any excess bytes in
 814          OFFSET.  */
 815       if (bitpos >= total_bits)
 816         {
 817           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 818           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 819                      * BITS_PER_UNIT);
 820         }
 821
 822       /* Get ref to an aligned byte, halfword, or word containing the field.
 823          Adjust BITPOS to be position within a word,
 824          and OFFSET to be the offset of that word.
 825          Then alter OP0 to refer to that word.  */
 826       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 827       offset -= (offset % (total_bits / BITS_PER_UNIT));
 828       op0 = adjust_address (op0, mode, offset);
 829     }
 830
 831   mode = GET_MODE (op0);
 832
 833   /* Now MODE is either some integral mode for a MEM as OP0,
 834      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 835      The bit field is contained entirely within OP0.
 836      BITPOS is the starting bit number within OP0.
 837      (OP0's mode may actually be narrower than MODE.)  */
 838
 839   if (BYTES_BIG_ENDIAN)
 840       /* BITPOS is the distance between our msb
 841          and that of the containing datum.
 842          Convert it to the distance from the lsb.  */
 843       bitpos = total_bits - bitsize - bitpos;
 844
 845   /* Now BITPOS is always the distance between our lsb
 846      and that of OP0.  */
 847
 848   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 849      we must first convert its mode to MODE.  */
 850
 851   if (GET_CODE (value) == CONST_INT)
 852     {
 853       HOST_WIDE_INT v = INTVAL (value);
 854
 855       if (bitsize < HOST_BITS_PER_WIDE_INT)
 856         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 857
 858       if (v == 0)
 859         all_zero = 1;
 860       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 861                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 862                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 863         all_one = 1;
 864
 865       value = lshift_value (mode, value, bitpos, bitsize);
 866     }
 867   else
 868     {
 869       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 870                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 871
 872       if (GET_MODE (value) != mode)
 873         {
 874           if ((REG_P (value) || GET_CODE (value) == SUBREG)
 875               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 876             value = gen_lowpart (mode, value);
 877           else
 878             value = convert_to_mode (mode, value, 1);
 879         }
 880
 881       if (must_and)
 882         value = expand_binop (mode, and_optab, value,
 883                               mask_rtx (mode, 0, bitsize, 0),
 884                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 885       if (bitpos > 0)
 886         value = expand_shift (LSHIFT_EXPR, mode, value,
 887                               build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
 888     }
 889
 890   /* Now clear the chosen bits in OP0,
 891      except that if VALUE is -1 we need not bother.  */
 892
 893   subtarget = op0;
 894
 895   if (! all_one)
 896     {
 897       temp = expand_binop (mode, and_optab, op0,
 898                            mask_rtx (mode, bitpos, bitsize, 1),
 899                            subtarget, 1, OPTAB_LIB_WIDEN);
 900       subtarget = temp;
 901     }
 902   else
 903     temp = op0;
 904
 905   /* Now logical-or VALUE into OP0, unless it is zero.  */
 906
 907   if (! all_zero)
 908     temp = expand_binop (mode, ior_optab, temp, value,
 909                          subtarget, 1, OPTAB_LIB_WIDEN);
 910   if (op0 != temp)
 911     emit_move_insn (op0, temp);
 912 }
 913 \f
 914 /* Store a bit field that is split across multiple accessible memory objects.
 915
 916    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 917    BITSIZE is the field width; BITPOS the position of its first bit
 918    (within the word).
 919    VALUE is the value to store.
 920
 921    This does not yet handle fields wider than BITS_PER_WORD.  */
 922
 923 static void
 924 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 925                        unsigned HOST_WIDE_INT bitpos, rtx value)
 926 {
 927   unsigned int unit;
 928   unsigned int bitsdone = 0;
 929
 930   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 931      much at a time.  */
 932   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 933     unit = BITS_PER_WORD;
 934   else
 935     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
 936
 937   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 938      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 939      that VALUE might be a floating-point constant.  */
 940   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
 941     {
 942       rtx word = gen_lowpart_common (word_mode, value);
 943
 944       if (word && (value != word))
 945         value = word;
 946       else
 947         value = gen_lowpart_common (word_mode,
 948                                     force_reg (GET_MODE (value) != VOIDmode
 949                                                ? GET_MODE (value)
 950                                                : word_mode, value));
 951     }
 952
 953   while (bitsdone < bitsize)
 954     {
 955       unsigned HOST_WIDE_INT thissize;
 956       rtx part, word;
 957       unsigned HOST_WIDE_INT thispos;
 958       unsigned HOST_WIDE_INT offset;
 959
 960       offset = (bitpos + bitsdone) / unit;
 961       thispos = (bitpos + bitsdone) % unit;
 962
 963       /* THISSIZE must not overrun a word boundary.  Otherwise,
 964          store_fixed_bit_field will call us again, and we will mutually
 965          recurse forever.  */
 966       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
 967       thissize = MIN (thissize, unit - thispos);
 968
 969       if (BYTES_BIG_ENDIAN)
 970         {
 971           int total_bits;
 972
 973           /* We must do an endian conversion exactly the same way as it is
 974              done in extract_bit_field, so that the two calls to
 975              extract_fixed_bit_field will have comparable arguments.  */
 976           if (!MEM_P (value) || GET_MODE (value) == BLKmode)
 977             total_bits = BITS_PER_WORD;
 978           else
 979             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
 980
 981           /* Fetch successively less significant portions.  */
 982           if (GET_CODE (value) == CONST_INT)
 983             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 984                              >> (bitsize - bitsdone - thissize))
 985                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 986           else
 987             /* The args are chosen so that the last part includes the
 988                lsb.  Give extract_bit_field the value it needs (with
 989                endianness compensation) to fetch the piece we want.  */
 990             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
 991                                             total_bits - bitsize + bitsdone,
 992                                             NULL_RTX, 1);
 993         }
 994       else
 995         {
 996           /* Fetch successively more significant portions.  */
 997           if (GET_CODE (value) == CONST_INT)
 998             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 999                              >> bitsdone)
1000                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1001           else
1002             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1003                                             bitsdone, NULL_RTX, 1);
1004         }
1005
1006       /* If OP0 is a register, then handle OFFSET here.
1007
1008          When handling multiword bitfields, extract_bit_field may pass
1009          down a word_mode SUBREG of a larger REG for a bitfield that actually
1010          crosses a word boundary.  Thus, for a SUBREG, we must find
1011          the current word starting from the base register.  */
1012       if (GET_CODE (op0) == SUBREG)
1013         {
1014           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1015           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1016                                         GET_MODE (SUBREG_REG (op0)));
1017           offset = 0;
1018         }
1019       else if (REG_P (op0))
1020         {
1021           word = operand_subword_force (op0, offset, GET_MODE (op0));
1022           offset = 0;
1023         }
1024       else
1025         word = op0;
1026
1027       /* OFFSET is in UNITs, and UNIT is in bits.
1028          store_fixed_bit_field wants offset in bytes.  */
1029       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1030                              thispos, part);
1031       bitsdone += thissize;
1032     }
1033 }
1034 \f
1035 /* Generate code to extract a byte-field from STR_RTX
1036    containing BITSIZE bits, starting at BITNUM,
1037    and put it in TARGET if possible (if TARGET is nonzero).
1038    Regardless of TARGET, we return the rtx for where the value is placed.
1039
1040    STR_RTX is the structure containing the byte (a REG or MEM).
1041    UNSIGNEDP is nonzero if this is an unsigned bit field.
1042    MODE is the natural mode of the field value once extracted.
1043    TMODE is the mode the caller would like the value to have;
1044    but the value may be returned with type MODE instead.
1045
1046    TOTAL_SIZE is the size in bytes of the containing structure,
1047    or -1 if varying.
1048
1049    If a TARGET is specified and we can store in it at no extra cost,
1050    we do so, and return TARGET.
1051    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1052    if they are equally easy.  */
1053
1054 rtx
1055 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1056                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1057                    enum machine_mode mode, enum machine_mode tmode)
1058 {
1059   unsigned int unit
1060     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1061   unsigned HOST_WIDE_INT offset, bitpos;
1062   rtx op0 = str_rtx;
1063   rtx spec_target = target;
1064   rtx spec_target_subreg = 0;
1065   enum machine_mode int_mode;
1066   enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1067   enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1068   enum machine_mode mode1;
1069   int byte_offset;
1070
1071   if (tmode == VOIDmode)
1072     tmode = mode;
1073
1074   while (GET_CODE (op0) == SUBREG)
1075     {
1076       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1077       op0 = SUBREG_REG (op0);
1078     }
1079
1080   /* If we have an out-of-bounds access to a register, just return an
1081      uninitialized register of the required mode.  This can occur if the
1082      source code contains an out-of-bounds access to a small array.  */
1083   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1084     return gen_reg_rtx (tmode);
1085
1086   if (REG_P (op0)
1087       && mode == GET_MODE (op0)
1088       && bitnum == 0
1089       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1090     {
1091       /* We're trying to extract a full register from itself.  */
1092       return op0;
1093     }
1094
1095   /* Use vec_extract patterns for extracting parts of vectors whenever
1096      available.  */
1097   if (VECTOR_MODE_P (GET_MODE (op0))
1098       && !MEM_P (op0)
1099       && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
1100           != CODE_FOR_nothing)
1101       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1102           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1103     {
1104       enum machine_mode outermode = GET_MODE (op0);
1105       enum machine_mode innermode = GET_MODE_INNER (outermode);
1106       int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
1107       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1108       rtx rtxpos = GEN_INT (pos);
1109       rtx src = op0;
1110       rtx dest = NULL, pat, seq;
1111       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1112       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1113       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1114
1115       if (innermode == tmode || innermode == mode)
1116         dest = target;
1117
1118       if (!dest)
1119         dest = gen_reg_rtx (innermode);
1120
1121       start_sequence ();
1122
1123       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1124         dest = copy_to_mode_reg (mode0, dest);
1125
1126       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1127         src = copy_to_mode_reg (mode1, src);
1128
1129       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1130         rtxpos = copy_to_mode_reg (mode1, rtxpos);
1131
1132       /* We could handle this, but we should always be called with a pseudo
1133          for our targets and all insns should take them as outputs.  */
1134       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1135                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
1136                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1137
1138       pat = GEN_FCN (icode) (dest, src, rtxpos);
1139       seq = get_insns ();
1140       end_sequence ();
1141       if (pat)
1142         {
1143           emit_insn (seq);
1144           emit_insn (pat);
1145           return dest;
1146         }
1147     }
1148
1149   /* Make sure we are playing with integral modes.  Pun with subregs
1150      if we aren't.  */
1151   {
1152     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1153     if (imode != GET_MODE (op0))
1154       {
1155         if (MEM_P (op0))
1156           op0 = adjust_address (op0, imode, 0);
1157         else
1158           {
1159             gcc_assert (imode != BLKmode);
1160             op0 = gen_lowpart (imode, op0);
1161
1162             /* If we got a SUBREG, force it into a register since we
1163                aren't going to be able to do another SUBREG on it.  */
1164             if (GET_CODE (op0) == SUBREG)
1165               op0 = force_reg (imode, op0);
1166           }
1167       }
1168   }
1169
1170   /* We may be accessing data outside the field, which means
1171      we can alias adjacent data.  */
1172   if (MEM_P (op0))
1173     {
1174       op0 = shallow_copy_rtx (op0);
1175       set_mem_alias_set (op0, 0);
1176       set_mem_expr (op0, 0);
1177     }
1178
1179   /* Extraction of a full-word or multi-word value from a structure
1180      in a register or aligned memory can be done with just a SUBREG.
1181      A subword value in the least significant part of a register
1182      can also be extracted with a SUBREG.  For this, we need the
1183      byte offset of the value in op0.  */
1184
1185   bitpos = bitnum % unit;
1186   offset = bitnum / unit;
1187   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1188
1189   /* If OP0 is a register, BITPOS must count within a word.
1190      But as we have it, it counts within whatever size OP0 now has.
1191      On a bigendian machine, these are not the same, so convert.  */
1192   if (BYTES_BIG_ENDIAN
1193       && !MEM_P (op0)
1194       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1195     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1196
1197   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1198      If that's wrong, the solution is to test for it and set TARGET to 0
1199      if needed.  */
1200
1201   /* Only scalar integer modes can be converted via subregs.  There is an
1202      additional problem for FP modes here in that they can have a precision
1203      which is different from the size.  mode_for_size uses precision, but
1204      we want a mode based on the size, so we must avoid calling it for FP
1205      modes.  */
1206   mode1  = (SCALAR_INT_MODE_P (tmode)
1207             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1208             : mode);
1209
1210   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1211         && bitpos % BITS_PER_WORD == 0)
1212        || (mode1 != BLKmode
1213            /* ??? The big endian test here is wrong.  This is correct
1214               if the value is in a register, and if mode_for_size is not
1215               the same mode as op0.  This causes us to get unnecessarily
1216               inefficient code from the Thumb port when -mbig-endian.  */
1217            && (BYTES_BIG_ENDIAN
1218                ? bitpos + bitsize == BITS_PER_WORD
1219                : bitpos == 0)))
1220       && ((!MEM_P (op0)
1221            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1222                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1223            && GET_MODE_SIZE (mode1) != 0
1224            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1225           || (MEM_P (op0)
1226               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1227                   || (offset * BITS_PER_UNIT % bitsize == 0
1228                       && MEM_ALIGN (op0) % bitsize == 0)))))
1229     {
1230       if (mode1 != GET_MODE (op0))
1231         {
1232           if (MEM_P (op0))
1233             op0 = adjust_address (op0, mode1, offset);
1234           else
1235             {
1236               rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1237                                              byte_offset);
1238               if (sub == NULL)
1239                 goto no_subreg_mode_swap;
1240               op0 = sub;
1241             }
1242         }
1243       if (mode1 != mode)
1244         return convert_to_mode (tmode, op0, unsignedp);
1245       return op0;
1246     }
1247  no_subreg_mode_swap:
1248
1249   /* Handle fields bigger than a word.  */
1250
1251   if (bitsize > BITS_PER_WORD)
1252     {
1253       /* Here we transfer the words of the field
1254          in the order least significant first.
1255          This is because the most significant word is the one which may
1256          be less than full.  */
1257
1258       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1259       unsigned int i;
1260
1261       if (target == 0 || !REG_P (target))
1262         target = gen_reg_rtx (mode);
1263
1264       /* Indicate for flow that the entire target reg is being set.  */
1265       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1266
1267       for (i = 0; i < nwords; i++)
1268         {
1269           /* If I is 0, use the low-order word in both field and target;
1270              if I is 1, use the next to lowest word; and so on.  */
1271           /* Word number in TARGET to use.  */
1272           unsigned int wordnum
1273             = (WORDS_BIG_ENDIAN
1274                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1275                : i);
1276           /* Offset from start of field in OP0.  */
1277           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1278                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1279                                                 * (int) BITS_PER_WORD))
1280                                      : (int) i * BITS_PER_WORD);
1281           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1282           rtx result_part
1283             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1284                                            bitsize - i * BITS_PER_WORD),
1285                                  bitnum + bit_offset, 1, target_part, mode,
1286                                  word_mode);
1287
1288           gcc_assert (target_part);
1289
1290           if (result_part != target_part)
1291             emit_move_insn (target_part, result_part);
1292         }
1293
1294       if (unsignedp)
1295         {
1296           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1297              need to be zero'd out.  */
1298           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1299             {
1300               unsigned int i, total_words;
1301
1302               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1303               for (i = nwords; i < total_words; i++)
1304                 emit_move_insn
1305                   (operand_subword (target,
1306                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1307                                     1, VOIDmode),
1308                    const0_rtx);
1309             }
1310           return target;
1311         }
1312
1313       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1314       target = expand_shift (LSHIFT_EXPR, mode, target,
1315                              build_int_cst (NULL_TREE,
1316                                             GET_MODE_BITSIZE (mode) - bitsize),
1317                              NULL_RTX, 0);
1318       return expand_shift (RSHIFT_EXPR, mode, target,
1319                            build_int_cst (NULL_TREE,
1320                                           GET_MODE_BITSIZE (mode) - bitsize),
1321                            NULL_RTX, 0);
1322     }
1323
1324   /* From here on we know the desired field is smaller than a word.  */
1325
1326   /* Check if there is a correspondingly-sized integer field, so we can
1327      safely extract it as one size of integer, if necessary; then
1328      truncate or extend to the size that is wanted; then use SUBREGs or
1329      convert_to_mode to get one of the modes we really wanted.  */
1330
1331   int_mode = int_mode_for_mode (tmode);
1332   if (int_mode == BLKmode)
1333     int_mode = int_mode_for_mode (mode);
1334   /* Should probably push op0 out to memory and then do a load.  */
1335   gcc_assert (int_mode != BLKmode);
1336
1337   /* OFFSET is the number of words or bytes (UNIT says which)
1338      from STR_RTX to the first word or byte containing part of the field.  */
1339   if (!MEM_P (op0))
1340     {
1341       if (offset != 0
1342           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1343         {
1344           if (!REG_P (op0))
1345             op0 = copy_to_reg (op0);
1346           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1347                                 op0, (offset * UNITS_PER_WORD));
1348         }
1349       offset = 0;
1350     }
1351
1352   /* Now OFFSET is nonzero only for memory operands.  */
1353
1354   if (unsignedp)
1355     {
1356       if (HAVE_extzv
1357           && bitsize > 0
1358           && GET_MODE_BITSIZE (extzv_mode) >= bitsize
1359           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1360                 && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1361         {
1362           unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1363           rtx bitsize_rtx, bitpos_rtx;
1364           rtx last = get_last_insn ();
1365           rtx xop0 = op0;
1366           rtx xtarget = target;
1367           rtx xspec_target = spec_target;
1368           rtx xspec_target_subreg = spec_target_subreg;
1369           rtx pat;
1370           enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1371
1372           if (MEM_P (xop0))
1373             {
1374               int save_volatile_ok = volatile_ok;
1375               volatile_ok = 1;
1376
1377               /* Is the memory operand acceptable?  */
1378               if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1379                      (xop0, GET_MODE (xop0))))
1380                 {
1381                   /* No, load into a reg and extract from there.  */
1382                   enum machine_mode bestmode;
1383
1384                   /* Get the mode to use for inserting into this field.  If
1385                      OP0 is BLKmode, get the smallest mode consistent with the
1386                      alignment. If OP0 is a non-BLKmode object that is no
1387                      wider than MAXMODE, use its mode. Otherwise, use the
1388                      smallest mode containing the field.  */
1389
1390                   if (GET_MODE (xop0) == BLKmode
1391                       || (GET_MODE_SIZE (GET_MODE (op0))
1392                           > GET_MODE_SIZE (maxmode)))
1393                     bestmode = get_best_mode (bitsize, bitnum,
1394                                               MEM_ALIGN (xop0), maxmode,
1395                                               MEM_VOLATILE_P (xop0));
1396                   else
1397                     bestmode = GET_MODE (xop0);
1398
1399                   if (bestmode == VOIDmode
1400                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1401                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1402                     goto extzv_loses;
1403
1404                   /* Compute offset as multiple of this unit,
1405                      counting in bytes.  */
1406                   unit = GET_MODE_BITSIZE (bestmode);
1407                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1408                   xbitpos = bitnum % unit;
1409                   xop0 = adjust_address (xop0, bestmode, xoffset);
1410
1411                   /* Make sure register is big enough for the whole field. */
1412                   if (xoffset * BITS_PER_UNIT + unit
1413                       < offset * BITS_PER_UNIT + bitsize)
1414                     goto extzv_loses;
1415
1416                   /* Fetch it to a register in that size.  */
1417                   xop0 = force_reg (bestmode, xop0);
1418
1419                   /* XBITPOS counts within UNIT, which is what is expected.  */
1420                 }
1421               else
1422                 /* Get ref to first byte containing part of the field.  */
1423                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1424
1425               volatile_ok = save_volatile_ok;
1426             }
1427
1428           /* If op0 is a register, we need it in MAXMODE (which is usually
1429              SImode). to make it acceptable to the format of extzv.  */
1430           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1431             goto extzv_loses;
1432           if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1433             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1434
1435           /* On big-endian machines, we count bits from the most significant.
1436              If the bit field insn does not, we must invert.  */
1437           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1438             xbitpos = unit - bitsize - xbitpos;
1439
1440           /* Now convert from counting within UNIT to counting in MAXMODE.  */
1441           if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1442             xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1443
1444           unit = GET_MODE_BITSIZE (maxmode);
1445
1446           if (xtarget == 0)
1447             xtarget = xspec_target = gen_reg_rtx (tmode);
1448
1449           if (GET_MODE (xtarget) != maxmode)
1450             {
1451               if (REG_P (xtarget))
1452                 {
1453                   int wider = (GET_MODE_SIZE (maxmode)
1454                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1455                   xtarget = gen_lowpart (maxmode, xtarget);
1456                   if (wider)
1457                     xspec_target_subreg = xtarget;
1458                 }
1459               else
1460                 xtarget = gen_reg_rtx (maxmode);
1461             }
1462
1463           /* If this machine's extzv insists on a register target,
1464              make sure we have one.  */
1465           if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1466                  (xtarget, maxmode)))
1467             xtarget = gen_reg_rtx (maxmode);
1468
1469           bitsize_rtx = GEN_INT (bitsize);
1470           bitpos_rtx = GEN_INT (xbitpos);
1471
1472           pat = gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1473           if (pat)
1474             {
1475               emit_insn (pat);
1476               target = xtarget;
1477               spec_target = xspec_target;
1478               spec_target_subreg = xspec_target_subreg;
1479             }
1480           else
1481             {
1482               delete_insns_since (last);
1483               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1484                                                 bitpos, target, 1);
1485             }
1486         }
1487       else
1488       extzv_loses:
1489         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1490                                           bitpos, target, 1);
1491     }
1492   else
1493     {
1494       if (HAVE_extv
1495           && bitsize > 0
1496           && GET_MODE_BITSIZE (extv_mode) >= bitsize
1497           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1498                 && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1499         {
1500           int xbitpos = bitpos, xoffset = offset;
1501           rtx bitsize_rtx, bitpos_rtx;
1502           rtx last = get_last_insn ();
1503           rtx xop0 = op0, xtarget = target;
1504           rtx xspec_target = spec_target;
1505           rtx xspec_target_subreg = spec_target_subreg;
1506           rtx pat;
1507           enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1508
1509           if (MEM_P (xop0))
1510             {
1511               /* Is the memory operand acceptable?  */
1512               if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1513                      (xop0, GET_MODE (xop0))))
1514                 {
1515                   /* No, load into a reg and extract from there.  */
1516                   enum machine_mode bestmode;
1517
1518                   /* Get the mode to use for inserting into this field.  If
1519                      OP0 is BLKmode, get the smallest mode consistent with the
1520                      alignment. If OP0 is a non-BLKmode object that is no
1521                      wider than MAXMODE, use its mode. Otherwise, use the
1522                      smallest mode containing the field.  */
1523
1524                   if (GET_MODE (xop0) == BLKmode
1525                       || (GET_MODE_SIZE (GET_MODE (op0))
1526                           > GET_MODE_SIZE (maxmode)))
1527                     bestmode = get_best_mode (bitsize, bitnum,
1528                                               MEM_ALIGN (xop0), maxmode,
1529                                               MEM_VOLATILE_P (xop0));
1530                   else
1531                     bestmode = GET_MODE (xop0);
1532
1533                   if (bestmode == VOIDmode
1534                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1535                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1536                     goto extv_loses;
1537
1538                   /* Compute offset as multiple of this unit,
1539                      counting in bytes.  */
1540                   unit = GET_MODE_BITSIZE (bestmode);
1541                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1542                   xbitpos = bitnum % unit;
1543                   xop0 = adjust_address (xop0, bestmode, xoffset);
1544
1545                   /* Make sure register is big enough for the whole field. */
1546                   if (xoffset * BITS_PER_UNIT + unit
1547                       < offset * BITS_PER_UNIT + bitsize)
1548                     goto extv_loses;
1549
1550                   /* Fetch it to a register in that size.  */
1551                   xop0 = force_reg (bestmode, xop0);
1552
1553                   /* XBITPOS counts within UNIT, which is what is expected.  */
1554                 }
1555               else
1556                 /* Get ref to first byte containing part of the field.  */
1557                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1558             }
1559
1560           /* If op0 is a register, we need it in MAXMODE (which is usually
1561              SImode) to make it acceptable to the format of extv.  */
1562           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1563             goto extv_loses;
1564           if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1565             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1566
1567           /* On big-endian machines, we count bits from the most significant.
1568              If the bit field insn does not, we must invert.  */
1569           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1570             xbitpos = unit - bitsize - xbitpos;
1571
1572           /* XBITPOS counts within a size of UNIT.
1573              Adjust to count within a size of MAXMODE.  */
1574           if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1575             xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1576
1577           unit = GET_MODE_BITSIZE (maxmode);
1578
1579           if (xtarget == 0)
1580             xtarget = xspec_target = gen_reg_rtx (tmode);
1581
1582           if (GET_MODE (xtarget) != maxmode)
1583             {
1584               if (REG_P (xtarget))
1585                 {
1586                   int wider = (GET_MODE_SIZE (maxmode)
1587                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1588                   xtarget = gen_lowpart (maxmode, xtarget);
1589                   if (wider)
1590                     xspec_target_subreg = xtarget;
1591                 }
1592               else
1593                 xtarget = gen_reg_rtx (maxmode);
1594             }
1595
1596           /* If this machine's extv insists on a register target,
1597              make sure we have one.  */
1598           if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1599                  (xtarget, maxmode)))
1600             xtarget = gen_reg_rtx (maxmode);
1601
1602           bitsize_rtx = GEN_INT (bitsize);
1603           bitpos_rtx = GEN_INT (xbitpos);
1604
1605           pat = gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1606           if (pat)
1607             {
1608               emit_insn (pat);
1609               target = xtarget;
1610               spec_target = xspec_target;
1611               spec_target_subreg = xspec_target_subreg;
1612             }
1613           else
1614             {
1615               delete_insns_since (last);
1616               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1617                                                 bitpos, target, 0);
1618             }
1619         }
1620       else
1621       extv_loses:
1622         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1623                                           bitpos, target, 0);
1624     }
1625   if (target == spec_target)
1626     return target;
1627   if (target == spec_target_subreg)
1628     return spec_target;
1629   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1630     {
1631       /* If the target mode is not a scalar integral, first convert to the
1632          integer mode of that size and then access it as a floating-point
1633          value via a SUBREG.  */
1634       if (!SCALAR_INT_MODE_P (tmode))
1635         {
1636           enum machine_mode smode
1637             = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1638           target = convert_to_mode (smode, target, unsignedp);
1639           target = force_reg (smode, target);
1640           return gen_lowpart (tmode, target);
1641         }
1642
1643       return convert_to_mode (tmode, target, unsignedp);
1644     }
1645   return target;
1646 }
1647 \f
1648 /* Extract a bit field using shifts and boolean operations
1649    Returns an rtx to represent the value.
1650    OP0 addresses a register (word) or memory (byte).
1651    BITPOS says which bit within the word or byte the bit field starts in.
1652    OFFSET says how many bytes farther the bit field starts;
1653     it is 0 if OP0 is a register.
1654    BITSIZE says how many bits long the bit field is.
1655     (If OP0 is a register, it may be narrower than a full word,
1656      but BITPOS still counts within a full word,
1657      which is significant on bigendian machines.)
1658
1659    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1660    If TARGET is nonzero, attempts to store the value there
1661    and return TARGET, but this is not guaranteed.
1662    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1663
1664 static rtx
1665 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1666                          unsigned HOST_WIDE_INT offset,
1667                          unsigned HOST_WIDE_INT bitsize,
1668                          unsigned HOST_WIDE_INT bitpos, rtx target,
1669                          int unsignedp)
1670 {
1671   unsigned int total_bits = BITS_PER_WORD;
1672   enum machine_mode mode;
1673
1674   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1675     {
1676       /* Special treatment for a bit field split across two registers.  */
1677       if (bitsize + bitpos > BITS_PER_WORD)
1678         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1679     }
1680   else
1681     {
1682       /* Get the proper mode to use for this field.  We want a mode that
1683          includes the entire field.  If such a mode would be larger than
1684          a word, we won't be doing the extraction the normal way.  */
1685
1686       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1687                             MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1688
1689       if (mode == VOIDmode)
1690         /* The only way this should occur is if the field spans word
1691            boundaries.  */
1692         return extract_split_bit_field (op0, bitsize,
1693                                         bitpos + offset * BITS_PER_UNIT,
1694                                         unsignedp);
1695
1696       total_bits = GET_MODE_BITSIZE (mode);
1697
1698       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1699          be in the range 0 to total_bits-1, and put any excess bytes in
1700          OFFSET.  */
1701       if (bitpos >= total_bits)
1702         {
1703           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1704           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1705                      * BITS_PER_UNIT);
1706         }
1707
1708       /* Get ref to an aligned byte, halfword, or word containing the field.
1709          Adjust BITPOS to be position within a word,
1710          and OFFSET to be the offset of that word.
1711          Then alter OP0 to refer to that word.  */
1712       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1713       offset -= (offset % (total_bits / BITS_PER_UNIT));
1714       op0 = adjust_address (op0, mode, offset);
1715     }
1716
1717   mode = GET_MODE (op0);
1718
1719   if (BYTES_BIG_ENDIAN)
1720     /* BITPOS is the distance between our msb and that of OP0.
1721        Convert it to the distance from the lsb.  */
1722     bitpos = total_bits - bitsize - bitpos;
1723
1724   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1725      We have reduced the big-endian case to the little-endian case.  */
1726
1727   if (unsignedp)
1728     {
1729       if (bitpos)
1730         {
1731           /* If the field does not already start at the lsb,
1732              shift it so it does.  */
1733           tree amount = build_int_cst (NULL_TREE, bitpos);
1734           /* Maybe propagate the target for the shift.  */
1735           /* But not if we will return it--could confuse integrate.c.  */
1736           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1737           if (tmode != mode) subtarget = 0;
1738           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1739         }
1740       /* Convert the value to the desired mode.  */
1741       if (mode != tmode)
1742         op0 = convert_to_mode (tmode, op0, 1);
1743
1744       /* Unless the msb of the field used to be the msb when we shifted,
1745          mask out the upper bits.  */
1746
1747       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1748         return expand_binop (GET_MODE (op0), and_optab, op0,
1749                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1750                              target, 1, OPTAB_LIB_WIDEN);
1751       return op0;
1752     }
1753
1754   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1755      then arithmetic-shift its lsb to the lsb of the word.  */
1756   op0 = force_reg (mode, op0);
1757   if (mode != tmode)
1758     target = 0;
1759
1760   /* Find the narrowest integer mode that contains the field.  */
1761
1762   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1763        mode = GET_MODE_WIDER_MODE (mode))
1764     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1765       {
1766         op0 = convert_to_mode (mode, op0, 0);
1767         break;
1768       }
1769
1770   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1771     {
1772       tree amount
1773         = build_int_cst (NULL_TREE,
1774                          GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1775       /* Maybe propagate the target for the shift.  */
1776       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1777       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1778     }
1779
1780   return expand_shift (RSHIFT_EXPR, mode, op0,
1781                        build_int_cst (NULL_TREE,
1782                                       GET_MODE_BITSIZE (mode) - bitsize),
1783                        target, 0);
1784 }
1785 \f
1786 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1787    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1788    complement of that if COMPLEMENT.  The mask is truncated if
1789    necessary to the width of mode MODE.  The mask is zero-extended if
1790    BITSIZE+BITPOS is too small for MODE.  */
1791
1792 static rtx
1793 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1794 {
1795   HOST_WIDE_INT masklow, maskhigh;
1796
1797   if (bitsize == 0)
1798     masklow = 0;
1799   else if (bitpos < HOST_BITS_PER_WIDE_INT)
1800     masklow = (HOST_WIDE_INT) -1 << bitpos;
1801   else
1802     masklow = 0;
1803
1804   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1805     masklow &= ((unsigned HOST_WIDE_INT) -1
1806                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1807
1808   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1809     maskhigh = -1;
1810   else
1811     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1812
1813   if (bitsize == 0)
1814     maskhigh = 0;
1815   else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1816     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1817                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1818   else
1819     maskhigh = 0;
1820
1821   if (complement)
1822     {
1823       maskhigh = ~maskhigh;
1824       masklow = ~masklow;
1825     }
1826
1827   return immed_double_const (masklow, maskhigh, mode);
1828 }
1829
1830 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1831    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1832
1833 static rtx
1834 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1835 {
1836   unsigned HOST_WIDE_INT v = INTVAL (value);
1837   HOST_WIDE_INT low, high;
1838
1839   if (bitsize < HOST_BITS_PER_WIDE_INT)
1840     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1841
1842   if (bitpos < HOST_BITS_PER_WIDE_INT)
1843     {
1844       low = v << bitpos;
1845       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1846     }
1847   else
1848     {
1849       low = 0;
1850       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1851     }
1852
1853   return immed_double_const (low, high, mode);
1854 }
1855 \f
1856 /* Extract a bit field from a memory by forcing the alignment of the
1857    memory.  This efficient only if the field spans at least 4 boundaries.
1858
1859    OP0 is the MEM.
1860    BITSIZE is the field width; BITPOS is the position of the first bit.
1861    UNSIGNEDP is true if the result should be zero-extended.  */
1862
1863 static rtx
1864 extract_force_align_mem_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1865                                    unsigned HOST_WIDE_INT bitpos,
1866                                    int unsignedp)
1867 {
1868   enum machine_mode mode, dmode;
1869   unsigned int m_bitsize, m_size;
1870   unsigned int sign_shift_up, sign_shift_dn;
1871   rtx base, a1, a2, v1, v2, comb, shift, result, start;
1872
1873   /* Choose a mode that will fit BITSIZE.  */
1874   mode = smallest_mode_for_size (bitsize, MODE_INT);
1875   m_size = GET_MODE_SIZE (mode);
1876   m_bitsize = GET_MODE_BITSIZE (mode);
1877
1878   /* Choose a mode twice as wide.  Fail if no such mode exists.  */
1879   dmode = mode_for_size (m_bitsize * 2, MODE_INT, false);
1880   if (dmode == BLKmode)
1881     return NULL;
1882
1883   do_pending_stack_adjust ();
1884   start = get_last_insn ();
1885
1886   /* At the end, we'll need an additional shift to deal with sign/zero
1887      extension.  By default this will be a left+right shift of the
1888      appropriate size.  But we may be able to eliminate one of them.  */
1889   sign_shift_up = sign_shift_dn = m_bitsize - bitsize;
1890
1891   if (STRICT_ALIGNMENT)
1892     {
1893       base = plus_constant (XEXP (op0, 0), bitpos / BITS_PER_UNIT);
1894       bitpos %= BITS_PER_UNIT;
1895
1896       /* We load two values to be concatenate.  There's an edge condition
1897          that bears notice -- an aligned value at the end of a page can
1898          only load one value lest we segfault.  So the two values we load
1899          are at "base & -size" and "(base + size - 1) & -size".  If base
1900          is unaligned, the addresses will be aligned and sequential; if
1901          base is aligned, the addresses will both be equal to base.  */
1902
1903       a1 = expand_simple_binop (Pmode, AND, force_operand (base, NULL),
1904                                 GEN_INT (-(HOST_WIDE_INT)m_size),
1905                                 NULL, true, OPTAB_LIB_WIDEN);
1906       mark_reg_pointer (a1, m_bitsize);
1907       v1 = gen_rtx_MEM (mode, a1);
1908       set_mem_align (v1, m_bitsize);
1909       v1 = force_reg (mode, validize_mem (v1));
1910
1911       a2 = plus_constant (base, GET_MODE_SIZE (mode) - 1);
1912       a2 = expand_simple_binop (Pmode, AND, force_operand (a2, NULL),
1913                                 GEN_INT (-(HOST_WIDE_INT)m_size),
1914                                 NULL, true, OPTAB_LIB_WIDEN);
1915       v2 = gen_rtx_MEM (mode, a2);
1916       set_mem_align (v2, m_bitsize);
1917       v2 = force_reg (mode, validize_mem (v2));
1918
1919       /* Combine these two values into a double-word value.  */
1920       if (m_bitsize == BITS_PER_WORD)
1921         {
1922           comb = gen_reg_rtx (dmode);
1923           emit_insn (gen_rtx_CLOBBER (VOIDmode, comb));
1924           emit_move_insn (gen_rtx_SUBREG (mode, comb, 0), v1);
1925           emit_move_insn (gen_rtx_SUBREG (mode, comb, m_size), v2);
1926         }
1927       else
1928         {
1929           if (BYTES_BIG_ENDIAN)
1930             comb = v1, v1 = v2, v2 = comb;
1931           v1 = convert_modes (dmode, mode, v1, true);
1932           if (v1 == NULL)
1933             goto fail;
1934           v2 = convert_modes (dmode, mode, v2, true);
1935           v2 = expand_simple_binop (dmode, ASHIFT, v2, GEN_INT (m_bitsize),
1936                                     NULL, true, OPTAB_LIB_WIDEN);
1937           if (v2 == NULL)
1938             goto fail;
1939           comb = expand_simple_binop (dmode, IOR, v1, v2, NULL,
1940                                       true, OPTAB_LIB_WIDEN);
1941           if (comb == NULL)
1942             goto fail;
1943         }
1944
1945       shift = expand_simple_binop (Pmode, AND, base, GEN_INT (m_size - 1),
1946                                    NULL, true, OPTAB_LIB_WIDEN);
1947       shift = expand_mult (Pmode, shift, GEN_INT (BITS_PER_UNIT), NULL, 1);
1948
1949       if (bitpos != 0)
1950         {
1951           if (sign_shift_up <= bitpos)
1952             bitpos -= sign_shift_up, sign_shift_up = 0;
1953           shift = expand_simple_binop (Pmode, PLUS, shift, GEN_INT (bitpos),
1954                                        NULL, true, OPTAB_LIB_WIDEN);
1955         }
1956     }
1957   else
1958     {
1959       unsigned HOST_WIDE_INT offset = bitpos / BITS_PER_UNIT;
1960       bitpos %= BITS_PER_UNIT;
1961
1962       /* When strict alignment is not required, we can just load directly
1963          from memory without masking.  If the remaining BITPOS offset is
1964          small enough, we may be able to do all operations in MODE as
1965          opposed to DMODE.  */
1966       if (bitpos + bitsize <= m_bitsize)
1967         dmode = mode;
1968       comb = adjust_address (op0, dmode, offset);
1969
1970       if (sign_shift_up <= bitpos)
1971         bitpos -= sign_shift_up, sign_shift_up = 0;
1972       shift = GEN_INT (bitpos);
1973     }
1974
1975   /* Shift down the double-word such that the requested value is at bit 0.  */
1976   if (shift != const0_rtx)
1977     comb = expand_simple_binop (dmode, unsignedp ? LSHIFTRT : ASHIFTRT,
1978                                 comb, shift, NULL, unsignedp, OPTAB_LIB_WIDEN);
1979   if (comb == NULL)
1980     goto fail;
1981
1982   /* If the field exactly matches MODE, then all we need to do is return the
1983      lowpart.  Otherwise, shift to get the sign bits set properly.  */
1984   result = force_reg (mode, gen_lowpart (mode, comb));
1985
1986   if (sign_shift_up)
1987     result = expand_simple_binop (mode, ASHIFT, result,
1988                                   GEN_INT (sign_shift_up),
1989                                   NULL_RTX, 0, OPTAB_LIB_WIDEN);
1990   if (sign_shift_dn)
1991     result = expand_simple_binop (mode, unsignedp ? LSHIFTRT : ASHIFTRT,
1992                                   result, GEN_INT (sign_shift_dn),
1993                                   NULL_RTX, 0, OPTAB_LIB_WIDEN);
1994
1995   return result;
1996
1997  fail:
1998   delete_insns_since (start);
1999   return NULL;
2000 }
2001
2002 /* Extract a bit field that is split across two words
2003    and return an RTX for the result.
2004
2005    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2006    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2007    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
2008
2009 static rtx
2010 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2011                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
2012 {
2013   unsigned int unit;
2014   unsigned int bitsdone = 0;
2015   rtx result = NULL_RTX;
2016   int first = 1;
2017
2018   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2019      much at a time.  */
2020   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2021     unit = BITS_PER_WORD;
2022   else
2023     {
2024       unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2025       if (0 && bitsize / unit > 2)
2026         {
2027           rtx tmp = extract_force_align_mem_bit_field (op0, bitsize, bitpos,
2028                                                        unsignedp);
2029           if (tmp)
2030             return tmp;
2031         }
2032     }
2033
2034   while (bitsdone < bitsize)
2035     {
2036       unsigned HOST_WIDE_INT thissize;
2037       rtx part, word;
2038       unsigned HOST_WIDE_INT thispos;
2039       unsigned HOST_WIDE_INT offset;
2040
2041       offset = (bitpos + bitsdone) / unit;
2042       thispos = (bitpos + bitsdone) % unit;
2043
2044       /* THISSIZE must not overrun a word boundary.  Otherwise,
2045          extract_fixed_bit_field will call us again, and we will mutually
2046          recurse forever.  */
2047       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2048       thissize = MIN (thissize, unit - thispos);
2049
2050       /* If OP0 is a register, then handle OFFSET here.
2051
2052          When handling multiword bitfields, extract_bit_field may pass
2053          down a word_mode SUBREG of a larger REG for a bitfield that actually
2054          crosses a word boundary.  Thus, for a SUBREG, we must find
2055          the current word starting from the base register.  */
2056       if (GET_CODE (op0) == SUBREG)
2057         {
2058           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2059           word = operand_subword_force (SUBREG_REG (op0), word_offset,
2060                                         GET_MODE (SUBREG_REG (op0)));
2061           offset = 0;
2062         }
2063       else if (REG_P (op0))
2064         {
2065           word = operand_subword_force (op0, offset, GET_MODE (op0));
2066           offset = 0;
2067         }
2068       else
2069         word = op0;
2070
2071       /* Extract the parts in bit-counting order,
2072          whose meaning is determined by BYTES_PER_UNIT.
2073          OFFSET is in UNITs, and UNIT is in bits.
2074          extract_fixed_bit_field wants offset in bytes.  */
2075       part = extract_fixed_bit_field (word_mode, word,
2076                                       offset * unit / BITS_PER_UNIT,
2077                                       thissize, thispos, 0, 1);
2078       bitsdone += thissize;
2079
2080       /* Shift this part into place for the result.  */
2081       if (BYTES_BIG_ENDIAN)
2082         {
2083           if (bitsize != bitsdone)
2084             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2085                                  build_int_cst (NULL_TREE, bitsize - bitsdone),
2086                                  0, 1);
2087         }
2088       else
2089         {
2090           if (bitsdone != thissize)
2091             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2092                                  build_int_cst (NULL_TREE,
2093                                                 bitsdone - thissize), 0, 1);
2094         }
2095
2096       if (first)
2097         result = part;
2098       else
2099         /* Combine the parts with bitwise or.  This works
2100            because we extracted each part as an unsigned bit field.  */
2101         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2102                                OPTAB_LIB_WIDEN);
2103
2104       first = 0;
2105     }
2106
2107   /* Unsigned bit field: we are done.  */
2108   if (unsignedp)
2109     return result;
2110   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2111   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2112                          build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2113                          NULL_RTX, 0);
2114   return expand_shift (RSHIFT_EXPR, word_mode, result,
2115                        build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2116                        NULL_RTX, 0);
2117 }
2118 \f
2119 /* Add INC into TARGET.  */
2120
2121 void
2122 expand_inc (rtx target, rtx inc)
2123 {
2124   rtx value = expand_binop (GET_MODE (target), add_optab,
2125                             target, inc,
2126                             target, 0, OPTAB_LIB_WIDEN);
2127   if (value != target)
2128     emit_move_insn (target, value);
2129 }
2130
2131 /* Subtract DEC from TARGET.  */
2132
2133 void
2134 expand_dec (rtx target, rtx dec)
2135 {
2136   rtx value = expand_binop (GET_MODE (target), sub_optab,
2137                             target, dec,
2138                             target, 0, OPTAB_LIB_WIDEN);
2139   if (value != target)
2140     emit_move_insn (target, value);
2141 }
2142 \f
2143 /* Output a shift instruction for expression code CODE,
2144    with SHIFTED being the rtx for the value to shift,
2145    and AMOUNT the tree for the amount to shift by.
2146    Store the result in the rtx TARGET, if that is convenient.
2147    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2148    Return the rtx for where the value is.  */
2149
2150 rtx
2151 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2152               tree amount, rtx target, int unsignedp)
2153 {
2154   rtx op1, temp = 0;
2155   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2156   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2157   int try;
2158
2159   /* Previously detected shift-counts computed by NEGATE_EXPR
2160      and shifted in the other direction; but that does not work
2161      on all machines.  */
2162
2163   op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
2164
2165   if (SHIFT_COUNT_TRUNCATED)
2166     {
2167       if (GET_CODE (op1) == CONST_INT
2168           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2169               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2170         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2171                        % GET_MODE_BITSIZE (mode));
2172       else if (GET_CODE (op1) == SUBREG
2173                && subreg_lowpart_p (op1))
2174         op1 = SUBREG_REG (op1);
2175     }
2176
2177   if (op1 == const0_rtx)
2178     return shifted;
2179
2180   /* Check whether its cheaper to implement a left shift by a constant
2181      bit count by a sequence of additions.  */
2182   if (code == LSHIFT_EXPR
2183       && GET_CODE (op1) == CONST_INT
2184       && INTVAL (op1) > 0
2185       && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2186       && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode])
2187     {
2188       int i;
2189       for (i = 0; i < INTVAL (op1); i++)
2190         {
2191           temp = force_reg (mode, shifted);
2192           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2193                                   unsignedp, OPTAB_LIB_WIDEN);
2194         }
2195       return shifted;
2196     }
2197
2198   for (try = 0; temp == 0 && try < 3; try++)
2199     {
2200       enum optab_methods methods;
2201
2202       if (try == 0)
2203         methods = OPTAB_DIRECT;
2204       else if (try == 1)
2205         methods = OPTAB_WIDEN;
2206       else
2207         methods = OPTAB_LIB_WIDEN;
2208
2209       if (rotate)
2210         {
2211           /* Widening does not work for rotation.  */
2212           if (methods == OPTAB_WIDEN)
2213             continue;
2214           else if (methods == OPTAB_LIB_WIDEN)
2215             {
2216               /* If we have been unable to open-code this by a rotation,
2217                  do it as the IOR of two shifts.  I.e., to rotate A
2218                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2219                  where C is the bitsize of A.
2220
2221                  It is theoretically possible that the target machine might
2222                  not be able to perform either shift and hence we would
2223                  be making two libcalls rather than just the one for the
2224                  shift (similarly if IOR could not be done).  We will allow
2225                  this extremely unlikely lossage to avoid complicating the
2226                  code below.  */
2227
2228               rtx subtarget = target == shifted ? 0 : target;
2229               rtx temp1;
2230               tree type = TREE_TYPE (amount);
2231               tree new_amount = make_tree (type, op1);
2232               tree other_amount
2233                 = fold_build2 (MINUS_EXPR, type,
2234                                build_int_cst (type, GET_MODE_BITSIZE (mode)),
2235                                amount);
2236
2237               shifted = force_reg (mode, shifted);
2238
2239               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2240                                    mode, shifted, new_amount, 0, 1);
2241               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2242                                     mode, shifted, other_amount, subtarget, 1);
2243               return expand_binop (mode, ior_optab, temp, temp1, target,
2244                                    unsignedp, methods);
2245             }
2246
2247           temp = expand_binop (mode,
2248                                left ? rotl_optab : rotr_optab,
2249                                shifted, op1, target, unsignedp, methods);
2250         }
2251       else if (unsignedp)
2252         temp = expand_binop (mode,
2253                              left ? ashl_optab : lshr_optab,
2254                              shifted, op1, target, unsignedp, methods);
2255
2256       /* Do arithmetic shifts.
2257          Also, if we are going to widen the operand, we can just as well
2258          use an arithmetic right-shift instead of a logical one.  */
2259       if (temp == 0 && ! rotate
2260           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2261         {
2262           enum optab_methods methods1 = methods;
2263
2264           /* If trying to widen a log shift to an arithmetic shift,
2265              don't accept an arithmetic shift of the same size.  */
2266           if (unsignedp)
2267             methods1 = OPTAB_MUST_WIDEN;
2268
2269           /* Arithmetic shift */
2270
2271           temp = expand_binop (mode,
2272                                left ? ashl_optab : ashr_optab,
2273                                shifted, op1, target, unsignedp, methods1);
2274         }
2275
2276       /* We used to try extzv here for logical right shifts, but that was
2277          only useful for one machine, the VAX, and caused poor code
2278          generation there for lshrdi3, so the code was deleted and a
2279          define_expand for lshrsi3 was added to vax.md.  */
2280     }
2281
2282   gcc_assert (temp);
2283   return temp;
2284 }
2285 \f
2286 enum alg_code {
2287   alg_unknown,
2288   alg_zero,
2289   alg_m, alg_shift,
2290   alg_add_t_m2,
2291   alg_sub_t_m2,
2292   alg_add_factor,
2293   alg_sub_factor,
2294   alg_add_t2_m,
2295   alg_sub_t2_m,
2296   alg_impossible
2297 };
2298
2299 /* This structure holds the "cost" of a multiply sequence.  The
2300    "cost" field holds the total rtx_cost of every operator in the
2301    synthetic multiplication sequence, hence cost(a op b) is defined
2302    as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2303    The "latency" field holds the minimum possible latency of the
2304    synthetic multiply, on a hypothetical infinitely parallel CPU.
2305    This is the critical path, or the maximum height, of the expression
2306    tree which is the sum of rtx_costs on the most expensive path from
2307    any leaf to the root.  Hence latency(a op b) is defined as zero for
2308    leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise.  */
2309
2310 struct mult_cost {
2311   short cost;     /* Total rtx_cost of the multiplication sequence.  */
2312   short latency;  /* The latency of the multiplication sequence.  */
2313 };
2314
2315 /* This macro is used to compare a pointer to a mult_cost against an
2316    single integer "rtx_cost" value.  This is equivalent to the macro
2317    CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}.  */
2318 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y)    \
2319                              || ((X)->cost == (Y) && (X)->latency < (Y)))
2320
2321 /* This macro is used to compare two pointers to mult_costs against
2322    each other.  The macro returns true if X is cheaper than Y.
2323    Currently, the cheaper of two mult_costs is the one with the
2324    lower "cost".  If "cost"s are tied, the lower latency is cheaper.  */
2325 #define CHEAPER_MULT_COST(X,Y)  ((X)->cost < (Y)->cost          \
2326                                  || ((X)->cost == (Y)->cost     \
2327                                      && (X)->latency < (Y)->latency))
2328
2329 /* This structure records a sequence of operations.
2330    `ops' is the number of operations recorded.
2331    `cost' is their total cost.
2332    The operations are stored in `op' and the corresponding
2333    logarithms of the integer coefficients in `log'.
2334
2335    These are the operations:
2336    alg_zero             total := 0;
2337    alg_m                total := multiplicand;
2338    alg_shift            total := total * coeff
2339    alg_add_t_m2         total := total + multiplicand * coeff;
2340    alg_sub_t_m2         total := total - multiplicand * coeff;
2341    alg_add_factor       total := total * coeff + total;
2342    alg_sub_factor       total := total * coeff - total;
2343    alg_add_t2_m         total := total * coeff + multiplicand;
2344    alg_sub_t2_m         total := total * coeff - multiplicand;
2345
2346    The first operand must be either alg_zero or alg_m.  */
2347
2348 struct algorithm
2349 {
2350   struct mult_cost cost;
2351   short ops;
2352   /* The size of the OP and LOG fields are not directly related to the
2353      word size, but the worst-case algorithms will be if we have few
2354      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2355      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2356      in total wordsize operations.  */
2357   enum alg_code op[MAX_BITS_PER_WORD];
2358   char log[MAX_BITS_PER_WORD];
2359 };
2360
2361 /* The entry for our multiplication cache/hash table.  */
2362 struct alg_hash_entry {
2363   /* The number we are multiplying by.  */
2364   unsigned int t;
2365
2366   /* The mode in which we are multiplying something by T.  */
2367   enum machine_mode mode;
2368
2369   /* The best multiplication algorithm for t.  */
2370   enum alg_code alg;
2371
2372   /* The cost of multiplication if ALG_CODE is not alg_impossible.
2373      Otherwise, the cost within which multiplication by T is
2374      impossible.  */
2375   struct mult_cost cost;
2376 };
2377
2378 /* The number of cache/hash entries.  */
2379 #define NUM_ALG_HASH_ENTRIES 307
2380
2381 /* Each entry of ALG_HASH caches alg_code for some integer.  This is
2382    actually a hash table.  If we have a collision, that the older
2383    entry is kicked out.  */
2384 static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2385
2386 /* Indicates the type of fixup needed after a constant multiplication.
2387    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2388    the result should be negated, and ADD_VARIANT means that the
2389    multiplicand should be added to the result.  */
2390 enum mult_variant {basic_variant, negate_variant, add_variant};
2391
2392 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2393                         const struct mult_cost *, enum machine_mode mode);
2394 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2395                                  struct algorithm *, enum mult_variant *, int);
2396 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2397                               const struct algorithm *, enum mult_variant);
2398 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2399                                                  int, rtx *, int *, int *);
2400 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2401 static rtx extract_high_half (enum machine_mode, rtx);
2402 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2403 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2404                                        int, int);
2405 /* Compute and return the best algorithm for multiplying by T.
2406    The algorithm must cost less than cost_limit
2407    If retval.cost >= COST_LIMIT, no algorithm was found and all
2408    other field of the returned struct are undefined.
2409    MODE is the machine mode of the multiplication.  */
2410
2411 static void
2412 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2413             const struct mult_cost *cost_limit, enum machine_mode mode)
2414 {
2415   int m;
2416   struct algorithm *alg_in, *best_alg;
2417   struct mult_cost best_cost;
2418   struct mult_cost new_limit;
2419   int op_cost, op_latency;
2420   unsigned HOST_WIDE_INT q;
2421   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2422   int hash_index;
2423   bool cache_hit = false;
2424   enum alg_code cache_alg = alg_zero;
2425
2426   /* Indicate that no algorithm is yet found.  If no algorithm
2427      is found, this value will be returned and indicate failure.  */
2428   alg_out->cost.cost = cost_limit->cost + 1;
2429   alg_out->cost.latency = cost_limit->latency + 1;
2430
2431   if (cost_limit->cost < 0
2432       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2433     return;
2434
2435   /* Restrict the bits of "t" to the multiplication's mode.  */
2436   t &= GET_MODE_MASK (mode);
2437
2438   /* t == 1 can be done in zero cost.  */
2439   if (t == 1)
2440     {
2441       alg_out->ops = 1;
2442       alg_out->cost.cost = 0;
2443       alg_out->cost.latency = 0;
2444       alg_out->op[0] = alg_m;
2445       return;
2446     }
2447
2448   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2449      fail now.  */
2450   if (t == 0)
2451     {
2452       if (MULT_COST_LESS (cost_limit, zero_cost))
2453         return;
2454       else
2455         {
2456           alg_out->ops = 1;
2457           alg_out->cost.cost = zero_cost;
2458           alg_out->cost.latency = zero_cost;
2459           alg_out->op[0] = alg_zero;
2460           return;
2461         }
2462     }
2463
2464   /* We'll be needing a couple extra algorithm structures now.  */
2465
2466   alg_in = alloca (sizeof (struct algorithm));
2467   best_alg = alloca (sizeof (struct algorithm));
2468   best_cost = *cost_limit;
2469
2470   /* Compute the hash index.  */
2471   hash_index = (t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES;
2472
2473   /* See if we already know what to do for T.  */
2474   if (alg_hash[hash_index].t == t
2475       && alg_hash[hash_index].mode == mode
2476       && alg_hash[hash_index].alg != alg_unknown)
2477     {
2478       cache_alg = alg_hash[hash_index].alg;
2479
2480       if (cache_alg == alg_impossible)
2481         {
2482           /* The cache tells us that it's impossible to synthesize
2483              multiplication by T within alg_hash[hash_index].cost.  */
2484           if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2485             /* COST_LIMIT is at least as restrictive as the one
2486                recorded in the hash table, in which case we have no
2487                hope of synthesizing a multiplication.  Just
2488                return.  */
2489             return;
2490
2491           /* If we get here, COST_LIMIT is less restrictive than the
2492              one recorded in the hash table, so we may be able to
2493              synthesize a multiplication.  Proceed as if we didn't
2494              have the cache entry.  */
2495         }
2496       else
2497         {
2498           if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2499             /* The cached algorithm shows that this multiplication
2500                requires more cost than COST_LIMIT.  Just return.  This
2501                way, we don't clobber this cache entry with
2502                alg_impossible but retain useful information.  */
2503             return;
2504
2505           cache_hit = true;
2506
2507           switch (cache_alg)
2508             {
2509             case alg_shift:
2510               goto do_alg_shift;
2511
2512             case alg_add_t_m2:
2513             case alg_sub_t_m2:
2514               goto do_alg_addsub_t_m2;
2515
2516             case alg_add_factor:
2517             case alg_sub_factor:
2518               goto do_alg_addsub_factor;
2519
2520             case alg_add_t2_m:
2521               goto do_alg_add_t2_m;
2522
2523             case alg_sub_t2_m:
2524               goto do_alg_sub_t2_m;
2525
2526             default:
2527               gcc_unreachable ();
2528             }
2529         }
2530     }
2531
2532   /* If we have a group of zero bits at the low-order part of T, try
2533      multiplying by the remaining bits and then doing a shift.  */
2534
2535   if ((t & 1) == 0)
2536     {
2537     do_alg_shift:
2538       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2539       if (m < maxm)
2540         {
2541           q = t >> m;
2542           /* The function expand_shift will choose between a shift and
2543              a sequence of additions, so the observed cost is given as
2544              MIN (m * add_cost[mode], shift_cost[mode][m]).  */
2545           op_cost = m * add_cost[mode];
2546           if (shift_cost[mode][m] < op_cost)
2547             op_cost = shift_cost[mode][m];
2548           new_limit.cost = best_cost.cost - op_cost;
2549           new_limit.latency = best_cost.latency - op_cost;
2550           synth_mult (alg_in, q, &new_limit, mode);
2551
2552           alg_in->cost.cost += op_cost;
2553           alg_in->cost.latency += op_cost;
2554           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2555             {
2556               struct algorithm *x;
2557               best_cost = alg_in->cost;
2558               x = alg_in, alg_in = best_alg, best_alg = x;
2559               best_alg->log[best_alg->ops] = m;
2560               best_alg->op[best_alg->ops] = alg_shift;
2561             }
2562         }
2563       if (cache_hit)
2564         goto done;
2565     }
2566
2567   /* If we have an odd number, add or subtract one.  */
2568   if ((t & 1) != 0)
2569     {
2570       unsigned HOST_WIDE_INT w;
2571
2572     do_alg_addsub_t_m2:
2573       for (w = 1; (w & t) != 0; w <<= 1)
2574         ;
2575       /* If T was -1, then W will be zero after the loop.  This is another
2576          case where T ends with ...111.  Handling this with (T + 1) and
2577          subtract 1 produces slightly better code and results in algorithm
2578          selection much faster than treating it like the ...0111 case
2579          below.  */
2580       if (w == 0
2581           || (w > 2
2582               /* Reject the case where t is 3.
2583                  Thus we prefer addition in that case.  */
2584               && t != 3))
2585         {
2586           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2587
2588           op_cost = add_cost[mode];
2589           new_limit.cost = best_cost.cost - op_cost;
2590           new_limit.latency = best_cost.latency - op_cost;
2591           synth_mult (alg_in, t + 1, &new_limit, mode);
2592
2593           alg_in->cost.cost += op_cost;
2594           alg_in->cost.latency += op_cost;
2595           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2596             {
2597               struct algorithm *x;
2598               best_cost = alg_in->cost;
2599               x = alg_in, alg_in = best_alg, best_alg = x;
2600               best_alg->log[best_alg->ops] = 0;
2601               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2602             }
2603         }
2604       else
2605         {
2606           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2607
2608           op_cost = add_cost[mode];
2609           new_limit.cost = best_cost.cost - op_cost;
2610           new_limit.latency = best_cost.latency - op_cost;
2611           synth_mult (alg_in, t - 1, &new_limit, mode);
2612
2613           alg_in->cost.cost += op_cost;
2614           alg_in->cost.latency += op_cost;
2615           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2616             {
2617               struct algorithm *x;
2618               best_cost = alg_in->cost;
2619               x = alg_in, alg_in = best_alg, best_alg = x;
2620               best_alg->log[best_alg->ops] = 0;
2621               best_alg->op[best_alg->ops] = alg_add_t_m2;
2622             }
2623         }
2624       if (cache_hit)
2625         goto done;
2626     }
2627
2628   /* Look for factors of t of the form
2629      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2630      If we find such a factor, we can multiply by t using an algorithm that
2631      multiplies by q, shift the result by m and add/subtract it to itself.
2632
2633      We search for large factors first and loop down, even if large factors
2634      are less probable than small; if we find a large factor we will find a
2635      good sequence quickly, and therefore be able to prune (by decreasing
2636      COST_LIMIT) the search.  */
2637
2638  do_alg_addsub_factor:
2639   for (m = floor_log2 (t - 1); m >= 2; m--)
2640     {
2641       unsigned HOST_WIDE_INT d;
2642
2643       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2644       if (t % d == 0 && t > d && m < maxm
2645           && (!cache_hit || cache_alg == alg_add_factor))
2646         {
2647           /* If the target has a cheap shift-and-add instruction use
2648              that in preference to a shift insn followed by an add insn.
2649              Assume that the shift-and-add is "atomic" with a latency
2650              equal to its cost, otherwise assume that on superscalar
2651              hardware the shift may be executed concurrently with the
2652              earlier steps in the algorithm.  */
2653           op_cost = add_cost[mode] + shift_cost[mode][m];
2654           if (shiftadd_cost[mode][m] < op_cost)
2655             {
2656               op_cost = shiftadd_cost[mode][m];
2657               op_latency = op_cost;
2658             }
2659           else
2660             op_latency = add_cost[mode];
2661
2662           new_limit.cost = best_cost.cost - op_cost;
2663           new_limit.latency = best_cost.latency - op_latency;
2664           synth_mult (alg_in, t / d, &new_limit, mode);
2665
2666           alg_in->cost.cost += op_cost;
2667           alg_in->cost.latency += op_latency;
2668           if (alg_in->cost.latency < op_cost)
2669             alg_in->cost.latency = op_cost;
2670           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2671             {
2672               struct algorithm *x;
2673               best_cost = alg_in->cost;
2674               x = alg_in, alg_in = best_alg, best_alg = x;
2675               best_alg->log[best_alg->ops] = m;
2676               best_alg->op[best_alg->ops] = alg_add_factor;
2677             }
2678           /* Other factors will have been taken care of in the recursion.  */
2679           break;
2680         }
2681
2682       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2683       if (t % d == 0 && t > d && m < maxm
2684           && (!cache_hit || cache_alg == alg_sub_factor))
2685         {
2686           /* If the target has a cheap shift-and-subtract insn use
2687              that in preference to a shift insn followed by a sub insn.
2688              Assume that the shift-and-sub is "atomic" with a latency
2689              equal to it's cost, otherwise assume that on superscalar
2690              hardware the shift may be executed concurrently with the
2691              earlier steps in the algorithm.  */
2692           op_cost = add_cost[mode] + shift_cost[mode][m];
2693           if (shiftsub_cost[mode][m] < op_cost)
2694             {
2695               op_cost = shiftsub_cost[mode][m];
2696               op_latency = op_cost;
2697             }
2698           else
2699             op_latency = add_cost[mode];
2700
2701           new_limit.cost = best_cost.cost - op_cost;
2702           new_limit.latency = best_cost.latency - op_latency;
2703           synth_mult (alg_in, t / d, &new_limit, mode);
2704
2705           alg_in->cost.cost += op_cost;
2706           alg_in->cost.latency += op_latency;
2707           if (alg_in->cost.latency < op_cost)
2708             alg_in->cost.latency = op_cost;
2709           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2710             {
2711               struct algorithm *x;
2712               best_cost = alg_in->cost;
2713               x = alg_in, alg_in = best_alg, best_alg = x;
2714               best_alg->log[best_alg->ops] = m;
2715               best_alg->op[best_alg->ops] = alg_sub_factor;
2716             }
2717           break;
2718         }
2719     }
2720   if (cache_hit)
2721     goto done;
2722
2723   /* Try shift-and-add (load effective address) instructions,
2724      i.e. do a*3, a*5, a*9.  */
2725   if ((t & 1) != 0)
2726     {
2727     do_alg_add_t2_m:
2728       q = t - 1;
2729       q = q & -q;
2730       m = exact_log2 (q);
2731       if (m >= 0 && m < maxm)
2732         {
2733           op_cost = shiftadd_cost[mode][m];
2734           new_limit.cost = best_cost.cost - op_cost;
2735           new_limit.latency = best_cost.latency - op_cost;
2736           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2737
2738           alg_in->cost.cost += op_cost;
2739           alg_in->cost.latency += op_cost;
2740           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2741             {
2742               struct algorithm *x;
2743               best_cost = alg_in->cost;
2744               x = alg_in, alg_in = best_alg, best_alg = x;
2745               best_alg->log[best_alg->ops] = m;
2746               best_alg->op[best_alg->ops] = alg_add_t2_m;
2747             }
2748         }
2749       if (cache_hit)
2750         goto done;
2751
2752     do_alg_sub_t2_m:
2753       q = t + 1;
2754       q = q & -q;
2755       m = exact_log2 (q);
2756       if (m >= 0 && m < maxm)
2757         {
2758           op_cost = shiftsub_cost[mode][m];
2759           new_limit.cost = best_cost.cost - op_cost;
2760           new_limit.latency = best_cost.latency - op_cost;
2761           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2762
2763           alg_in->cost.cost += op_cost;
2764           alg_in->cost.latency += op_cost;
2765           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2766             {
2767               struct algorithm *x;
2768               best_cost = alg_in->cost;
2769               x = alg_in, alg_in = best_alg, best_alg = x;
2770               best_alg->log[best_alg->ops] = m;
2771               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2772             }
2773         }
2774       if (cache_hit)
2775         goto done;
2776     }
2777
2778  done:
2779   /* If best_cost has not decreased, we have not found any algorithm.  */
2780   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2781     {
2782       /* We failed to find an algorithm.  Record alg_impossible for
2783          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2784          we are asked to find an algorithm for T within the same or
2785          lower COST_LIMIT, we can immediately return to the
2786          caller.  */
2787       alg_hash[hash_index].t = t;
2788       alg_hash[hash_index].mode = mode;
2789       alg_hash[hash_index].alg = alg_impossible;
2790       alg_hash[hash_index].cost = *cost_limit;
2791       return;
2792     }
2793
2794   /* Cache the result.  */
2795   if (!cache_hit)
2796     {
2797       alg_hash[hash_index].t = t;
2798       alg_hash[hash_index].mode = mode;
2799       alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2800       alg_hash[hash_index].cost.cost = best_cost.cost;
2801       alg_hash[hash_index].cost.latency = best_cost.latency;
2802     }
2803
2804   /* If we are getting a too long sequence for `struct algorithm'
2805      to record, make this search fail.  */
2806   if (best_alg->ops == MAX_BITS_PER_WORD)
2807     return;
2808
2809   /* Copy the algorithm from temporary space to the space at alg_out.
2810      We avoid using structure assignment because the majority of
2811      best_alg is normally undefined, and this is a critical function.  */
2812   alg_out->ops = best_alg->ops + 1;
2813   alg_out->cost = best_cost;
2814   memcpy (alg_out->op, best_alg->op,
2815           alg_out->ops * sizeof *alg_out->op);
2816   memcpy (alg_out->log, best_alg->log,
2817           alg_out->ops * sizeof *alg_out->log);
2818 }
2819 \f
2820 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2821    Try three variations:
2822
2823        - a shift/add sequence based on VAL itself
2824        - a shift/add sequence based on -VAL, followed by a negation
2825        - a shift/add sequence based on VAL - 1, followed by an addition.
2826
2827    Return true if the cheapest of these cost less than MULT_COST,
2828    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2829
2830 static bool
2831 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2832                      struct algorithm *alg, enum mult_variant *variant,
2833                      int mult_cost)
2834 {
2835   struct algorithm alg2;
2836   struct mult_cost limit;
2837   int op_cost;
2838
2839   /* Fail quickly for impossible bounds.  */
2840   if (mult_cost < 0)
2841     return false;
2842
2843   /* Ensure that mult_cost provides a reasonable upper bound.
2844      Any constant multiplication can be performed with less
2845      than 2 * bits additions.  */
2846   op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[mode];
2847   if (mult_cost > op_cost)
2848     mult_cost = op_cost;
2849
2850   *variant = basic_variant;
2851   limit.cost = mult_cost;
2852   limit.latency = mult_cost;
2853   synth_mult (alg, val, &limit, mode);
2854
2855   /* This works only if the inverted value actually fits in an
2856      `unsigned int' */
2857   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2858     {
2859       op_cost = neg_cost[mode];
2860       if (MULT_COST_LESS (&alg->cost, mult_cost))
2861         {
2862           limit.cost = alg->cost.cost - op_cost;
2863           limit.latency = alg->cost.latency - op_cost;
2864         }
2865       else
2866         {
2867           limit.cost = mult_cost - op_cost;
2868           limit.latency = mult_cost - op_cost;
2869         }
2870
2871       synth_mult (&alg2, -val, &limit, mode);
2872       alg2.cost.cost += op_cost;
2873       alg2.cost.latency += op_cost;
2874       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2875         *alg = alg2, *variant = negate_variant;
2876     }
2877
2878   /* This proves very useful for division-by-constant.  */
2879   op_cost = add_cost[mode];
2880   if (MULT_COST_LESS (&alg->cost, mult_cost))
2881     {
2882       limit.cost = alg->cost.cost - op_cost;
2883       limit.latency = alg->cost.latency - op_cost;
2884     }
2885   else
2886     {
2887       limit.cost = mult_cost - op_cost;
2888       limit.latency = mult_cost - op_cost;
2889     }
2890
2891   synth_mult (&alg2, val - 1, &limit, mode);
2892   alg2.cost.cost += op_cost;
2893   alg2.cost.latency += op_cost;
2894   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2895     *alg = alg2, *variant = add_variant;
2896
2897   return MULT_COST_LESS (&alg->cost, mult_cost);
2898 }
2899
2900 /* A subroutine of expand_mult, used for constant multiplications.
2901    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2902    convenient.  Use the shift/add sequence described by ALG and apply
2903    the final fixup specified by VARIANT.  */
2904
2905 static rtx
2906 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2907                    rtx target, const struct algorithm *alg,
2908                    enum mult_variant variant)
2909 {
2910   HOST_WIDE_INT val_so_far;
2911   rtx insn, accum, tem;
2912   int opno;
2913   enum machine_mode nmode;
2914
2915   /* Avoid referencing memory over and over.
2916      For speed, but also for correctness when mem is volatile.  */
2917   if (MEM_P (op0))
2918     op0 = force_reg (mode, op0);
2919
2920   /* ACCUM starts out either as OP0 or as a zero, depending on
2921      the first operation.  */
2922
2923   if (alg->op[0] == alg_zero)
2924     {
2925       accum = copy_to_mode_reg (mode, const0_rtx);
2926       val_so_far = 0;
2927     }
2928   else if (alg->op[0] == alg_m)
2929     {
2930       accum = copy_to_mode_reg (mode, op0);
2931       val_so_far = 1;
2932     }
2933   else
2934     gcc_unreachable ();
2935
2936   for (opno = 1; opno < alg->ops; opno++)
2937     {
2938       int log = alg->log[opno];
2939       rtx shift_subtarget = optimize ? 0 : accum;
2940       rtx add_target
2941         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2942            && !optimize)
2943           ? target : 0;
2944       rtx accum_target = optimize ? 0 : accum;
2945
2946       switch (alg->op[opno])
2947         {
2948         case alg_shift:
2949           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2950                                 build_int_cst (NULL_TREE, log),
2951                                 NULL_RTX, 0);
2952           val_so_far <<= log;
2953           break;
2954
2955         case alg_add_t_m2:
2956           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2957                               build_int_cst (NULL_TREE, log),
2958                               NULL_RTX, 0);
2959           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2960                                  add_target ? add_target : accum_target);
2961           val_so_far += (HOST_WIDE_INT) 1 << log;
2962           break;
2963
2964         case alg_sub_t_m2:
2965           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2966                               build_int_cst (NULL_TREE, log),
2967                               NULL_RTX, 0);
2968           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2969                                  add_target ? add_target : accum_target);
2970           val_so_far -= (HOST_WIDE_INT) 1 << log;
2971           break;
2972
2973         case alg_add_t2_m:
2974           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2975                                 build_int_cst (NULL_TREE, log),
2976                                 shift_subtarget,
2977                                 0);
2978           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2979                                  add_target ? add_target : accum_target);
2980           val_so_far = (val_so_far << log) + 1;
2981           break;
2982
2983         case alg_sub_t2_m:
2984           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2985                                 build_int_cst (NULL_TREE, log),
2986                                 shift_subtarget, 0);
2987           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2988                                  add_target ? add_target : accum_target);
2989           val_so_far = (val_so_far << log) - 1;
2990           break;
2991
2992         case alg_add_factor:
2993           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2994                               build_int_cst (NULL_TREE, log),
2995                               NULL_RTX, 0);
2996           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2997                                  add_target ? add_target : accum_target);
2998           val_so_far += val_so_far << log;
2999           break;
3000
3001         case alg_sub_factor:
3002           tem = expand_shift (LSHIFT_EXPR, mode, accum,
3003                               build_int_cst (NULL_TREE, log),
3004                               NULL_RTX, 0);
3005           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3006                                  (add_target
3007                                   ? add_target : (optimize ? 0 : tem)));
3008           val_so_far = (val_so_far << log) - val_so_far;
3009           break;
3010
3011         default:
3012           gcc_unreachable ();
3013         }
3014
3015       /* Write a REG_EQUAL note on the last insn so that we can cse
3016          multiplication sequences.  Note that if ACCUM is a SUBREG,
3017          we've set the inner register and must properly indicate
3018          that.  */
3019
3020       tem = op0, nmode = mode;
3021       if (GET_CODE (accum) == SUBREG)
3022         {
3023           nmode = GET_MODE (SUBREG_REG (accum));
3024           tem = gen_lowpart (nmode, op0);
3025         }
3026
3027       insn = get_last_insn ();
3028       set_unique_reg_note (insn, REG_EQUAL,
3029                            gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
3030     }
3031
3032   if (variant == negate_variant)
3033     {
3034       val_so_far = -val_so_far;
3035       accum = expand_unop (mode, neg_optab, accum, target, 0);
3036     }
3037   else if (variant == add_variant)
3038     {
3039       val_so_far = val_so_far + 1;
3040       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3041     }
3042
3043   /* Compare only the bits of val and val_so_far that are significant
3044      in the result mode, to avoid sign-/zero-extension confusion.  */
3045   val &= GET_MODE_MASK (mode);
3046   val_so_far &= GET_MODE_MASK (mode);
3047   gcc_assert (val == val_so_far);
3048
3049   return accum;
3050 }
3051
3052 /* Perform a multiplication and return an rtx for the result.
3053    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3054    TARGET is a suggestion for where to store the result (an rtx).
3055
3056    We check specially for a constant integer as OP1.
3057    If you want this check for OP0 as well, then before calling
3058    you should swap the two operands if OP0 would be constant.  */
3059
3060 rtx
3061 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3062              int unsignedp)
3063 {
3064   enum mult_variant variant;
3065   struct algorithm algorithm;
3066   int max_cost;
3067
3068   /* Handling const0_rtx here allows us to use zero as a rogue value for
3069      coeff below.  */
3070   if (op1 == const0_rtx)
3071     return const0_rtx;
3072   if (op1 == const1_rtx)
3073     return op0;
3074   if (op1 == constm1_rtx)
3075     return expand_unop (mode,
3076                         GET_MODE_CLASS (mode) == MODE_INT
3077                         && !unsignedp && flag_trapv
3078                         ? negv_optab : neg_optab,
3079                         op0, target, 0);
3080
3081   /* These are the operations that are potentially turned into a sequence
3082      of shifts and additions.  */
3083   if (SCALAR_INT_MODE_P (mode)
3084       && (unsignedp || !flag_trapv))
3085     {
3086       HOST_WIDE_INT coeff = 0;
3087       rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3088
3089       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3090          less than or equal in size to `unsigned int' this doesn't matter.
3091          If the mode is larger than `unsigned int', then synth_mult works
3092          only if the constant value exactly fits in an `unsigned int' without
3093          any truncation.  This means that multiplying by negative values does
3094          not work; results are off by 2^32 on a 32 bit machine.  */
3095
3096       if (GET_CODE (op1) == CONST_INT)
3097         {
3098           /* Attempt to handle multiplication of DImode values by negative
3099              coefficients, by performing the multiplication by a positive
3100              multiplier and then inverting the result.  */
3101           if (INTVAL (op1) < 0
3102               && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3103             {
3104               /* Its safe to use -INTVAL (op1) even for INT_MIN, as the
3105                  result is interpreted as an unsigned coefficient.
3106                  Exclude cost of op0 from max_cost to match the cost
3107                  calculation of the synth_mult.  */
3108               max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET)
3109                          - neg_cost[mode];
3110               if (max_cost > 0
3111                   && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3112                                           &variant, max_cost))
3113                 {
3114                   rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3115                                                 NULL_RTX, &algorithm,
3116                                                 variant);
3117                   return expand_unop (mode, neg_optab, temp, target, 0);
3118                 }
3119             }
3120           else coeff = INTVAL (op1);
3121         }
3122       else if (GET_CODE (op1) == CONST_DOUBLE)
3123         {
3124           /* If we are multiplying in DImode, it may still be a win
3125              to try to work with shifts and adds.  */
3126           if (CONST_DOUBLE_HIGH (op1) == 0)
3127             coeff = CONST_DOUBLE_LOW (op1);
3128           else if (CONST_DOUBLE_LOW (op1) == 0
3129                    && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3130             {
3131               int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3132                           + HOST_BITS_PER_WIDE_INT;
3133               return expand_shift (LSHIFT_EXPR, mode, op0,
3134                                    build_int_cst (NULL_TREE, shift),
3135                                    target, unsignedp);
3136             }
3137         }
3138
3139       /* We used to test optimize here, on the grounds that it's better to
3140          produce a smaller program when -O is not used.  But this causes
3141          such a terrible slowdown sometimes that it seems better to always
3142          use synth_mult.  */
3143       if (coeff != 0)
3144         {
3145           /* Special case powers of two.  */
3146           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3147             return expand_shift (LSHIFT_EXPR, mode, op0,
3148                                  build_int_cst (NULL_TREE, floor_log2 (coeff)),
3149                                  target, unsignedp);
3150
3151           /* Exclude cost of op0 from max_cost to match the cost
3152              calculation of the synth_mult.  */
3153           max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET);
3154           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3155                                    max_cost))
3156             return expand_mult_const (mode, op0, coeff, target,
3157                                       &algorithm, variant);
3158         }
3159     }
3160
3161   if (GET_CODE (op0) == CONST_DOUBLE)
3162     {
3163       rtx temp = op0;
3164       op0 = op1;
3165       op1 = temp;
3166     }
3167
3168   /* Expand x*2.0 as x+x.  */
3169   if (GET_CODE (op1) == CONST_DOUBLE
3170       && SCALAR_FLOAT_MODE_P (mode))
3171     {
3172       REAL_VALUE_TYPE d;
3173       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3174
3175       if (REAL_VALUES_EQUAL (d, dconst2))
3176         {
3177           op0 = force_reg (GET_MODE (op0), op0);
3178           return expand_binop (mode, add_optab, op0, op0,
3179                                target, unsignedp, OPTAB_LIB_WIDEN);
3180         }
3181     }
3182
3183   /* This used to use umul_optab if unsigned, but for non-widening multiply
3184      there is no difference between signed and unsigned.  */
3185   op0 = expand_binop (mode,
3186                       ! unsignedp
3187                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3188                       ? smulv_optab : smul_optab,
3189                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3190   gcc_assert (op0);
3191   return op0;
3192 }
3193 \f
3194 /* Return the smallest n such that 2**n >= X.  */
3195
3196 int
3197 ceil_log2 (unsigned HOST_WIDE_INT x)
3198 {
3199   return floor_log2 (x - 1) + 1;
3200 }
3201
3202 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3203    replace division by D, and put the least significant N bits of the result
3204    in *MULTIPLIER_PTR and return the most significant bit.
3205
3206    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3207    needed precision is in PRECISION (should be <= N).
3208
3209    PRECISION should be as small as possible so this function can choose
3210    multiplier more freely.
3211
3212    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3213    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3214
3215    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3216    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3217
3218 static
3219 unsigned HOST_WIDE_INT
3220 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3221                    rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3222 {
3223   HOST_WIDE_INT mhigh_hi, mlow_hi;
3224   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3225   int lgup, post_shift;
3226   int pow, pow2;
3227   unsigned HOST_WIDE_INT nl, dummy1;
3228   HOST_WIDE_INT nh, dummy2;
3229
3230   /* lgup = ceil(log2(divisor)); */
3231   lgup = ceil_log2 (d);
3232
3233   gcc_assert (lgup <= n);
3234
3235   pow = n + lgup;
3236   pow2 = n + lgup - precision;
3237
3238   /* We could handle this with some effort, but this case is much
3239      better handled directly with a scc insn, so rely on caller using
3240      that.  */
3241   gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3242
3243   /* mlow = 2^(N + lgup)/d */
3244  if (pow >= HOST_BITS_PER_WIDE_INT)
3245     {
3246       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3247       nl = 0;
3248     }
3249   else
3250     {
3251       nh = 0;
3252       nl = (unsigned HOST_WIDE_INT) 1 << pow;
3253     }
3254   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3255                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3256
3257   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
3258   if (pow2 >= HOST_BITS_PER_WIDE_INT)
3259     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3260   else
3261     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3262   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3263                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3264
3265   gcc_assert (!mhigh_hi || nh - d < d);
3266   gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3267   /* Assert that mlow < mhigh.  */
3268   gcc_assert (mlow_hi < mhigh_hi
3269               || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3270
3271   /* If precision == N, then mlow, mhigh exceed 2^N
3272      (but they do not exceed 2^(N+1)).  */
3273
3274   /* Reduce to lowest terms.  */
3275   for (post_shift = lgup; post_shift > 0; post_shift--)
3276     {
3277       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3278       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3279       if (ml_lo >= mh_lo)
3280         break;
3281
3282       mlow_hi = 0;
3283       mlow_lo = ml_lo;
3284       mhigh_hi = 0;
3285       mhigh_lo = mh_lo;
3286     }
3287
3288   *post_shift_ptr = post_shift;
3289   *lgup_ptr = lgup;
3290   if (n < HOST_BITS_PER_WIDE_INT)
3291     {
3292       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3293       *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3294       return mhigh_lo >= mask;
3295     }
3296   else
3297     {
3298       *multiplier_ptr = GEN_INT (mhigh_lo);
3299       return mhigh_hi;
3300     }
3301 }
3302
3303 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3304    congruent to 1 (mod 2**N).  */
3305
3306 static unsigned HOST_WIDE_INT
3307 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3308 {
3309   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3310
3311   /* The algorithm notes that the choice y = x satisfies
3312      x*y == 1 mod 2^3, since x is assumed odd.
3313      Each iteration doubles the number of bits of significance in y.  */
3314
3315   unsigned HOST_WIDE_INT mask;
3316   unsigned HOST_WIDE_INT y = x;
3317   int nbit = 3;
3318
3319   mask = (n == HOST_BITS_PER_WIDE_INT
3320           ? ~(unsigned HOST_WIDE_INT) 0
3321           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3322
3323   while (nbit < n)
3324     {
3325       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3326       nbit *= 2;
3327     }
3328   return y;
3329 }
3330
3331 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3332    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3333    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3334    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3335    become signed.
3336
3337    The result is put in TARGET if that is convenient.
3338
3339    MODE is the mode of operation.  */
3340
3341 rtx
3342 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3343                              rtx op1, rtx target, int unsignedp)
3344 {
3345   rtx tem;
3346   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3347
3348   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3349                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3350                       NULL_RTX, 0);
3351   tem = expand_and (mode, tem, op1, NULL_RTX);
3352   adj_operand
3353     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3354                      adj_operand);
3355
3356   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3357                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3358                       NULL_RTX, 0);
3359   tem = expand_and (mode, tem, op0, NULL_RTX);
3360   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3361                           target);
3362
3363   return target;
3364 }
3365
3366 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
3367
3368 static rtx
3369 extract_high_half (enum machine_mode mode, rtx op)
3370 {
3371   enum machine_mode wider_mode;
3372
3373   if (mode == word_mode)
3374     return gen_highpart (mode, op);
3375
3376   wider_mode = GET_MODE_WIDER_MODE (mode);
3377   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3378                      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3379   return convert_modes (mode, wider_mode, op, 0);
3380 }
3381
3382 /* Like expand_mult_highpart, but only consider using a multiplication
3383    optab.  OP1 is an rtx for the constant operand.  */
3384
3385 static rtx
3386 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3387                             rtx target, int unsignedp, int max_cost)
3388 {
3389   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3390   enum machine_mode wider_mode;
3391   optab moptab;
3392   rtx tem;
3393   int size;
3394
3395   wider_mode = GET_MODE_WIDER_MODE (mode);
3396   size = GET_MODE_BITSIZE (mode);
3397
3398   /* Firstly, try using a multiplication insn that only generates the needed
3399      high part of the product, and in the sign flavor of unsignedp.  */
3400   if (mul_highpart_cost[mode] < max_cost)
3401     {
3402       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3403       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3404                           unsignedp, OPTAB_DIRECT);
3405       if (tem)
3406         return tem;
3407     }
3408
3409   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3410      Need to adjust the result after the multiplication.  */
3411   if (size - 1 < BITS_PER_WORD
3412       && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
3413           + 4 * add_cost[mode] < max_cost))
3414     {
3415       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3416       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3417                           unsignedp, OPTAB_DIRECT);
3418       if (tem)
3419         /* We used the wrong signedness.  Adjust the result.  */
3420         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3421                                             tem, unsignedp);
3422     }
3423
3424   /* Try widening multiplication.  */
3425   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3426   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3427       && mul_widen_cost[wider_mode] < max_cost)
3428     {
3429       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3430                           unsignedp, OPTAB_WIDEN);
3431       if (tem)
3432         return extract_high_half (mode, tem);
3433     }
3434
3435   /* Try widening the mode and perform a non-widening multiplication.  */
3436   if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3437       && size - 1 < BITS_PER_WORD
3438       && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
3439     {
3440       rtx insns, wop0, wop1;
3441
3442       /* We need to widen the operands, for example to ensure the
3443          constant multiplier is correctly sign or zero extended.
3444          Use a sequence to clean-up any instructions emitted by
3445          the conversions if things don't work out.  */
3446       start_sequence ();
3447       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3448       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3449       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3450                           unsignedp, OPTAB_WIDEN);
3451       insns = get_insns ();
3452       end_sequence ();
3453
3454       if (tem)
3455         {
3456           emit_insn (insns);
3457           return extract_high_half (mode, tem);
3458         }
3459     }
3460
3461   /* Try widening multiplication of opposite signedness, and adjust.  */
3462   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3463   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3464       && size - 1 < BITS_PER_WORD
3465       && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
3466           + 4 * add_cost[mode] < max_cost))
3467     {
3468       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3469                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3470       if (tem != 0)
3471         {
3472           tem = extract_high_half (mode, tem);
3473           /* We used the wrong signedness.  Adjust the result.  */
3474           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3475                                               target, unsignedp);
3476         }
3477     }
3478
3479   return 0;
3480 }
3481
3482 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3483    putting the high half of the result in TARGET if that is convenient,
3484    and return where the result is.  If the operation can not be performed,
3485    0 is returned.
3486
3487    MODE is the mode of operation and result.
3488
3489    UNSIGNEDP nonzero means unsigned multiply.
3490
3491    MAX_COST is the total allowed cost for the expanded RTL.  */
3492
3493 static rtx
3494 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3495                       rtx target, int unsignedp, int max_cost)
3496 {
3497   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3498   unsigned HOST_WIDE_INT cnst1;
3499   int extra_cost;
3500   bool sign_adjust = false;
3501   enum mult_variant variant;
3502   struct algorithm alg;
3503   rtx tem;
3504
3505   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3506   gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3507
3508   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3509
3510   /* We can't optimize modes wider than BITS_PER_WORD.
3511      ??? We might be able to perform double-word arithmetic if
3512      mode == word_mode, however all the cost calculations in
3513      synth_mult etc. assume single-word operations.  */
3514   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3515     return expand_mult_highpart_optab (mode, op0, op1, target,
3516                                        unsignedp, max_cost);
3517
3518   extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3519
3520   /* Check whether we try to multiply by a negative constant.  */
3521   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3522     {
3523       sign_adjust = true;
3524       extra_cost += add_cost[mode];
3525     }
3526
3527   /* See whether shift/add multiplication is cheap enough.  */
3528   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3529                            max_cost - extra_cost))
3530     {
3531       /* See whether the specialized multiplication optabs are
3532          cheaper than the shift/add version.  */
3533       tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3534                                         alg.cost.cost + extra_cost);
3535       if (tem)
3536         return tem;
3537
3538       tem = convert_to_mode (wider_mode, op0, unsignedp);
3539       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3540       tem = extract_high_half (mode, tem);
3541
3542       /* Adjust result for signedness.  */
3543       if (sign_adjust)
3544         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3545
3546       return tem;
3547     }
3548   return expand_mult_highpart_optab (mode, op0, op1, target,
3549                                      unsignedp, max_cost);
3550 }
3551
3552
3553 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3554
3555 static rtx
3556 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3557 {
3558   unsigned HOST_WIDE_INT masklow, maskhigh;
3559   rtx result, temp, shift, label;
3560   int logd;
3561
3562   logd = floor_log2 (d);
3563   result = gen_reg_rtx (mode);
3564
3565   /* Avoid conditional branches when they're expensive.  */
3566   if (BRANCH_COST >= 2
3567       && !optimize_size)
3568     {
3569       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3570                                       mode, 0, -1);
3571       if (signmask)
3572         {
3573           signmask = force_reg (mode, signmask);
3574           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3575           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3576
3577           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3578              which instruction sequence to use.  If logical right shifts
3579              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3580              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3581
3582           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3583           if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing
3584               || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3585             {
3586               temp = expand_binop (mode, xor_optab, op0, signmask,
3587                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3588               temp = expand_binop (mode, sub_optab, temp, signmask,
3589                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3590               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3591                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3592               temp = expand_binop (mode, xor_optab, temp, signmask,
3593                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3594               temp = expand_binop (mode, sub_optab, temp, signmask,
3595                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3596             }
3597           else
3598             {
3599               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3600                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3601               signmask = force_reg (mode, signmask);
3602
3603               temp = expand_binop (mode, add_optab, op0, signmask,
3604                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3605               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3606                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3607               temp = expand_binop (mode, sub_optab, temp, signmask,
3608                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3609             }
3610           return temp;
3611         }
3612     }
3613
3614   /* Mask contains the mode's signbit and the significant bits of the
3615      modulus.  By including the signbit in the operation, many targets
3616      can avoid an explicit compare operation in the following comparison
3617      against zero.  */
3618
3619   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3620   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3621     {
3622       masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3623       maskhigh = -1;
3624     }
3625   else
3626     maskhigh = (HOST_WIDE_INT) -1
3627                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3628
3629   temp = expand_binop (mode, and_optab, op0,
3630                        immed_double_const (masklow, maskhigh, mode),
3631                        result, 1, OPTAB_LIB_WIDEN);
3632   if (temp != result)
3633     emit_move_insn (result, temp);
3634
3635   label = gen_label_rtx ();
3636   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3637
3638   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3639                        0, OPTAB_LIB_WIDEN);
3640   masklow = (HOST_WIDE_INT) -1 << logd;
3641   maskhigh = -1;
3642   temp = expand_binop (mode, ior_optab, temp,
3643                        immed_double_const (masklow, maskhigh, mode),
3644                        result, 1, OPTAB_LIB_WIDEN);
3645   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3646                        0, OPTAB_LIB_WIDEN);
3647   if (temp != result)
3648     emit_move_insn (result, temp);
3649   emit_label (label);
3650   return result;
3651 }
3652
3653 /* Expand signed division of OP0 by a power of two D in mode MODE.
3654    This routine is only called for positive values of D.  */
3655
3656 static rtx
3657 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3658 {
3659   rtx temp, label;
3660   tree shift;
3661   int logd;
3662
3663   logd = floor_log2 (d);
3664   shift = build_int_cst (NULL_TREE, logd);
3665
3666   if (d == 2 && BRANCH_COST >= 1)
3667     {
3668       temp = gen_reg_rtx (mode);
3669       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3670       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3671                            0, OPTAB_LIB_WIDEN);
3672       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3673     }
3674
3675 #ifdef HAVE_conditional_move
3676   if (BRANCH_COST >= 2)
3677     {
3678       rtx temp2;
3679
3680       /* ??? emit_conditional_move forces a stack adjustment via
3681          compare_from_rtx so, if the sequence is discarded, it will
3682          be lost.  Do it now instead.  */
3683       do_pending_stack_adjust ();
3684
3685       start_sequence ();
3686       temp2 = copy_to_mode_reg (mode, op0);
3687       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3688                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3689       temp = force_reg (mode, temp);
3690
3691       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3692       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3693                                      mode, temp, temp2, mode, 0);
3694       if (temp2)
3695         {
3696           rtx seq = get_insns ();
3697           end_sequence ();
3698           emit_insn (seq);
3699           return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3700         }
3701       end_sequence ();
3702     }
3703 #endif
3704
3705   if (BRANCH_COST >= 2)
3706     {
3707       int ushift = GET_MODE_BITSIZE (mode) - logd;
3708
3709       temp = gen_reg_rtx (mode);
3710       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3711       if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
3712         temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3713                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3714       else
3715         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3716                              build_int_cst (NULL_TREE, ushift),
3717                              NULL_RTX, 1);
3718       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3719                            0, OPTAB_LIB_WIDEN);
3720       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3721     }
3722
3723   label = gen_label_rtx ();
3724   temp = copy_to_mode_reg (mode, op0);
3725   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3726   expand_inc (temp, GEN_INT (d - 1));
3727   emit_label (label);
3728   return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3729 }
3730 \f
3731 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3732    if that is convenient, and returning where the result is.
3733    You may request either the quotient or the remainder as the result;
3734    specify REM_FLAG nonzero to get the remainder.
3735
3736    CODE is the expression code for which kind of division this is;
3737    it controls how rounding is done.  MODE is the machine mode to use.
3738    UNSIGNEDP nonzero means do unsigned division.  */
3739
3740 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3741    and then correct it by or'ing in missing high bits
3742    if result of ANDI is nonzero.
3743    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3744    This could optimize to a bfexts instruction.
3745    But C doesn't use these operations, so their optimizations are
3746    left for later.  */
3747 /* ??? For modulo, we don't actually need the highpart of the first product,
3748    the low part will do nicely.  And for small divisors, the second multiply
3749    can also be a low-part only multiply or even be completely left out.
3750    E.g. to calculate the remainder of a division by 3 with a 32 bit
3751    multiply, multiply with 0x55555556 and extract the upper two bits;
3752    the result is exact for inputs up to 0x1fffffff.
3753    The input range can be reduced by using cross-sum rules.
3754    For odd divisors >= 3, the following table gives right shift counts
3755    so that if a number is shifted by an integer multiple of the given
3756    amount, the remainder stays the same:
3757    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3758    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3759    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3760    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3761    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3762
3763    Cross-sum rules for even numbers can be derived by leaving as many bits
3764    to the right alone as the divisor has zeros to the right.
3765    E.g. if x is an unsigned 32 bit number:
3766    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3767    */
3768
3769 rtx
3770 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3771                rtx op0, rtx op1, rtx target, int unsignedp)
3772 {
3773   enum machine_mode compute_mode;
3774   rtx tquotient;
3775   rtx quotient = 0, remainder = 0;
3776   rtx last;
3777   int size;
3778   rtx insn, set;
3779   optab optab1, optab2;
3780   int op1_is_constant, op1_is_pow2 = 0;
3781   int max_cost, extra_cost;
3782   static HOST_WIDE_INT last_div_const = 0;
3783   static HOST_WIDE_INT ext_op1;
3784
3785   op1_is_constant = GET_CODE (op1) == CONST_INT;
3786   if (op1_is_constant)
3787     {
3788       ext_op1 = INTVAL (op1);
3789       if (unsignedp)
3790         ext_op1 &= GET_MODE_MASK (mode);
3791       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3792                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3793     }
3794
3795   /*
3796      This is the structure of expand_divmod:
3797
3798      First comes code to fix up the operands so we can perform the operations
3799      correctly and efficiently.
3800
3801      Second comes a switch statement with code specific for each rounding mode.
3802      For some special operands this code emits all RTL for the desired
3803      operation, for other cases, it generates only a quotient and stores it in
3804      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3805      to indicate that it has not done anything.
3806
3807      Last comes code that finishes the operation.  If QUOTIENT is set and
3808      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3809      QUOTIENT is not set, it is computed using trunc rounding.
3810
3811      We try to generate special code for division and remainder when OP1 is a
3812      constant.  If |OP1| = 2**n we can use shifts and some other fast
3813      operations.  For other values of OP1, we compute a carefully selected
3814      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3815      by m.
3816
3817      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3818      half of the product.  Different strategies for generating the product are
3819      implemented in expand_mult_highpart.
3820
3821      If what we actually want is the remainder, we generate that by another
3822      by-constant multiplication and a subtraction.  */
3823
3824   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3825      code below will malfunction if we are, so check here and handle
3826      the special case if so.  */
3827   if (op1 == const1_rtx)
3828     return rem_flag ? const0_rtx : op0;
3829
3830     /* When dividing by -1, we could get an overflow.
3831      negv_optab can handle overflows.  */
3832   if (! unsignedp && op1 == constm1_rtx)
3833     {
3834       if (rem_flag)
3835         return const0_rtx;
3836       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3837                           ? negv_optab : neg_optab, op0, target, 0);
3838     }
3839
3840   if (target
3841       /* Don't use the function value register as a target
3842          since we have to read it as well as write it,
3843          and function-inlining gets confused by this.  */
3844       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3845           /* Don't clobber an operand while doing a multi-step calculation.  */
3846           || ((rem_flag || op1_is_constant)
3847               && (reg_mentioned_p (target, op0)
3848                   || (MEM_P (op0) && MEM_P (target))))
3849           || reg_mentioned_p (target, op1)
3850           || (MEM_P (op1) && MEM_P (target))))
3851     target = 0;
3852
3853   /* Get the mode in which to perform this computation.  Normally it will
3854      be MODE, but sometimes we can't do the desired operation in MODE.
3855      If so, pick a wider mode in which we can do the operation.  Convert
3856      to that mode at the start to avoid repeated conversions.
3857
3858      First see what operations we need.  These depend on the expression
3859      we are evaluating.  (We assume that divxx3 insns exist under the
3860      same conditions that modxx3 insns and that these insns don't normally
3861      fail.  If these assumptions are not correct, we may generate less
3862      efficient code in some cases.)
3863
3864      Then see if we find a mode in which we can open-code that operation
3865      (either a division, modulus, or shift).  Finally, check for the smallest
3866      mode for which we can do the operation with a library call.  */
3867
3868   /* We might want to refine this now that we have division-by-constant
3869      optimization.  Since expand_mult_highpart tries so many variants, it is
3870      not straightforward to generalize this.  Maybe we should make an array
3871      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3872
3873   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3874             ? (unsignedp ? lshr_optab : ashr_optab)
3875             : (unsignedp ? udiv_optab : sdiv_optab));
3876   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3877             ? optab1
3878             : (unsignedp ? udivmod_optab : sdivmod_optab));
3879
3880   for (compute_mode = mode; compute_mode != VOIDmode;
3881        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3882     if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
3883         || optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
3884       break;
3885
3886   if (compute_mode == VOIDmode)
3887     for (compute_mode = mode; compute_mode != VOIDmode;
3888          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3889       if (optab1->handlers[compute_mode].libfunc
3890           || optab2->handlers[compute_mode].libfunc)
3891         break;
3892
3893   /* If we still couldn't find a mode, use MODE, but expand_binop will
3894      probably die.  */
3895   if (compute_mode == VOIDmode)
3896     compute_mode = mode;
3897
3898   if (target && GET_MODE (target) == compute_mode)
3899     tquotient = target;
3900   else
3901     tquotient = gen_reg_rtx (compute_mode);
3902
3903   size = GET_MODE_BITSIZE (compute_mode);
3904 #if 0
3905   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3906      (mode), and thereby get better code when OP1 is a constant.  Do that
3907      later.  It will require going over all usages of SIZE below.  */
3908   size = GET_MODE_BITSIZE (mode);
3909 #endif
3910
3911   /* Only deduct something for a REM if the last divide done was
3912      for a different constant.   Then set the constant of the last
3913      divide.  */
3914   max_cost = div_cost[compute_mode]
3915     - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3916                       && INTVAL (op1) == last_div_const)
3917        ? mul_cost[compute_mode] + add_cost[compute_mode]
3918        : 0);
3919
3920   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3921
3922   /* Now convert to the best mode to use.  */
3923   if (compute_mode != mode)
3924     {
3925       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3926       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3927
3928       /* convert_modes may have placed op1 into a register, so we
3929          must recompute the following.  */
3930       op1_is_constant = GET_CODE (op1) == CONST_INT;
3931       op1_is_pow2 = (op1_is_constant
3932                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3933                           || (! unsignedp
3934                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3935     }
3936
3937   /* If one of the operands is a volatile MEM, copy it into a register.  */
3938
3939   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3940     op0 = force_reg (compute_mode, op0);
3941   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3942     op1 = force_reg (compute_mode, op1);
3943
3944   /* If we need the remainder or if OP1 is constant, we need to
3945      put OP0 in a register in case it has any queued subexpressions.  */
3946   if (rem_flag || op1_is_constant)
3947     op0 = force_reg (compute_mode, op0);
3948
3949   last = get_last_insn ();
3950
3951   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3952   if (unsignedp)
3953     {
3954       if (code == FLOOR_DIV_EXPR)
3955         code = TRUNC_DIV_EXPR;
3956       if (code == FLOOR_MOD_EXPR)
3957         code = TRUNC_MOD_EXPR;
3958       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3959         code = TRUNC_DIV_EXPR;
3960     }
3961
3962   if (op1 != const0_rtx)
3963     switch (code)
3964       {
3965       case TRUNC_MOD_EXPR:
3966       case TRUNC_DIV_EXPR:
3967         if (op1_is_constant)
3968           {
3969             if (unsignedp)
3970               {
3971                 unsigned HOST_WIDE_INT mh;
3972                 int pre_shift, post_shift;
3973                 int dummy;
3974                 rtx ml;
3975                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3976                                             & GET_MODE_MASK (compute_mode));
3977
3978                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3979                   {
3980                     pre_shift = floor_log2 (d);
3981                     if (rem_flag)
3982                       {
3983                         remainder
3984                           = expand_binop (compute_mode, and_optab, op0,
3985                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3986                                           remainder, 1,
3987                                           OPTAB_LIB_WIDEN);
3988                         if (remainder)
3989                           return gen_lowpart (mode, remainder);
3990                       }
3991                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3992                                              build_int_cst (NULL_TREE,
3993                                                             pre_shift),
3994                                              tquotient, 1);
3995                   }
3996                 else if (size <= HOST_BITS_PER_WIDE_INT)
3997                   {
3998                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3999                       {
4000                         /* Most significant bit of divisor is set; emit an scc
4001                            insn.  */
4002                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
4003                                                     compute_mode, 1, 1);
4004                         if (quotient == 0)
4005                           goto fail1;
4006                       }
4007                     else
4008                       {
4009                         /* Find a suitable multiplier and right shift count
4010                            instead of multiplying with D.  */
4011
4012                         mh = choose_multiplier (d, size, size,
4013                                                 &ml, &post_shift, &dummy);
4014
4015                         /* If the suggested multiplier is more than SIZE bits,
4016                            we can do better for even divisors, using an
4017                            initial right shift.  */
4018                         if (mh != 0 && (d & 1) == 0)
4019                           {
4020                             pre_shift = floor_log2 (d & -d);
4021                             mh = choose_multiplier (d >> pre_shift, size,
4022                                                     size - pre_shift,
4023                                                     &ml, &post_shift, &dummy);
4024                             gcc_assert (!mh);
4025                           }
4026                         else
4027                           pre_shift = 0;
4028
4029                         if (mh != 0)
4030                           {
4031                             rtx t1, t2, t3, t4;
4032
4033                             if (post_shift - 1 >= BITS_PER_WORD)
4034                               goto fail1;
4035
4036                             extra_cost
4037                               = (shift_cost[compute_mode][post_shift - 1]
4038                                  + shift_cost[compute_mode][1]
4039                                  + 2 * add_cost[compute_mode]);
4040                             t1 = expand_mult_highpart (compute_mode, op0, ml,
4041                                                        NULL_RTX, 1,
4042                                                        max_cost - extra_cost);
4043                             if (t1 == 0)
4044                               goto fail1;
4045                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4046                                                                op0, t1),
4047                                                 NULL_RTX);
4048                             t3 = expand_shift
4049                               (RSHIFT_EXPR, compute_mode, t2,
4050                                build_int_cst (NULL_TREE, 1),
4051                                NULL_RTX,1);
4052                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4053                                                               t1, t3),
4054                                                 NULL_RTX);
4055                             quotient = expand_shift
4056                               (RSHIFT_EXPR, compute_mode, t4,
4057                                build_int_cst (NULL_TREE, post_shift - 1),
4058                                tquotient, 1);
4059                           }
4060                         else
4061                           {
4062                             rtx t1, t2;
4063
4064                             if (pre_shift >= BITS_PER_WORD
4065                                 || post_shift >= BITS_PER_WORD)
4066                               goto fail1;
4067
4068                             t1 = expand_shift
4069                               (RSHIFT_EXPR, compute_mode, op0,
4070                                build_int_cst (NULL_TREE, pre_shift),
4071                                NULL_RTX, 1);
4072                             extra_cost
4073                               = (shift_cost[compute_mode][pre_shift]
4074                                  + shift_cost[compute_mode][post_shift]);
4075                             t2 = expand_mult_highpart (compute_mode, t1, ml,
4076                                                        NULL_RTX, 1,
4077                                                        max_cost - extra_cost);
4078                             if (t2 == 0)
4079                               goto fail1;
4080                             quotient = expand_shift
4081                               (RSHIFT_EXPR, compute_mode, t2,
4082                                build_int_cst (NULL_TREE, post_shift),
4083                                tquotient, 1);
4084                           }
4085                       }
4086                   }
4087                 else            /* Too wide mode to use tricky code */
4088                   break;
4089
4090                 insn = get_last_insn ();
4091                 if (insn != last
4092                     && (set = single_set (insn)) != 0
4093                     && SET_DEST (set) == quotient)
4094                   set_unique_reg_note (insn,
4095                                        REG_EQUAL,
4096                                        gen_rtx_UDIV (compute_mode, op0, op1));
4097               }
4098             else                /* TRUNC_DIV, signed */
4099               {
4100                 unsigned HOST_WIDE_INT ml;
4101                 int lgup, post_shift;
4102                 rtx mlr;
4103                 HOST_WIDE_INT d = INTVAL (op1);
4104                 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
4105
4106                 /* n rem d = n rem -d */
4107                 if (rem_flag && d < 0)
4108                   {
4109                     d = abs_d;
4110                     op1 = gen_int_mode (abs_d, compute_mode);
4111                   }
4112
4113                 if (d == 1)
4114                   quotient = op0;
4115                 else if (d == -1)
4116                   quotient = expand_unop (compute_mode, neg_optab, op0,
4117                                           tquotient, 0);
4118                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4119                   {
4120                     /* This case is not handled correctly below.  */
4121                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4122                                                 compute_mode, 1, 1);
4123                     if (quotient == 0)
4124                       goto fail1;
4125                   }
4126                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4127                          && (rem_flag ? smod_pow2_cheap[compute_mode]
4128                                       : sdiv_pow2_cheap[compute_mode])
4129                          /* We assume that cheap metric is true if the
4130                             optab has an expander for this mode.  */
4131                          && (((rem_flag ? smod_optab : sdiv_optab)
4132                               ->handlers[compute_mode].insn_code
4133                               != CODE_FOR_nothing)
4134                              || (sdivmod_optab->handlers[compute_mode]
4135                                  .insn_code != CODE_FOR_nothing)))
4136                   ;
4137                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4138                   {
4139                     if (rem_flag)
4140                       {
4141                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4142                         if (remainder)
4143                           return gen_lowpart (mode, remainder);
4144                       }
4145
4146                     if (sdiv_pow2_cheap[compute_mode]
4147                         && ((sdiv_optab->handlers[compute_mode].insn_code
4148                              != CODE_FOR_nothing)
4149                             || (sdivmod_optab->handlers[compute_mode].insn_code
4150                                 != CODE_FOR_nothing)))
4151                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4152                                                 compute_mode, op0,
4153                                                 gen_int_mode (abs_d,
4154                                                               compute_mode),
4155                                                 NULL_RTX, 0);
4156                     else
4157                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4158
4159                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4160                        negate the quotient.  */
4161                     if (d < 0)
4162                       {
4163                         insn = get_last_insn ();
4164                         if (insn != last
4165                             && (set = single_set (insn)) != 0
4166                             && SET_DEST (set) == quotient
4167                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4168                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4169                           set_unique_reg_note (insn,
4170                                                REG_EQUAL,
4171                                                gen_rtx_DIV (compute_mode,
4172                                                             op0,
4173                                                             GEN_INT
4174                                                             (trunc_int_for_mode
4175                                                              (abs_d,
4176                                                               compute_mode))));
4177
4178                         quotient = expand_unop (compute_mode, neg_optab,
4179                                                 quotient, quotient, 0);
4180                       }
4181                   }
4182                 else if (size <= HOST_BITS_PER_WIDE_INT)
4183                   {
4184                     choose_multiplier (abs_d, size, size - 1,
4185                                        &mlr, &post_shift, &lgup);
4186                     ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4187                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4188                       {
4189                         rtx t1, t2, t3;
4190
4191                         if (post_shift >= BITS_PER_WORD
4192                             || size - 1 >= BITS_PER_WORD)
4193                           goto fail1;
4194
4195                         extra_cost = (shift_cost[compute_mode][post_shift]
4196                                       + shift_cost[compute_mode][size - 1]
4197                                       + add_cost[compute_mode]);
4198                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4199                                                    NULL_RTX, 0,
4200                                                    max_cost - extra_cost);
4201                         if (t1 == 0)
4202                           goto fail1;
4203                         t2 = expand_shift
4204                           (RSHIFT_EXPR, compute_mode, t1,
4205                            build_int_cst (NULL_TREE, post_shift),
4206                            NULL_RTX, 0);
4207                         t3 = expand_shift
4208                           (RSHIFT_EXPR, compute_mode, op0,
4209                            build_int_cst (NULL_TREE, size - 1),
4210                            NULL_RTX, 0);
4211                         if (d < 0)
4212                           quotient
4213                             = force_operand (gen_rtx_MINUS (compute_mode,
4214                                                             t3, t2),
4215                                              tquotient);
4216                         else
4217                           quotient
4218                             = force_operand (gen_rtx_MINUS (compute_mode,
4219                                                             t2, t3),
4220                                              tquotient);
4221                       }
4222                     else
4223                       {
4224                         rtx t1, t2, t3, t4;
4225
4226                         if (post_shift >= BITS_PER_WORD
4227                             || size - 1 >= BITS_PER_WORD)
4228                           goto fail1;
4229
4230                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4231                         mlr = gen_int_mode (ml, compute_mode);
4232                         extra_cost = (shift_cost[compute_mode][post_shift]
4233                                       + shift_cost[compute_mode][size - 1]
4234                                       + 2 * add_cost[compute_mode]);
4235                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4236                                                    NULL_RTX, 0,
4237                                                    max_cost - extra_cost);
4238                         if (t1 == 0)
4239                           goto fail1;
4240                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4241                                                           t1, op0),
4242                                             NULL_RTX);
4243                         t3 = expand_shift
4244                           (RSHIFT_EXPR, compute_mode, t2,
4245                            build_int_cst (NULL_TREE, post_shift),
4246                            NULL_RTX, 0);
4247                         t4 = expand_shift
4248                           (RSHIFT_EXPR, compute_mode, op0,
4249                            build_int_cst (NULL_TREE, size - 1),
4250                            NULL_RTX, 0);
4251                         if (d < 0)
4252                           quotient
4253                             = force_operand (gen_rtx_MINUS (compute_mode,
4254                                                             t4, t3),
4255                                              tquotient);
4256                         else
4257                           quotient
4258                             = force_operand (gen_rtx_MINUS (compute_mode,
4259                                                             t3, t4),
4260                                              tquotient);
4261                       }
4262                   }
4263                 else            /* Too wide mode to use tricky code */
4264                   break;
4265
4266                 insn = get_last_insn ();
4267                 if (insn != last
4268                     && (set = single_set (insn)) != 0
4269                     && SET_DEST (set) == quotient)
4270                   set_unique_reg_note (insn,
4271                                        REG_EQUAL,
4272                                        gen_rtx_DIV (compute_mode, op0, op1));
4273               }
4274             break;
4275           }
4276       fail1:
4277         delete_insns_since (last);
4278         break;
4279
4280       case FLOOR_DIV_EXPR:
4281       case FLOOR_MOD_EXPR:
4282       /* We will come here only for signed operations.  */
4283         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4284           {
4285             unsigned HOST_WIDE_INT mh;
4286             int pre_shift, lgup, post_shift;
4287             HOST_WIDE_INT d = INTVAL (op1);
4288             rtx ml;
4289
4290             if (d > 0)
4291               {
4292                 /* We could just as easily deal with negative constants here,
4293                    but it does not seem worth the trouble for GCC 2.6.  */
4294                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4295                   {
4296                     pre_shift = floor_log2 (d);
4297                     if (rem_flag)
4298                       {
4299                         remainder = expand_binop (compute_mode, and_optab, op0,
4300                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4301                                                   remainder, 0, OPTAB_LIB_WIDEN);
4302                         if (remainder)
4303                           return gen_lowpart (mode, remainder);
4304                       }
4305                     quotient = expand_shift
4306                       (RSHIFT_EXPR, compute_mode, op0,
4307                        build_int_cst (NULL_TREE, pre_shift),
4308                        tquotient, 0);
4309                   }
4310                 else
4311                   {
4312                     rtx t1, t2, t3, t4;
4313
4314                     mh = choose_multiplier (d, size, size - 1,
4315                                             &ml, &post_shift, &lgup);
4316                     gcc_assert (!mh);
4317
4318                     if (post_shift < BITS_PER_WORD
4319                         && size - 1 < BITS_PER_WORD)
4320                       {
4321                         t1 = expand_shift
4322                           (RSHIFT_EXPR, compute_mode, op0,
4323                            build_int_cst (NULL_TREE, size - 1),
4324                            NULL_RTX, 0);
4325                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4326                                            NULL_RTX, 0, OPTAB_WIDEN);
4327                         extra_cost = (shift_cost[compute_mode][post_shift]
4328                                       + shift_cost[compute_mode][size - 1]
4329                                       + 2 * add_cost[compute_mode]);
4330                         t3 = expand_mult_highpart (compute_mode, t2, ml,
4331                                                    NULL_RTX, 1,
4332                                                    max_cost - extra_cost);
4333                         if (t3 != 0)
4334                           {
4335                             t4 = expand_shift
4336                               (RSHIFT_EXPR, compute_mode, t3,
4337                                build_int_cst (NULL_TREE, post_shift),
4338                                NULL_RTX, 1);
4339                             quotient = expand_binop (compute_mode, xor_optab,
4340                                                      t4, t1, tquotient, 0,
4341                                                      OPTAB_WIDEN);
4342                           }
4343                       }
4344                   }
4345               }
4346             else
4347               {
4348                 rtx nsign, t1, t2, t3, t4;
4349                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4350                                                   op0, constm1_rtx), NULL_RTX);
4351                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4352                                    0, OPTAB_WIDEN);
4353                 nsign = expand_shift
4354                   (RSHIFT_EXPR, compute_mode, t2,
4355                    build_int_cst (NULL_TREE, size - 1),
4356                    NULL_RTX, 0);
4357                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4358                                     NULL_RTX);
4359                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4360                                     NULL_RTX, 0);
4361                 if (t4)
4362                   {
4363                     rtx t5;
4364                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4365                                       NULL_RTX, 0);
4366                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4367                                                             t4, t5),
4368                                               tquotient);
4369                   }
4370               }
4371           }
4372
4373         if (quotient != 0)
4374           break;
4375         delete_insns_since (last);
4376
4377         /* Try using an instruction that produces both the quotient and
4378            remainder, using truncation.  We can easily compensate the quotient
4379            or remainder to get floor rounding, once we have the remainder.
4380            Notice that we compute also the final remainder value here,
4381            and return the result right away.  */
4382         if (target == 0 || GET_MODE (target) != compute_mode)
4383           target = gen_reg_rtx (compute_mode);
4384
4385         if (rem_flag)
4386           {
4387             remainder
4388               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4389             quotient = gen_reg_rtx (compute_mode);
4390           }
4391         else
4392           {
4393             quotient
4394               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4395             remainder = gen_reg_rtx (compute_mode);
4396           }
4397
4398         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4399                                  quotient, remainder, 0))
4400           {
4401             /* This could be computed with a branch-less sequence.
4402                Save that for later.  */
4403             rtx tem;
4404             rtx label = gen_label_rtx ();
4405             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4406             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4407                                 NULL_RTX, 0, OPTAB_WIDEN);
4408             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4409             expand_dec (quotient, const1_rtx);
4410             expand_inc (remainder, op1);
4411             emit_label (label);
4412             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4413           }
4414
4415         /* No luck with division elimination or divmod.  Have to do it
4416            by conditionally adjusting op0 *and* the result.  */
4417         {
4418           rtx label1, label2, label3, label4, label5;
4419           rtx adjusted_op0;
4420           rtx tem;
4421
4422           quotient = gen_reg_rtx (compute_mode);
4423           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4424           label1 = gen_label_rtx ();
4425           label2 = gen_label_rtx ();
4426           label3 = gen_label_rtx ();
4427           label4 = gen_label_rtx ();
4428           label5 = gen_label_rtx ();
4429           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4430           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4431           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4432                               quotient, 0, OPTAB_LIB_WIDEN);
4433           if (tem != quotient)
4434             emit_move_insn (quotient, tem);
4435           emit_jump_insn (gen_jump (label5));
4436           emit_barrier ();
4437           emit_label (label1);
4438           expand_inc (adjusted_op0, const1_rtx);
4439           emit_jump_insn (gen_jump (label4));
4440           emit_barrier ();
4441           emit_label (label2);
4442           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4443           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4444                               quotient, 0, OPTAB_LIB_WIDEN);
4445           if (tem != quotient)
4446             emit_move_insn (quotient, tem);
4447           emit_jump_insn (gen_jump (label5));
4448           emit_barrier ();
4449           emit_label (label3);
4450           expand_dec (adjusted_op0, const1_rtx);
4451           emit_label (label4);
4452           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4453                               quotient, 0, OPTAB_LIB_WIDEN);
4454           if (tem != quotient)
4455             emit_move_insn (quotient, tem);
4456           expand_dec (quotient, const1_rtx);
4457           emit_label (label5);
4458         }
4459         break;
4460
4461       case CEIL_DIV_EXPR:
4462       case CEIL_MOD_EXPR:
4463         if (unsignedp)
4464           {
4465             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4466               {
4467                 rtx t1, t2, t3;
4468                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4469                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4470                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4471                                    tquotient, 1);
4472                 t2 = expand_binop (compute_mode, and_optab, op0,
4473                                    GEN_INT (d - 1),
4474                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4475                 t3 = gen_reg_rtx (compute_mode);
4476                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4477                                       compute_mode, 1, 1);
4478                 if (t3 == 0)
4479                   {
4480                     rtx lab;
4481                     lab = gen_label_rtx ();
4482                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4483                     expand_inc (t1, const1_rtx);
4484                     emit_label (lab);
4485                     quotient = t1;
4486                   }
4487                 else
4488                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4489                                                           t1, t3),
4490                                             tquotient);
4491                 break;
4492               }
4493
4494             /* Try using an instruction that produces both the quotient and
4495                remainder, using truncation.  We can easily compensate the
4496                quotient or remainder to get ceiling rounding, once we have the
4497                remainder.  Notice that we compute also the final remainder
4498                value here, and return the result right away.  */
4499             if (target == 0 || GET_MODE (target) != compute_mode)
4500               target = gen_reg_rtx (compute_mode);
4501
4502             if (rem_flag)
4503               {
4504                 remainder = (REG_P (target)
4505                              ? target : gen_reg_rtx (compute_mode));
4506                 quotient = gen_reg_rtx (compute_mode);
4507               }
4508             else
4509               {
4510                 quotient = (REG_P (target)
4511                             ? target : gen_reg_rtx (compute_mode));
4512                 remainder = gen_reg_rtx (compute_mode);
4513               }
4514
4515             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4516                                      remainder, 1))
4517               {
4518                 /* This could be computed with a branch-less sequence.
4519                    Save that for later.  */
4520                 rtx label = gen_label_rtx ();
4521                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4522                                  compute_mode, label);
4523                 expand_inc (quotient, const1_rtx);
4524                 expand_dec (remainder, op1);
4525                 emit_label (label);
4526                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4527               }
4528
4529             /* No luck with division elimination or divmod.  Have to do it
4530                by conditionally adjusting op0 *and* the result.  */
4531             {
4532               rtx label1, label2;
4533               rtx adjusted_op0, tem;
4534
4535               quotient = gen_reg_rtx (compute_mode);
4536               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4537               label1 = gen_label_rtx ();
4538               label2 = gen_label_rtx ();
4539               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4540                                compute_mode, label1);
4541               emit_move_insn  (quotient, const0_rtx);
4542               emit_jump_insn (gen_jump (label2));
4543               emit_barrier ();
4544               emit_label (label1);
4545               expand_dec (adjusted_op0, const1_rtx);
4546               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4547                                   quotient, 1, OPTAB_LIB_WIDEN);
4548               if (tem != quotient)
4549                 emit_move_insn (quotient, tem);
4550               expand_inc (quotient, const1_rtx);
4551               emit_label (label2);
4552             }
4553           }
4554         else /* signed */
4555           {
4556             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4557                 && INTVAL (op1) >= 0)
4558               {
4559                 /* This is extremely similar to the code for the unsigned case
4560                    above.  For 2.7 we should merge these variants, but for
4561                    2.6.1 I don't want to touch the code for unsigned since that
4562                    get used in C.  The signed case will only be used by other
4563                    languages (Ada).  */
4564
4565                 rtx t1, t2, t3;
4566                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4567                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4568                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4569                                    tquotient, 0);
4570                 t2 = expand_binop (compute_mode, and_optab, op0,
4571                                    GEN_INT (d - 1),
4572                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4573                 t3 = gen_reg_rtx (compute_mode);
4574                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4575                                       compute_mode, 1, 1);
4576                 if (t3 == 0)
4577                   {
4578                     rtx lab;
4579                     lab = gen_label_rtx ();
4580                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4581                     expand_inc (t1, const1_rtx);
4582                     emit_label (lab);
4583                     quotient = t1;
4584                   }
4585                 else
4586                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4587                                                           t1, t3),
4588                                             tquotient);
4589                 break;
4590               }
4591
4592             /* Try using an instruction that produces both the quotient and
4593                remainder, using truncation.  We can easily compensate the
4594                quotient or remainder to get ceiling rounding, once we have the
4595                remainder.  Notice that we compute also the final remainder
4596                value here, and return the result right away.  */
4597             if (target == 0 || GET_MODE (target) != compute_mode)
4598               target = gen_reg_rtx (compute_mode);
4599             if (rem_flag)
4600               {
4601                 remainder= (REG_P (target)
4602                             ? target : gen_reg_rtx (compute_mode));
4603                 quotient = gen_reg_rtx (compute_mode);
4604               }
4605             else
4606               {
4607                 quotient = (REG_P (target)
4608                             ? target : gen_reg_rtx (compute_mode));
4609                 remainder = gen_reg_rtx (compute_mode);
4610               }
4611
4612             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4613                                      remainder, 0))
4614               {
4615                 /* This could be computed with a branch-less sequence.
4616                    Save that for later.  */
4617                 rtx tem;
4618                 rtx label = gen_label_rtx ();
4619                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4620                                  compute_mode, label);
4621                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4622                                     NULL_RTX, 0, OPTAB_WIDEN);
4623                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4624                 expand_inc (quotient, const1_rtx);
4625                 expand_dec (remainder, op1);
4626                 emit_label (label);
4627                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4628               }
4629
4630             /* No luck with division elimination or divmod.  Have to do it
4631                by conditionally adjusting op0 *and* the result.  */
4632             {
4633               rtx label1, label2, label3, label4, label5;
4634               rtx adjusted_op0;
4635               rtx tem;
4636
4637               quotient = gen_reg_rtx (compute_mode);
4638               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4639               label1 = gen_label_rtx ();
4640               label2 = gen_label_rtx ();
4641               label3 = gen_label_rtx ();
4642               label4 = gen_label_rtx ();
4643               label5 = gen_label_rtx ();
4644               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4645               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4646                                compute_mode, label1);
4647               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4648                                   quotient, 0, OPTAB_LIB_WIDEN);
4649               if (tem != quotient)
4650                 emit_move_insn (quotient, tem);
4651               emit_jump_insn (gen_jump (label5));
4652               emit_barrier ();
4653               emit_label (label1);
4654               expand_dec (adjusted_op0, const1_rtx);
4655               emit_jump_insn (gen_jump (label4));
4656               emit_barrier ();
4657               emit_label (label2);
4658               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4659                                compute_mode, label3);
4660               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4661                                   quotient, 0, OPTAB_LIB_WIDEN);
4662               if (tem != quotient)
4663                 emit_move_insn (quotient, tem);
4664               emit_jump_insn (gen_jump (label5));
4665               emit_barrier ();
4666               emit_label (label3);
4667               expand_inc (adjusted_op0, const1_rtx);
4668               emit_label (label4);
4669               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4670                                   quotient, 0, OPTAB_LIB_WIDEN);
4671               if (tem != quotient)
4672                 emit_move_insn (quotient, tem);
4673               expand_inc (quotient, const1_rtx);
4674               emit_label (label5);
4675             }
4676           }
4677         break;
4678
4679       case EXACT_DIV_EXPR:
4680         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4681           {
4682             HOST_WIDE_INT d = INTVAL (op1);
4683             unsigned HOST_WIDE_INT ml;
4684             int pre_shift;
4685             rtx t1;
4686
4687             pre_shift = floor_log2 (d & -d);
4688             ml = invert_mod2n (d >> pre_shift, size);
4689             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4690                                build_int_cst (NULL_TREE, pre_shift),
4691                                NULL_RTX, unsignedp);
4692             quotient = expand_mult (compute_mode, t1,
4693                                     gen_int_mode (ml, compute_mode),
4694                                     NULL_RTX, 1);
4695
4696             insn = get_last_insn ();
4697             set_unique_reg_note (insn,
4698                                  REG_EQUAL,
4699                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4700                                                  compute_mode,
4701                                                  op0, op1));
4702           }
4703         break;
4704
4705       case ROUND_DIV_EXPR:
4706       case ROUND_MOD_EXPR:
4707         if (unsignedp)
4708           {
4709             rtx tem;
4710             rtx label;
4711             label = gen_label_rtx ();
4712             quotient = gen_reg_rtx (compute_mode);
4713             remainder = gen_reg_rtx (compute_mode);
4714             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4715               {
4716                 rtx tem;
4717                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4718                                          quotient, 1, OPTAB_LIB_WIDEN);
4719                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4720                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4721                                           remainder, 1, OPTAB_LIB_WIDEN);
4722               }
4723             tem = plus_constant (op1, -1);
4724             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4725                                 build_int_cst (NULL_TREE, 1),
4726                                 NULL_RTX, 1);
4727             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4728             expand_inc (quotient, const1_rtx);
4729             expand_dec (remainder, op1);
4730             emit_label (label);
4731           }
4732         else
4733           {
4734             rtx abs_rem, abs_op1, tem, mask;
4735             rtx label;
4736             label = gen_label_rtx ();
4737             quotient = gen_reg_rtx (compute_mode);
4738             remainder = gen_reg_rtx (compute_mode);
4739             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4740               {
4741                 rtx tem;
4742                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4743                                          quotient, 0, OPTAB_LIB_WIDEN);
4744                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4745                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4746                                           remainder, 0, OPTAB_LIB_WIDEN);
4747               }
4748             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4749             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4750             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4751                                 build_int_cst (NULL_TREE, 1),
4752                                 NULL_RTX, 1);
4753             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4754             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4755                                 NULL_RTX, 0, OPTAB_WIDEN);
4756             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4757                                  build_int_cst (NULL_TREE, size - 1),
4758                                  NULL_RTX, 0);
4759             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4760                                 NULL_RTX, 0, OPTAB_WIDEN);
4761             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4762                                 NULL_RTX, 0, OPTAB_WIDEN);
4763             expand_inc (quotient, tem);
4764             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4765                                 NULL_RTX, 0, OPTAB_WIDEN);
4766             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4767                                 NULL_RTX, 0, OPTAB_WIDEN);
4768             expand_dec (remainder, tem);
4769             emit_label (label);
4770           }
4771         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4772
4773       default:
4774         gcc_unreachable ();
4775       }
4776
4777   if (quotient == 0)
4778     {
4779       if (target && GET_MODE (target) != compute_mode)
4780         target = 0;
4781
4782       if (rem_flag)
4783         {
4784           /* Try to produce the remainder without producing the quotient.
4785              If we seem to have a divmod pattern that does not require widening,
4786              don't try widening here.  We should really have a WIDEN argument
4787              to expand_twoval_binop, since what we'd really like to do here is
4788              1) try a mod insn in compute_mode
4789              2) try a divmod insn in compute_mode
4790              3) try a div insn in compute_mode and multiply-subtract to get
4791                 remainder
4792              4) try the same things with widening allowed.  */
4793           remainder
4794             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4795                                  op0, op1, target,
4796                                  unsignedp,
4797                                  ((optab2->handlers[compute_mode].insn_code
4798                                    != CODE_FOR_nothing)
4799                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4800           if (remainder == 0)
4801             {
4802               /* No luck there.  Can we do remainder and divide at once
4803                  without a library call?  */
4804               remainder = gen_reg_rtx (compute_mode);
4805               if (! expand_twoval_binop ((unsignedp
4806                                           ? udivmod_optab
4807                                           : sdivmod_optab),
4808                                          op0, op1,
4809                                          NULL_RTX, remainder, unsignedp))
4810                 remainder = 0;
4811             }
4812
4813           if (remainder)
4814             return gen_lowpart (mode, remainder);
4815         }
4816
4817       /* Produce the quotient.  Try a quotient insn, but not a library call.
4818          If we have a divmod in this mode, use it in preference to widening
4819          the div (for this test we assume it will not fail). Note that optab2
4820          is set to the one of the two optabs that the call below will use.  */
4821       quotient
4822         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4823                              op0, op1, rem_flag ? NULL_RTX : target,
4824                              unsignedp,
4825                              ((optab2->handlers[compute_mode].insn_code
4826                                != CODE_FOR_nothing)
4827                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4828
4829       if (quotient == 0)
4830         {
4831           /* No luck there.  Try a quotient-and-remainder insn,
4832              keeping the quotient alone.  */
4833           quotient = gen_reg_rtx (compute_mode);
4834           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4835                                      op0, op1,
4836                                      quotient, NULL_RTX, unsignedp))
4837             {
4838               quotient = 0;
4839               if (! rem_flag)
4840                 /* Still no luck.  If we are not computing the remainder,
4841                    use a library call for the quotient.  */
4842                 quotient = sign_expand_binop (compute_mode,
4843                                               udiv_optab, sdiv_optab,
4844                                               op0, op1, target,
4845                                               unsignedp, OPTAB_LIB_WIDEN);
4846             }
4847         }
4848     }
4849
4850   if (rem_flag)
4851     {
4852       if (target && GET_MODE (target) != compute_mode)
4853         target = 0;
4854
4855       if (quotient == 0)
4856         {
4857           /* No divide instruction either.  Use library for remainder.  */
4858           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4859                                          op0, op1, target,
4860                                          unsignedp, OPTAB_LIB_WIDEN);
4861           /* No remainder function.  Try a quotient-and-remainder
4862              function, keeping the remainder.  */
4863           if (!remainder)
4864             {
4865               remainder = gen_reg_rtx (compute_mode);
4866               if (!expand_twoval_binop_libfunc
4867                   (unsignedp ? udivmod_optab : sdivmod_optab,
4868                    op0, op1,
4869                    NULL_RTX, remainder,
4870                    unsignedp ? UMOD : MOD))
4871                 remainder = NULL_RTX;
4872             }
4873         }
4874       else
4875         {
4876           /* We divided.  Now finish doing X - Y * (X / Y).  */
4877           remainder = expand_mult (compute_mode, quotient, op1,
4878                                    NULL_RTX, unsignedp);
4879           remainder = expand_binop (compute_mode, sub_optab, op0,
4880                                     remainder, target, unsignedp,
4881                                     OPTAB_LIB_WIDEN);
4882         }
4883     }
4884
4885   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4886 }
4887 \f
4888 /* Return a tree node with data type TYPE, describing the value of X.
4889    Usually this is an VAR_DECL, if there is no obvious better choice.
4890    X may be an expression, however we only support those expressions
4891    generated by loop.c.  */
4892
4893 tree
4894 make_tree (tree type, rtx x)
4895 {
4896   tree t;
4897
4898   switch (GET_CODE (x))
4899     {
4900     case CONST_INT:
4901       {
4902         HOST_WIDE_INT hi = 0;
4903
4904         if (INTVAL (x) < 0
4905             && !(TYPE_UNSIGNED (type)
4906                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
4907                      < HOST_BITS_PER_WIDE_INT)))
4908           hi = -1;
4909
4910         t = build_int_cst_wide (type, INTVAL (x), hi);
4911
4912         return t;
4913       }
4914
4915     case CONST_DOUBLE:
4916       if (GET_MODE (x) == VOIDmode)
4917         t = build_int_cst_wide (type,
4918                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4919       else
4920         {
4921           REAL_VALUE_TYPE d;
4922
4923           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4924           t = build_real (type, d);
4925         }
4926
4927       return t;
4928
4929     case CONST_VECTOR:
4930       {
4931         int i, units;
4932         rtx elt;
4933         tree t = NULL_TREE;
4934
4935         units = CONST_VECTOR_NUNITS (x);
4936
4937         /* Build a tree with vector elements.  */
4938         for (i = units - 1; i >= 0; --i)
4939           {
4940             elt = CONST_VECTOR_ELT (x, i);
4941             t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4942           }
4943
4944         return build_vector (type, t);
4945       }
4946
4947     case PLUS:
4948       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4949                           make_tree (type, XEXP (x, 1)));
4950
4951     case MINUS:
4952       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4953                           make_tree (type, XEXP (x, 1)));
4954
4955     case NEG:
4956       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4957
4958     case MULT:
4959       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4960                           make_tree (type, XEXP (x, 1)));
4961
4962     case ASHIFT:
4963       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4964                           make_tree (type, XEXP (x, 1)));
4965
4966     case LSHIFTRT:
4967       t = lang_hooks.types.unsigned_type (type);
4968       return fold_convert (type, build2 (RSHIFT_EXPR, t,
4969                                          make_tree (t, XEXP (x, 0)),
4970                                          make_tree (type, XEXP (x, 1))));
4971
4972     case ASHIFTRT:
4973       t = lang_hooks.types.signed_type (type);
4974       return fold_convert (type, build2 (RSHIFT_EXPR, t,
4975                                          make_tree (t, XEXP (x, 0)),
4976                                          make_tree (type, XEXP (x, 1))));
4977
4978     case DIV:
4979       if (TREE_CODE (type) != REAL_TYPE)
4980         t = lang_hooks.types.signed_type (type);
4981       else
4982         t = type;
4983
4984       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4985                                          make_tree (t, XEXP (x, 0)),
4986                                          make_tree (t, XEXP (x, 1))));
4987     case UDIV:
4988       t = lang_hooks.types.unsigned_type (type);
4989       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4990                                          make_tree (t, XEXP (x, 0)),
4991                                          make_tree (t, XEXP (x, 1))));
4992
4993     case SIGN_EXTEND:
4994     case ZERO_EXTEND:
4995       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4996                                           GET_CODE (x) == ZERO_EXTEND);
4997       return fold_convert (type, make_tree (t, XEXP (x, 0)));
4998
4999     default:
5000       t = build_decl (VAR_DECL, NULL_TREE, type);
5001
5002       /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
5003          ptr_mode.  So convert.  */
5004       if (POINTER_TYPE_P (type))
5005         x = convert_memory_address (TYPE_MODE (type), x);
5006
5007       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5008          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5009       t->decl_with_rtl.rtl = x;
5010
5011       return t;
5012     }
5013 }
5014
5015 /* Check whether the multiplication X * MULT + ADD overflows.
5016    X, MULT and ADD must be CONST_*.
5017    MODE is the machine mode for the computation.
5018    X and MULT must have mode MODE.  ADD may have a different mode.
5019    So can X (defaults to same as MODE).
5020    UNSIGNEDP is nonzero to do unsigned multiplication.  */
5021
5022 bool
5023 const_mult_add_overflow_p (rtx x, rtx mult, rtx add,
5024                            enum machine_mode mode, int unsignedp)
5025 {
5026   tree type, mult_type, add_type, result;
5027
5028   type = lang_hooks.types.type_for_mode (mode, unsignedp);
5029
5030   /* In order to get a proper overflow indication from an unsigned
5031      type, we have to pretend that it's a sizetype.  */
5032   mult_type = type;
5033   if (unsignedp)
5034     {
5035       /* FIXME:It would be nice if we could step directly from this
5036          type to its sizetype equivalent.  */
5037       mult_type = build_distinct_type_copy (type);
5038       TYPE_IS_SIZETYPE (mult_type) = 1;
5039     }
5040
5041   add_type = (GET_MODE (add) == VOIDmode ? mult_type
5042               : lang_hooks.types.type_for_mode (GET_MODE (add), unsignedp));
5043
5044   result = fold_build2 (PLUS_EXPR, mult_type,
5045                         fold_build2 (MULT_EXPR, mult_type,
5046                                      make_tree (mult_type, x),
5047                                      make_tree (mult_type, mult)),
5048                         make_tree (add_type, add));
5049
5050   return TREE_CONSTANT_OVERFLOW (result);
5051 }
5052
5053 /* Return an rtx representing the value of X * MULT + ADD.
5054    TARGET is a suggestion for where to store the result (an rtx).
5055    MODE is the machine mode for the computation.
5056    X and MULT must have mode MODE.  ADD may have a different mode.
5057    So can X (defaults to same as MODE).
5058    UNSIGNEDP is nonzero to do unsigned multiplication.
5059    This may emit insns.  */
5060
5061 rtx
5062 expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
5063                  int unsignedp)
5064 {
5065   tree type = lang_hooks.types.type_for_mode (mode, unsignedp);
5066   tree add_type = (GET_MODE (add) == VOIDmode
5067                    ? type: lang_hooks.types.type_for_mode (GET_MODE (add),
5068                                                            unsignedp));
5069   tree result = fold_build2 (PLUS_EXPR, type,
5070                              fold_build2 (MULT_EXPR, type,
5071                                           make_tree (type, x),
5072                                           make_tree (type, mult)),
5073                              make_tree (add_type, add));
5074
5075   return expand_expr (result, target, VOIDmode, 0);
5076 }
5077 \f
5078 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5079    and returning TARGET.
5080
5081    If TARGET is 0, a pseudo-register or constant is returned.  */
5082
5083 rtx
5084 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5085 {
5086   rtx tem = 0;
5087
5088   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5089     tem = simplify_binary_operation (AND, mode, op0, op1);
5090   if (tem == 0)
5091     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5092
5093   if (target == 0)
5094     target = tem;
5095   else if (tem != target)
5096     emit_move_insn (target, tem);
5097   return target;
5098 }
5099 \f
5100 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5101    and storing in TARGET.  Normally return TARGET.
5102    Return 0 if that cannot be done.
5103
5104    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5105    it is VOIDmode, they cannot both be CONST_INT.
5106
5107    UNSIGNEDP is for the case where we have to widen the operands
5108    to perform the operation.  It says to use zero-extension.
5109
5110    NORMALIZEP is 1 if we should convert the result to be either zero
5111    or one.  Normalize is -1 if we should convert the result to be
5112    either zero or -1.  If NORMALIZEP is zero, the result will be left
5113    "raw" out of the scc insn.  */
5114
5115 rtx
5116 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5117                  enum machine_mode mode, int unsignedp, int normalizep)
5118 {
5119   rtx subtarget;
5120   enum insn_code icode;
5121   enum machine_mode compare_mode;
5122   enum machine_mode target_mode = GET_MODE (target);
5123   rtx tem;
5124   rtx last = get_last_insn ();
5125   rtx pattern, comparison;
5126
5127   if (unsignedp)
5128     code = unsigned_condition (code);
5129
5130   /* If one operand is constant, make it the second one.  Only do this
5131      if the other operand is not constant as well.  */
5132
5133   if (swap_commutative_operands_p (op0, op1))
5134     {
5135       tem = op0;
5136       op0 = op1;
5137       op1 = tem;
5138       code = swap_condition (code);
5139     }
5140
5141   if (mode == VOIDmode)
5142     mode = GET_MODE (op0);
5143
5144   /* For some comparisons with 1 and -1, we can convert this to
5145      comparisons with zero.  This will often produce more opportunities for
5146      store-flag insns.  */
5147
5148   switch (code)
5149     {
5150     case LT:
5151       if (op1 == const1_rtx)
5152         op1 = const0_rtx, code = LE;
5153       break;
5154     case LE:
5155       if (op1 == constm1_rtx)
5156         op1 = const0_rtx, code = LT;
5157       break;
5158     case GE:
5159       if (op1 == const1_rtx)
5160         op1 = const0_rtx, code = GT;
5161       break;
5162     case GT:
5163       if (op1 == constm1_rtx)
5164         op1 = const0_rtx, code = GE;
5165       break;
5166     case GEU:
5167       if (op1 == const1_rtx)
5168         op1 = const0_rtx, code = NE;
5169       break;
5170     case LTU:
5171       if (op1 == const1_rtx)
5172         op1 = const0_rtx, code = EQ;
5173       break;
5174     default:
5175       break;
5176     }
5177
5178   /* If we are comparing a double-word integer with zero or -1, we can
5179      convert the comparison into one involving a single word.  */
5180   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5181       && GET_MODE_CLASS (mode) == MODE_INT
5182       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5183     {
5184       if ((code == EQ || code == NE)
5185           && (op1 == const0_rtx || op1 == constm1_rtx))
5186         {
5187           rtx op00, op01, op0both;
5188
5189           /* Do a logical OR or AND of the two words and compare the result.  */
5190           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5191           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5192           op0both = expand_binop (word_mode,
5193                                   op1 == const0_rtx ? ior_optab : and_optab,
5194                                   op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
5195
5196           if (op0both != 0)
5197             return emit_store_flag (target, code, op0both, op1, word_mode,
5198                                     unsignedp, normalizep);
5199         }
5200       else if ((code == LT || code == GE) && op1 == const0_rtx)
5201         {
5202           rtx op0h;
5203
5204           /* If testing the sign bit, can just test on high word.  */
5205           op0h = simplify_gen_subreg (word_mode, op0, mode,
5206                                       subreg_highpart_offset (word_mode, mode));
5207           return emit_store_flag (target, code, op0h, op1, word_mode,
5208                                   unsignedp, normalizep);
5209         }
5210     }
5211
5212   /* From now on, we won't change CODE, so set ICODE now.  */
5213   icode = setcc_gen_code[(int) code];
5214
5215   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5216      complement of A (for GE) and shifting the sign bit to the low bit.  */
5217   if (op1 == const0_rtx && (code == LT || code == GE)
5218       && GET_MODE_CLASS (mode) == MODE_INT
5219       && (normalizep || STORE_FLAG_VALUE == 1
5220           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5221               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5222                   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
5223     {
5224       subtarget = target;
5225
5226       /* If the result is to be wider than OP0, it is best to convert it
5227          first.  If it is to be narrower, it is *incorrect* to convert it
5228          first.  */
5229       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5230         {
5231           op0 = convert_modes (target_mode, mode, op0, 0);
5232           mode = target_mode;
5233         }
5234
5235       if (target_mode != mode)
5236         subtarget = 0;
5237
5238       if (code == GE)
5239         op0 = expand_unop (mode, one_cmpl_optab, op0,
5240                            ((STORE_FLAG_VALUE == 1 || normalizep)
5241                             ? 0 : subtarget), 0);
5242
5243       if (STORE_FLAG_VALUE == 1 || normalizep)
5244         /* If we are supposed to produce a 0/1 value, we want to do
5245            a logical shift from the sign bit to the low-order bit; for
5246            a -1/0 value, we do an arithmetic shift.  */
5247         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5248                             size_int (GET_MODE_BITSIZE (mode) - 1),
5249                             subtarget, normalizep != -1);
5250
5251       if (mode != target_mode)
5252         op0 = convert_modes (target_mode, mode, op0, 0);
5253
5254       return op0;
5255     }
5256
5257   if (icode != CODE_FOR_nothing)
5258     {
5259       insn_operand_predicate_fn pred;
5260
5261       /* We think we may be able to do this with a scc insn.  Emit the
5262          comparison and then the scc insn.  */
5263
5264       do_pending_stack_adjust ();
5265       last = get_last_insn ();
5266
5267       comparison
5268         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
5269       if (CONSTANT_P (comparison))
5270         {
5271           switch (GET_CODE (comparison))
5272             {
5273             case CONST_INT:
5274               if (comparison == const0_rtx)
5275                 return const0_rtx;
5276               break;
5277
5278 #ifdef FLOAT_STORE_FLAG_VALUE
5279             case CONST_DOUBLE:
5280               if (comparison == CONST0_RTX (GET_MODE (comparison)))
5281                 return const0_rtx;
5282               break;
5283 #endif
5284             default:
5285               gcc_unreachable ();
5286             }
5287
5288           if (normalizep == 1)
5289             return const1_rtx;
5290           if (normalizep == -1)
5291             return constm1_rtx;
5292           return const_true_rtx;
5293         }
5294
5295       /* The code of COMPARISON may not match CODE if compare_from_rtx
5296          decided to swap its operands and reverse the original code.
5297
5298          We know that compare_from_rtx returns either a CONST_INT or
5299          a new comparison code, so it is safe to just extract the
5300          code from COMPARISON.  */
5301       code = GET_CODE (comparison);
5302
5303       /* Get a reference to the target in the proper mode for this insn.  */
5304       compare_mode = insn_data[(int) icode].operand[0].mode;
5305       subtarget = target;
5306       pred = insn_data[(int) icode].operand[0].predicate;
5307       if (optimize || ! (*pred) (subtarget, compare_mode))
5308         subtarget = gen_reg_rtx (compare_mode);
5309
5310       pattern = GEN_FCN (icode) (subtarget);
5311       if (pattern)
5312         {
5313           emit_insn (pattern);
5314
5315           /* If we are converting to a wider mode, first convert to
5316              TARGET_MODE, then normalize.  This produces better combining
5317              opportunities on machines that have a SIGN_EXTRACT when we are
5318              testing a single bit.  This mostly benefits the 68k.
5319
5320              If STORE_FLAG_VALUE does not have the sign bit set when
5321              interpreted in COMPARE_MODE, we can do this conversion as
5322              unsigned, which is usually more efficient.  */
5323           if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
5324             {
5325               convert_move (target, subtarget,
5326                             (GET_MODE_BITSIZE (compare_mode)
5327                              <= HOST_BITS_PER_WIDE_INT)
5328                             && 0 == (STORE_FLAG_VALUE
5329                                      & ((HOST_WIDE_INT) 1
5330                                         << (GET_MODE_BITSIZE (compare_mode) -1))));
5331               op0 = target;
5332               compare_mode = target_mode;
5333             }
5334           else
5335             op0 = subtarget;
5336
5337           /* If we want to keep subexpressions around, don't reuse our
5338              last target.  */
5339
5340           if (optimize)
5341             subtarget = 0;
5342
5343           /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
5344              we don't have to do anything.  */
5345           if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5346             ;
5347           /* STORE_FLAG_VALUE might be the most negative number, so write
5348              the comparison this way to avoid a compiler-time warning.  */
5349           else if (- normalizep == STORE_FLAG_VALUE)
5350             op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
5351
5352           /* We don't want to use STORE_FLAG_VALUE < 0 below since this
5353              makes it hard to use a value of just the sign bit due to
5354              ANSI integer constant typing rules.  */
5355           else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
5356                    && (STORE_FLAG_VALUE
5357                        & ((HOST_WIDE_INT) 1
5358                           << (GET_MODE_BITSIZE (compare_mode) - 1))))
5359             op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
5360                                 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
5361                                 subtarget, normalizep == 1);
5362           else
5363             {
5364               gcc_assert (STORE_FLAG_VALUE & 1);
5365
5366               op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
5367               if (normalizep == -1)
5368                 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
5369             }
5370
5371           /* If we were converting to a smaller mode, do the
5372              conversion now.  */
5373           if (target_mode != compare_mode)
5374             {
5375               convert_move (target, op0, 0);
5376               return target;
5377             }
5378           else
5379             return op0;
5380         }
5381     }
5382
5383   delete_insns_since (last);
5384
5385   /* If optimizing, use different pseudo registers for each insn, instead
5386      of reusing the same pseudo.  This leads to better CSE, but slows
5387      down the compiler, since there are more pseudos */
5388   subtarget = (!optimize
5389                && (target_mode == mode)) ? target : NULL_RTX;
5390
5391   /* If we reached here, we can't do this with a scc insn.  However, there
5392      are some comparisons that can be done directly.  For example, if
5393      this is an equality comparison of integers, we can try to exclusive-or
5394      (or subtract) the two operands and use a recursive call to try the
5395      comparison with zero.  Don't do any of these cases if branches are
5396      very cheap.  */
5397
5398   if (BRANCH_COST > 0
5399       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5400       && op1 != const0_rtx)
5401     {
5402       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5403                           OPTAB_WIDEN);
5404
5405       if (tem == 0)
5406         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5407                             OPTAB_WIDEN);
5408       if (tem != 0)
5409         tem = emit_store_flag (target, code, tem, const0_rtx,
5410                                mode, unsignedp, normalizep);
5411       if (tem == 0)
5412         delete_insns_since (last);
5413       return tem;
5414     }
5415
5416   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5417      the constant zero.  Reject all other comparisons at this point.  Only
5418      do LE and GT if branches are expensive since they are expensive on
5419      2-operand machines.  */
5420
5421   if (BRANCH_COST == 0
5422       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5423       || (code != EQ && code != NE
5424           && (BRANCH_COST <= 1 || (code != LE && code != GT))))
5425     return 0;
5426
5427   /* See what we need to return.  We can only return a 1, -1, or the
5428      sign bit.  */
5429
5430   if (normalizep == 0)
5431     {
5432       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5433         normalizep = STORE_FLAG_VALUE;
5434
5435       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5436                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5437                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5438         ;
5439       else
5440         return 0;
5441     }
5442
5443   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5444      do the necessary operation below.  */
5445
5446   tem = 0;
5447
5448   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5449      the sign bit set.  */
5450
5451   if (code == LE)
5452     {
5453       /* This is destructive, so SUBTARGET can't be OP0.  */
5454       if (rtx_equal_p (subtarget, op0))
5455         subtarget = 0;
5456
5457       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5458                           OPTAB_WIDEN);
5459       if (tem)
5460         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5461                             OPTAB_WIDEN);
5462     }
5463
5464   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5465      number of bits in the mode of OP0, minus one.  */
5466
5467   if (code == GT)
5468     {
5469       if (rtx_equal_p (subtarget, op0))
5470         subtarget = 0;
5471
5472       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5473                           size_int (GET_MODE_BITSIZE (mode) - 1),
5474                           subtarget, 0);
5475       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5476                           OPTAB_WIDEN);
5477     }
5478
5479   if (code == EQ || code == NE)
5480     {
5481       /* For EQ or NE, one way to do the comparison is to apply an operation
5482          that converts the operand into a positive number if it is nonzero
5483          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5484          for NE we negate.  This puts the result in the sign bit.  Then we
5485          normalize with a shift, if needed.
5486
5487          Two operations that can do the above actions are ABS and FFS, so try
5488          them.  If that doesn't work, and MODE is smaller than a full word,
5489          we can use zero-extension to the wider mode (an unsigned conversion)
5490          as the operation.  */
5491
5492       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5493          that is compensated by the subsequent overflow when subtracting
5494          one / negating.  */
5495
5496       if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5497         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5498       else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5499         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5500       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5501         {
5502           tem = convert_modes (word_mode, mode, op0, 1);
5503           mode = word_mode;
5504         }
5505
5506       if (tem != 0)
5507         {
5508           if (code == EQ)
5509             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5510                                 0, OPTAB_WIDEN);
5511           else
5512             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5513         }
5514
5515       /* If we couldn't do it that way, for NE we can "or" the two's complement
5516          of the value with itself.  For EQ, we take the one's complement of
5517          that "or", which is an extra insn, so we only handle EQ if branches
5518          are expensive.  */
5519
5520       if (tem == 0 && (code == NE || BRANCH_COST > 1))
5521         {
5522           if (rtx_equal_p (subtarget, op0))
5523             subtarget = 0;
5524
5525           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5526           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5527                               OPTAB_WIDEN);
5528
5529           if (tem && code == EQ)
5530             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5531         }
5532     }
5533
5534   if (tem && normalizep)
5535     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5536                         size_int (GET_MODE_BITSIZE (mode) - 1),
5537                         subtarget, normalizep == 1);
5538
5539   if (tem)
5540     {
5541       if (GET_MODE (tem) != target_mode)
5542         {
5543           convert_move (target, tem, 0);
5544           tem = target;
5545         }
5546       else if (!subtarget)
5547         {
5548           emit_move_insn (target, tem);
5549           tem = target;
5550         }
5551     }
5552   else
5553     delete_insns_since (last);
5554
5555   return tem;
5556 }
5557
5558 /* Like emit_store_flag, but always succeeds.  */
5559
5560 rtx
5561 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5562                        enum machine_mode mode, int unsignedp, int normalizep)
5563 {
5564   rtx tem, label;
5565
5566   /* First see if emit_store_flag can do the job.  */
5567   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5568   if (tem != 0)
5569     return tem;
5570
5571   if (normalizep == 0)
5572     normalizep = 1;
5573
5574   /* If this failed, we have to do this with set/compare/jump/set code.  */
5575
5576   if (!REG_P (target)
5577       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5578     target = gen_reg_rtx (GET_MODE (target));
5579
5580   emit_move_insn (target, const1_rtx);
5581   label = gen_label_rtx ();
5582   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5583                            NULL_RTX, label);
5584
5585   emit_move_insn (target, const0_rtx);
5586   emit_label (label);
5587
5588   return target;
5589 }
5590 \f
5591 /* Perform possibly multi-word comparison and conditional jump to LABEL
5592    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
5593
5594    The algorithm is based on the code in expr.c:do_jump.
5595
5596    Note that this does not perform a general comparison.  Only
5597    variants generated within expmed.c are correctly handled, others
5598    could be handled if needed.  */
5599
5600 static void
5601 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5602                  rtx label)
5603 {
5604   /* If this mode is an integer too wide to compare properly,
5605      compare word by word.  Rely on cse to optimize constant cases.  */
5606
5607   if (GET_MODE_CLASS (mode) == MODE_INT
5608       && ! can_compare_p (op, mode, ccp_jump))
5609     {
5610       rtx label2 = gen_label_rtx ();
5611
5612       switch (op)
5613         {
5614         case LTU:
5615           do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
5616           break;
5617
5618         case LEU:
5619           do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
5620           break;
5621
5622         case LT:
5623           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
5624           break;
5625
5626         case GT:
5627           do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
5628           break;
5629
5630         case GE:
5631           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
5632           break;
5633
5634           /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
5635              that's the only equality operations we do */
5636         case EQ:
5637           gcc_assert (arg2 == const0_rtx && mode == GET_MODE(arg1));
5638           do_jump_by_parts_equality_rtx (arg1, label2, label);
5639           break;
5640
5641         case NE:
5642           gcc_assert (arg2 == const0_rtx && mode == GET_MODE(arg1));
5643           do_jump_by_parts_equality_rtx (arg1, label, label2);
5644           break;
5645
5646         default:
5647           gcc_unreachable ();
5648         }
5649
5650       emit_label (label2);
5651     }
5652   else
5653     emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
5654 }