gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 2, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING.  If not, write to the Free
  20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  21 02111-1307, USA.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "tm.h"
  28 #include "toplev.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "tm_p.h"
  32 #include "flags.h"
  33 #include "insn-config.h"
  34 #include "expr.h"
  35 #include "optabs.h"
  36 #include "real.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39
  40 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  41                                    unsigned HOST_WIDE_INT,
  42                                    unsigned HOST_WIDE_INT, rtx);
  43 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  44                                    unsigned HOST_WIDE_INT, rtx);
  45 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  46                                     unsigned HOST_WIDE_INT,
  47                                     unsigned HOST_WIDE_INT,
  48                                     unsigned HOST_WIDE_INT, rtx, int);
  49 static rtx mask_rtx (enum machine_mode, int, int, int);
  50 static rtx lshift_value (enum machine_mode, rtx, int, int);
  51 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  52                                     unsigned HOST_WIDE_INT, int);
  53 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  54
  55 /* Nonzero means divides or modulus operations are relatively cheap for
  56    powers of two, so don't use branches; emit the operation instead.
  57    Usually, this will mean that the MD file will emit non-branch
  58    sequences.  */
  59
  60 static int sdiv_pow2_cheap, smod_pow2_cheap;
  61
  62 #ifndef SLOW_UNALIGNED_ACCESS
  63 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  64 #endif
  65
  66 /* For compilers that support multiple targets with different word sizes,
  67    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  68    is the H8/300(H) compiler.  */
  69
  70 #ifndef MAX_BITS_PER_WORD
  71 #define MAX_BITS_PER_WORD BITS_PER_WORD
  72 #endif
  73
  74 /* Reduce conditional compilation elsewhere.  */
  75 #ifndef HAVE_insv
  76 #define HAVE_insv       0
  77 #define CODE_FOR_insv   CODE_FOR_nothing
  78 #define gen_insv(a,b,c,d) NULL_RTX
  79 #endif
  80 #ifndef HAVE_extv
  81 #define HAVE_extv       0
  82 #define CODE_FOR_extv   CODE_FOR_nothing
  83 #define gen_extv(a,b,c,d) NULL_RTX
  84 #endif
  85 #ifndef HAVE_extzv
  86 #define HAVE_extzv      0
  87 #define CODE_FOR_extzv  CODE_FOR_nothing
  88 #define gen_extzv(a,b,c,d) NULL_RTX
  89 #endif
  90
  91 /* Cost of various pieces of RTL.  Note that some of these are indexed by
  92    shift count and some by mode.  */
  93 static int add_cost, negate_cost, zero_cost;
  94 static int shift_cost[MAX_BITS_PER_WORD];
  95 static int shiftadd_cost[MAX_BITS_PER_WORD];
  96 static int shiftsub_cost[MAX_BITS_PER_WORD];
  97 static int mul_cost[NUM_MACHINE_MODES];
  98 static int div_cost[NUM_MACHINE_MODES];
  99 static int mul_widen_cost[NUM_MACHINE_MODES];
 100 static int mul_highpart_cost[NUM_MACHINE_MODES];
 101
 102 void
 103 init_expmed (void)
 104 {
 105   rtx reg, shift_insn, shiftadd_insn, shiftsub_insn;
 106   int dummy;
 107   int m;
 108   enum machine_mode mode, wider_mode;
 109
 110   start_sequence ();
 111
 112   /* This is "some random pseudo register" for purposes of calling recog
 113      to see what insns exist.  */
 114   reg = gen_rtx_REG (word_mode, 10000);
 115
 116   zero_cost = rtx_cost (const0_rtx, 0);
 117   add_cost = rtx_cost (gen_rtx_PLUS (word_mode, reg, reg), SET);
 118
 119   shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg,
 120                                        gen_rtx_ASHIFT (word_mode, reg,
 121                                                        const0_rtx)));
 122
 123   shiftadd_insn
 124     = emit_insn (gen_rtx_SET (VOIDmode, reg,
 125                               gen_rtx_PLUS (word_mode,
 126                                             gen_rtx_MULT (word_mode,
 127                                                           reg, const0_rtx),
 128                                             reg)));
 129
 130   shiftsub_insn
 131     = emit_insn (gen_rtx_SET (VOIDmode, reg,
 132                               gen_rtx_MINUS (word_mode,
 133                                              gen_rtx_MULT (word_mode,
 134                                                            reg, const0_rtx),
 135                                              reg)));
 136
 137   init_recog ();
 138
 139   shift_cost[0] = 0;
 140   shiftadd_cost[0] = shiftsub_cost[0] = add_cost;
 141
 142   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 143     {
 144       rtx c_int = GEN_INT ((HOST_WIDE_INT) 1 << m);
 145       shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;
 146
 147       XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);
 148       if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)
 149         shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);
 150
 151       XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1) = c_int;
 152       if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)
 153         shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);
 154
 155       XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1) = c_int;
 156       if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)
 157         shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);
 158     }
 159
 160   negate_cost = rtx_cost (gen_rtx_NEG (word_mode, reg), SET);
 161
 162   sdiv_pow2_cheap
 163     = (rtx_cost (gen_rtx_DIV (word_mode, reg, GEN_INT (32)), SET)
 164        <= 2 * add_cost);
 165   smod_pow2_cheap
 166     = (rtx_cost (gen_rtx_MOD (word_mode, reg, GEN_INT (32)), SET)
 167        <= 2 * add_cost);
 168
 169   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 170        mode != VOIDmode;
 171        mode = GET_MODE_WIDER_MODE (mode))
 172     {
 173       reg = gen_rtx_REG (mode, 10000);
 174       div_cost[(int) mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET);
 175       mul_cost[(int) mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET);
 176       wider_mode = GET_MODE_WIDER_MODE (mode);
 177       if (wider_mode != VOIDmode)
 178         {
 179           mul_widen_cost[(int) wider_mode]
 180             = rtx_cost (gen_rtx_MULT (wider_mode,
 181                                       gen_rtx_ZERO_EXTEND (wider_mode, reg),
 182                                       gen_rtx_ZERO_EXTEND (wider_mode, reg)),
 183                         SET);
 184           mul_highpart_cost[(int) mode]
 185             = rtx_cost (gen_rtx_TRUNCATE
 186                         (mode,
 187                          gen_rtx_LSHIFTRT (wider_mode,
 188                                            gen_rtx_MULT (wider_mode,
 189                                                          gen_rtx_ZERO_EXTEND
 190                                                          (wider_mode, reg),
 191                                                          gen_rtx_ZERO_EXTEND
 192                                                          (wider_mode, reg)),
 193                                            GEN_INT (GET_MODE_BITSIZE (mode)))),
 194                         SET);
 195         }
 196     }
 197
 198   end_sequence ();
 199 }
 200
 201 /* Return an rtx representing minus the value of X.
 202    MODE is the intended mode of the result,
 203    useful if X is a CONST_INT.  */
 204
 205 rtx
 206 negate_rtx (enum machine_mode mode, rtx x)
 207 {
 208   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 209
 210   if (result == 0)
 211     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 212
 213   return result;
 214 }
 215
 216 /* Report on the availability of insv/extv/extzv and the desired mode
 217    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 218    is false; else the mode of the specified operand.  If OPNO is -1,
 219    all the caller cares about is whether the insn is available.  */
 220 enum machine_mode
 221 mode_for_extraction (enum extraction_pattern pattern, int opno)
 222 {
 223   const struct insn_data *data;
 224
 225   switch (pattern)
 226     {
 227     case EP_insv:
 228       if (HAVE_insv)
 229         {
 230           data = &insn_data[CODE_FOR_insv];
 231           break;
 232         }
 233       return MAX_MACHINE_MODE;
 234
 235     case EP_extv:
 236       if (HAVE_extv)
 237         {
 238           data = &insn_data[CODE_FOR_extv];
 239           break;
 240         }
 241       return MAX_MACHINE_MODE;
 242
 243     case EP_extzv:
 244       if (HAVE_extzv)
 245         {
 246           data = &insn_data[CODE_FOR_extzv];
 247           break;
 248         }
 249       return MAX_MACHINE_MODE;
 250
 251     default:
 252       abort ();
 253     }
 254
 255   if (opno == -1)
 256     return VOIDmode;
 257
 258   /* Everyone who uses this function used to follow it with
 259      if (result == VOIDmode) result = word_mode; */
 260   if (data->operand[opno].mode == VOIDmode)
 261     return word_mode;
 262   return data->operand[opno].mode;
 263 }
 264
 265 \f
 266 /* Generate code to store value from rtx VALUE
 267    into a bit-field within structure STR_RTX
 268    containing BITSIZE bits starting at bit BITNUM.
 269    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
 270    ALIGN is the alignment that STR_RTX is known to have.
 271    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
 272
 273 /* ??? Note that there are two different ideas here for how
 274    to determine the size to count bits within, for a register.
 275    One is BITS_PER_WORD, and the other is the size of operand 3
 276    of the insv pattern.
 277
 278    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
 279    else, we use the mode of operand 3.  */
 280
 281 rtx
 282 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 283                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 284                  rtx value, HOST_WIDE_INT total_size)
 285 {
 286   unsigned int unit
 287     = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
 288   unsigned HOST_WIDE_INT offset = bitnum / unit;
 289   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
 290   rtx op0 = str_rtx;
 291   int byte_offset;
 292   rtx orig_value;
 293
 294   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 295
 296   /* Discount the part of the structure before the desired byte.
 297      We need to know how many bytes are safe to reference after it.  */
 298   if (total_size >= 0)
 299     total_size -= (bitpos / BIGGEST_ALIGNMENT
 300                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
 301
 302   while (GET_CODE (op0) == SUBREG)
 303     {
 304       /* The following line once was done only if WORDS_BIG_ENDIAN,
 305          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 306          meaningful at a much higher level; when structures are copied
 307          between memory and regs, the higher-numbered regs
 308          always get higher addresses.  */
 309       offset += (SUBREG_BYTE (op0) / UNITS_PER_WORD);
 310       /* We used to adjust BITPOS here, but now we do the whole adjustment
 311          right after the loop.  */
 312       op0 = SUBREG_REG (op0);
 313     }
 314
 315   value = protect_from_queue (value, 0);
 316
 317   /* Use vec_extract patterns for extracting parts of vectors whenever
 318      available.  */
 319   if (VECTOR_MODE_P (GET_MODE (op0))
 320       && GET_CODE (op0) != MEM
 321       && (vec_set_optab->handlers[(int)GET_MODE (op0)].insn_code
 322           != CODE_FOR_nothing)
 323       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 324       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 325       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 326     {
 327       enum machine_mode outermode = GET_MODE (op0);
 328       enum machine_mode innermode = GET_MODE_INNER (outermode);
 329       int icode = (int) vec_set_optab->handlers[(int) outermode].insn_code;
 330       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 331       rtx rtxpos = GEN_INT (pos);
 332       rtx src = value;
 333       rtx dest = op0;
 334       rtx pat, seq;
 335       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
 336       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
 337       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
 338
 339       start_sequence ();
 340
 341       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
 342         src = copy_to_mode_reg (mode1, src);
 343
 344       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 345         rtxpos = copy_to_mode_reg (mode1, rtxpos);
 346
 347       /* We could handle this, but we should always be called with a pseudo
 348          for our targets and all insns should take them as outputs.  */
 349       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0)
 350           || ! (*insn_data[icode].operand[1].predicate) (src, mode1)
 351           || ! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 352         abort ();
 353       pat = GEN_FCN (icode) (dest, src, rtxpos);
 354       seq = get_insns ();
 355       end_sequence ();
 356       if (pat)
 357         {
 358           emit_insn (seq);
 359           emit_insn (pat);
 360           return dest;
 361         }
 362     }
 363
 364   if (flag_force_mem)
 365     {
 366       int old_generating_concat_p = generating_concat_p;
 367       generating_concat_p = 0;
 368       value = force_not_mem (value);
 369       generating_concat_p = old_generating_concat_p;
 370     }
 371
 372   /* If the target is a register, overwriting the entire object, or storing
 373      a full-word or multi-word field can be done with just a SUBREG.
 374
 375      If the target is memory, storing any naturally aligned field can be
 376      done with a simple store.  For targets that support fast unaligned
 377      memory, any naturally sized, unit aligned field can be done directly.  */
 378
 379   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 380                 + (offset * UNITS_PER_WORD);
 381
 382   if (bitpos == 0
 383       && bitsize == GET_MODE_BITSIZE (fieldmode)
 384       && (GET_CODE (op0) != MEM
 385           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 386              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 387              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 388           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 389              || (offset * BITS_PER_UNIT % bitsize == 0
 390                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 391     {
 392       if (GET_MODE (op0) != fieldmode)
 393         {
 394           if (GET_CODE (op0) == SUBREG)
 395             {
 396               if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 397                   || GET_MODE_CLASS (fieldmode) == MODE_INT
 398                   || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 399                 op0 = SUBREG_REG (op0);
 400               else
 401                 /* Else we've got some float mode source being extracted into
 402                    a different float mode destination -- this combination of
 403                    subregs results in Severe Tire Damage.  */
 404                 abort ();
 405             }
 406           if (GET_CODE (op0) == REG)
 407             op0 = gen_rtx_SUBREG (fieldmode, op0, byte_offset);
 408           else
 409             op0 = adjust_address (op0, fieldmode, offset);
 410         }
 411       emit_move_insn (op0, value);
 412       return value;
 413     }
 414
 415   /* Make sure we are playing with integral modes.  Pun with subregs
 416      if we aren't.  This must come after the entire register case above,
 417      since that case is valid for any mode.  The following cases are only
 418      valid for integral modes.  */
 419   {
 420     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 421     if (imode != GET_MODE (op0))
 422       {
 423         if (GET_CODE (op0) == MEM)
 424           op0 = adjust_address (op0, imode, 0);
 425         else if (imode != BLKmode)
 426           op0 = gen_lowpart (imode, op0);
 427         else
 428           abort ();
 429       }
 430   }
 431
 432   /* We may be accessing data outside the field, which means
 433      we can alias adjacent data.  */
 434   if (GET_CODE (op0) == MEM)
 435     {
 436       op0 = shallow_copy_rtx (op0);
 437       set_mem_alias_set (op0, 0);
 438       set_mem_expr (op0, 0);
 439     }
 440
 441   /* If OP0 is a register, BITPOS must count within a word.
 442      But as we have it, it counts within whatever size OP0 now has.
 443      On a bigendian machine, these are not the same, so convert.  */
 444   if (BYTES_BIG_ENDIAN
 445       && GET_CODE (op0) != MEM
 446       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 447     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 448
 449   /* Storing an lsb-aligned field in a register
 450      can be done with a movestrict instruction.  */
 451
 452   if (GET_CODE (op0) != MEM
 453       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 454       && bitsize == GET_MODE_BITSIZE (fieldmode)
 455       && (movstrict_optab->handlers[(int) fieldmode].insn_code
 456           != CODE_FOR_nothing))
 457     {
 458       int icode = movstrict_optab->handlers[(int) fieldmode].insn_code;
 459
 460       /* Get appropriate low part of the value being stored.  */
 461       if (GET_CODE (value) == CONST_INT || GET_CODE (value) == REG)
 462         value = gen_lowpart (fieldmode, value);
 463       else if (!(GET_CODE (value) == SYMBOL_REF
 464                  || GET_CODE (value) == LABEL_REF
 465                  || GET_CODE (value) == CONST))
 466         value = convert_to_mode (fieldmode, value, 0);
 467
 468       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 469         value = copy_to_mode_reg (fieldmode, value);
 470
 471       if (GET_CODE (op0) == SUBREG)
 472         {
 473           if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 474               || GET_MODE_CLASS (fieldmode) == MODE_INT
 475               || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 476             op0 = SUBREG_REG (op0);
 477           else
 478             /* Else we've got some float mode source being extracted into
 479                a different float mode destination -- this combination of
 480                subregs results in Severe Tire Damage.  */
 481             abort ();
 482         }
 483
 484       emit_insn (GEN_FCN (icode)
 485                  (gen_rtx_SUBREG (fieldmode, op0,
 486                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 487                                   + (offset * UNITS_PER_WORD)),
 488                                   value));
 489
 490       return value;
 491     }
 492
 493   /* Handle fields bigger than a word.  */
 494
 495   if (bitsize > BITS_PER_WORD)
 496     {
 497       /* Here we transfer the words of the field
 498          in the order least significant first.
 499          This is because the most significant word is the one which may
 500          be less than full.
 501          However, only do that if the value is not BLKmode.  */
 502
 503       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 504       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 505       unsigned int i;
 506
 507       /* This is the mode we must force value to, so that there will be enough
 508          subwords to extract.  Note that fieldmode will often (always?) be
 509          VOIDmode, because that is what store_field uses to indicate that this
 510          is a bit field, but passing VOIDmode to operand_subword_force will
 511          result in an abort.  */
 512       fieldmode = GET_MODE (value);
 513       if (fieldmode == VOIDmode)
 514         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 515
 516       for (i = 0; i < nwords; i++)
 517         {
 518           /* If I is 0, use the low-order word in both field and target;
 519              if I is 1, use the next to lowest word; and so on.  */
 520           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 521           unsigned int bit_offset = (backwards
 522                                      ? MAX ((int) bitsize - ((int) i + 1)
 523                                             * BITS_PER_WORD,
 524                                             0)
 525                                      : (int) i * BITS_PER_WORD);
 526
 527           store_bit_field (op0, MIN (BITS_PER_WORD,
 528                                      bitsize - i * BITS_PER_WORD),
 529                            bitnum + bit_offset, word_mode,
 530                            operand_subword_force (value, wordnum, fieldmode),
 531                            total_size);
 532         }
 533       return value;
 534     }
 535
 536   /* From here on we can assume that the field to be stored in is
 537      a full-word (whatever type that is), since it is shorter than a word.  */
 538
 539   /* OFFSET is the number of words or bytes (UNIT says which)
 540      from STR_RTX to the first word or byte containing part of the field.  */
 541
 542   if (GET_CODE (op0) != MEM)
 543     {
 544       if (offset != 0
 545           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 546         {
 547           if (GET_CODE (op0) != REG)
 548             {
 549               /* Since this is a destination (lvalue), we can't copy it to a
 550                  pseudo.  We can trivially remove a SUBREG that does not
 551                  change the size of the operand.  Such a SUBREG may have been
 552                  added above.  Otherwise, abort.  */
 553               if (GET_CODE (op0) == SUBREG
 554                   && (GET_MODE_SIZE (GET_MODE (op0))
 555                       == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))))
 556                 op0 = SUBREG_REG (op0);
 557               else
 558                 abort ();
 559             }
 560           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 561                                 op0, (offset * UNITS_PER_WORD));
 562         }
 563       offset = 0;
 564     }
 565   else
 566     op0 = protect_from_queue (op0, 1);
 567
 568   /* If VALUE is a floating-point mode, access it as an integer of the
 569      corresponding size.  This can occur on a machine with 64 bit registers
 570      that uses SFmode for float.  This can also occur for unaligned float
 571      structure fields.  */
 572   orig_value = value;
 573   if (GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 574       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 575     value = gen_lowpart ((GET_MODE (value) == VOIDmode
 576                           ? word_mode : int_mode_for_mode (GET_MODE (value))),
 577                          value);
 578
 579   /* Now OFFSET is nonzero only if OP0 is memory
 580      and is therefore always measured in bytes.  */
 581
 582   if (HAVE_insv
 583       && GET_MODE (value) != BLKmode
 584       && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
 585       /* Ensure insv's size is wide enough for this field.  */
 586       && (GET_MODE_BITSIZE (op_mode) >= bitsize)
 587       && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 588             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
 589     {
 590       int xbitpos = bitpos;
 591       rtx value1;
 592       rtx xop0 = op0;
 593       rtx last = get_last_insn ();
 594       rtx pat;
 595       enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
 596       int save_volatile_ok = volatile_ok;
 597
 598       volatile_ok = 1;
 599
 600       /* If this machine's insv can only insert into a register, copy OP0
 601          into a register and save it back later.  */
 602       /* This used to check flag_force_mem, but that was a serious
 603          de-optimization now that flag_force_mem is enabled by -O2.  */
 604       if (GET_CODE (op0) == MEM
 605           && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
 606                 (op0, VOIDmode)))
 607         {
 608           rtx tempreg;
 609           enum machine_mode bestmode;
 610
 611           /* Get the mode to use for inserting into this field.  If OP0 is
 612              BLKmode, get the smallest mode consistent with the alignment. If
 613              OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
 614              mode. Otherwise, use the smallest mode containing the field.  */
 615
 616           if (GET_MODE (op0) == BLKmode
 617               || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
 618             bestmode
 619               = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
 620                                MEM_VOLATILE_P (op0));
 621           else
 622             bestmode = GET_MODE (op0);
 623
 624           if (bestmode == VOIDmode
 625               || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 626                   && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 627             goto insv_loses;
 628
 629           /* Adjust address to point to the containing unit of that mode.
 630              Compute offset as multiple of this unit, counting in bytes.  */
 631           unit = GET_MODE_BITSIZE (bestmode);
 632           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 633           bitpos = bitnum % unit;
 634           op0 = adjust_address (op0, bestmode,  offset);
 635
 636           /* Fetch that unit, store the bitfield in it, then store
 637              the unit.  */
 638           tempreg = copy_to_reg (op0);
 639           store_bit_field (tempreg, bitsize, bitpos, fieldmode, orig_value,
 640                            total_size);
 641           emit_move_insn (op0, tempreg);
 642           return value;
 643         }
 644       volatile_ok = save_volatile_ok;
 645
 646       /* Add OFFSET into OP0's address.  */
 647       if (GET_CODE (xop0) == MEM)
 648         xop0 = adjust_address (xop0, byte_mode, offset);
 649
 650       /* If xop0 is a register, we need it in MAXMODE
 651          to make it acceptable to the format of insv.  */
 652       if (GET_CODE (xop0) == SUBREG)
 653         /* We can't just change the mode, because this might clobber op0,
 654            and we will need the original value of op0 if insv fails.  */
 655         xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 656       if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
 657         xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
 658
 659       /* On big-endian machines, we count bits from the most significant.
 660          If the bit field insn does not, we must invert.  */
 661
 662       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 663         xbitpos = unit - bitsize - xbitpos;
 664
 665       /* We have been counting XBITPOS within UNIT.
 666          Count instead within the size of the register.  */
 667       if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
 668         xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
 669
 670       unit = GET_MODE_BITSIZE (maxmode);
 671
 672       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
 673       value1 = value;
 674       if (GET_MODE (value) != maxmode)
 675         {
 676           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 677             {
 678               /* Optimization: Don't bother really extending VALUE
 679                  if it has all the bits we will actually use.  However,
 680                  if we must narrow it, be sure we do it correctly.  */
 681
 682               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
 683                 {
 684                   rtx tmp;
 685
 686                   tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
 687                   if (! tmp)
 688                     tmp = simplify_gen_subreg (maxmode,
 689                                                force_reg (GET_MODE (value),
 690                                                           value1),
 691                                                GET_MODE (value), 0);
 692                   value1 = tmp;
 693                 }
 694               else
 695                 value1 = gen_lowpart (maxmode, value1);
 696             }
 697           else if (GET_CODE (value) == CONST_INT)
 698             value1 = gen_int_mode (INTVAL (value), maxmode);
 699           else if (!CONSTANT_P (value))
 700             /* Parse phase is supposed to make VALUE's data type
 701                match that of the component reference, which is a type
 702                at least as wide as the field; so VALUE should have
 703                a mode that corresponds to that type.  */
 704             abort ();
 705         }
 706
 707       /* If this machine's insv insists on a register,
 708          get VALUE1 into a register.  */
 709       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 710              (value1, maxmode)))
 711         value1 = force_reg (maxmode, value1);
 712
 713       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 714       if (pat)
 715         emit_insn (pat);
 716       else
 717         {
 718           delete_insns_since (last);
 719           store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 720         }
 721     }
 722   else
 723     insv_loses:
 724     /* Insv is not available; store using shifts and boolean ops.  */
 725     store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 726   return value;
 727 }
 728 \f
 729 /* Use shifts and boolean operations to store VALUE
 730    into a bit field of width BITSIZE
 731    in a memory location specified by OP0 except offset by OFFSET bytes.
 732      (OFFSET must be 0 if OP0 is a register.)
 733    The field starts at position BITPOS within the byte.
 734     (If OP0 is a register, it may be a full word or a narrower mode,
 735      but BITPOS still counts within a full word,
 736      which is significant on bigendian machines.)
 737
 738    Note that protect_from_queue has already been done on OP0 and VALUE.  */
 739
 740 static void
 741 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 742                        unsigned HOST_WIDE_INT bitsize,
 743                        unsigned HOST_WIDE_INT bitpos, rtx value)
 744 {
 745   enum machine_mode mode;
 746   unsigned int total_bits = BITS_PER_WORD;
 747   rtx subtarget, temp;
 748   int all_zero = 0;
 749   int all_one = 0;
 750
 751   /* There is a case not handled here:
 752      a structure with a known alignment of just a halfword
 753      and a field split across two aligned halfwords within the structure.
 754      Or likewise a structure with a known alignment of just a byte
 755      and a field split across two bytes.
 756      Such cases are not supposed to be able to occur.  */
 757
 758   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 759     {
 760       if (offset != 0)
 761         abort ();
 762       /* Special treatment for a bit field split across two registers.  */
 763       if (bitsize + bitpos > BITS_PER_WORD)
 764         {
 765           store_split_bit_field (op0, bitsize, bitpos, value);
 766           return;
 767         }
 768     }
 769   else
 770     {
 771       /* Get the proper mode to use for this field.  We want a mode that
 772          includes the entire field.  If such a mode would be larger than
 773          a word, we won't be doing the extraction the normal way.
 774          We don't want a mode bigger than the destination.  */
 775
 776       mode = GET_MODE (op0);
 777       if (GET_MODE_BITSIZE (mode) == 0
 778           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 779         mode = word_mode;
 780       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 781                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 782
 783       if (mode == VOIDmode)
 784         {
 785           /* The only way this should occur is if the field spans word
 786              boundaries.  */
 787           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 788                                  value);
 789           return;
 790         }
 791
 792       total_bits = GET_MODE_BITSIZE (mode);
 793
 794       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 795          be in the range 0 to total_bits-1, and put any excess bytes in
 796          OFFSET.  */
 797       if (bitpos >= total_bits)
 798         {
 799           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 800           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 801                      * BITS_PER_UNIT);
 802         }
 803
 804       /* Get ref to an aligned byte, halfword, or word containing the field.
 805          Adjust BITPOS to be position within a word,
 806          and OFFSET to be the offset of that word.
 807          Then alter OP0 to refer to that word.  */
 808       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 809       offset -= (offset % (total_bits / BITS_PER_UNIT));
 810       op0 = adjust_address (op0, mode, offset);
 811     }
 812
 813   mode = GET_MODE (op0);
 814
 815   /* Now MODE is either some integral mode for a MEM as OP0,
 816      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 817      The bit field is contained entirely within OP0.
 818      BITPOS is the starting bit number within OP0.
 819      (OP0's mode may actually be narrower than MODE.)  */
 820
 821   if (BYTES_BIG_ENDIAN)
 822       /* BITPOS is the distance between our msb
 823          and that of the containing datum.
 824          Convert it to the distance from the lsb.  */
 825       bitpos = total_bits - bitsize - bitpos;
 826
 827   /* Now BITPOS is always the distance between our lsb
 828      and that of OP0.  */
 829
 830   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 831      we must first convert its mode to MODE.  */
 832
 833   if (GET_CODE (value) == CONST_INT)
 834     {
 835       HOST_WIDE_INT v = INTVAL (value);
 836
 837       if (bitsize < HOST_BITS_PER_WIDE_INT)
 838         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 839
 840       if (v == 0)
 841         all_zero = 1;
 842       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 843                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 844                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 845         all_one = 1;
 846
 847       value = lshift_value (mode, value, bitpos, bitsize);
 848     }
 849   else
 850     {
 851       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 852                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 853
 854       if (GET_MODE (value) != mode)
 855         {
 856           if ((GET_CODE (value) == REG || GET_CODE (value) == SUBREG)
 857               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 858             value = gen_lowpart (mode, value);
 859           else
 860             value = convert_to_mode (mode, value, 1);
 861         }
 862
 863       if (must_and)
 864         value = expand_binop (mode, and_optab, value,
 865                               mask_rtx (mode, 0, bitsize, 0),
 866                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 867       if (bitpos > 0)
 868         value = expand_shift (LSHIFT_EXPR, mode, value,
 869                               build_int_2 (bitpos, 0), NULL_RTX, 1);
 870     }
 871
 872   /* Now clear the chosen bits in OP0,
 873      except that if VALUE is -1 we need not bother.  */
 874
 875   subtarget = (GET_CODE (op0) == REG || ! flag_force_mem) ? op0 : 0;
 876
 877   if (! all_one)
 878     {
 879       temp = expand_binop (mode, and_optab, op0,
 880                            mask_rtx (mode, bitpos, bitsize, 1),
 881                            subtarget, 1, OPTAB_LIB_WIDEN);
 882       subtarget = temp;
 883     }
 884   else
 885     temp = op0;
 886
 887   /* Now logical-or VALUE into OP0, unless it is zero.  */
 888
 889   if (! all_zero)
 890     temp = expand_binop (mode, ior_optab, temp, value,
 891                          subtarget, 1, OPTAB_LIB_WIDEN);
 892   if (op0 != temp)
 893     emit_move_insn (op0, temp);
 894 }
 895 \f
 896 /* Store a bit field that is split across multiple accessible memory objects.
 897
 898    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 899    BITSIZE is the field width; BITPOS the position of its first bit
 900    (within the word).
 901    VALUE is the value to store.
 902
 903    This does not yet handle fields wider than BITS_PER_WORD.  */
 904
 905 static void
 906 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 907                        unsigned HOST_WIDE_INT bitpos, rtx value)
 908 {
 909   unsigned int unit;
 910   unsigned int bitsdone = 0;
 911
 912   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 913      much at a time.  */
 914   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 915     unit = BITS_PER_WORD;
 916   else
 917     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
 918
 919   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 920      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 921      that VALUE might be a floating-point constant.  */
 922   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
 923     {
 924       rtx word = gen_lowpart_common (word_mode, value);
 925
 926       if (word && (value != word))
 927         value = word;
 928       else
 929         value = gen_lowpart_common (word_mode,
 930                                     force_reg (GET_MODE (value) != VOIDmode
 931                                                ? GET_MODE (value)
 932                                                : word_mode, value));
 933     }
 934   else if (GET_CODE (value) == ADDRESSOF)
 935     value = copy_to_reg (value);
 936
 937   while (bitsdone < bitsize)
 938     {
 939       unsigned HOST_WIDE_INT thissize;
 940       rtx part, word;
 941       unsigned HOST_WIDE_INT thispos;
 942       unsigned HOST_WIDE_INT offset;
 943
 944       offset = (bitpos + bitsdone) / unit;
 945       thispos = (bitpos + bitsdone) % unit;
 946
 947       /* THISSIZE must not overrun a word boundary.  Otherwise,
 948          store_fixed_bit_field will call us again, and we will mutually
 949          recurse forever.  */
 950       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
 951       thissize = MIN (thissize, unit - thispos);
 952
 953       if (BYTES_BIG_ENDIAN)
 954         {
 955           int total_bits;
 956
 957           /* We must do an endian conversion exactly the same way as it is
 958              done in extract_bit_field, so that the two calls to
 959              extract_fixed_bit_field will have comparable arguments.  */
 960           if (GET_CODE (value) != MEM || GET_MODE (value) == BLKmode)
 961             total_bits = BITS_PER_WORD;
 962           else
 963             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
 964
 965           /* Fetch successively less significant portions.  */
 966           if (GET_CODE (value) == CONST_INT)
 967             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 968                              >> (bitsize - bitsdone - thissize))
 969                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 970           else
 971             /* The args are chosen so that the last part includes the
 972                lsb.  Give extract_bit_field the value it needs (with
 973                endianness compensation) to fetch the piece we want.  */
 974             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
 975                                             total_bits - bitsize + bitsdone,
 976                                             NULL_RTX, 1);
 977         }
 978       else
 979         {
 980           /* Fetch successively more significant portions.  */
 981           if (GET_CODE (value) == CONST_INT)
 982             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 983                              >> bitsdone)
 984                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 985           else
 986             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
 987                                             bitsdone, NULL_RTX, 1);
 988         }
 989
 990       /* If OP0 is a register, then handle OFFSET here.
 991
 992          When handling multiword bitfields, extract_bit_field may pass
 993          down a word_mode SUBREG of a larger REG for a bitfield that actually
 994          crosses a word boundary.  Thus, for a SUBREG, we must find
 995          the current word starting from the base register.  */
 996       if (GET_CODE (op0) == SUBREG)
 997         {
 998           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
 999           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1000                                         GET_MODE (SUBREG_REG (op0)));
1001           offset = 0;
1002         }
1003       else if (GET_CODE (op0) == REG)
1004         {
1005           word = operand_subword_force (op0, offset, GET_MODE (op0));
1006           offset = 0;
1007         }
1008       else
1009         word = op0;
1010
1011       /* OFFSET is in UNITs, and UNIT is in bits.
1012          store_fixed_bit_field wants offset in bytes.  */
1013       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1014                              thispos, part);
1015       bitsdone += thissize;
1016     }
1017 }
1018 \f
1019 /* Generate code to extract a byte-field from STR_RTX
1020    containing BITSIZE bits, starting at BITNUM,
1021    and put it in TARGET if possible (if TARGET is nonzero).
1022    Regardless of TARGET, we return the rtx for where the value is placed.
1023    It may be a QUEUED.
1024
1025    STR_RTX is the structure containing the byte (a REG or MEM).
1026    UNSIGNEDP is nonzero if this is an unsigned bit field.
1027    MODE is the natural mode of the field value once extracted.
1028    TMODE is the mode the caller would like the value to have;
1029    but the value may be returned with type MODE instead.
1030
1031    TOTAL_SIZE is the size in bytes of the containing structure,
1032    or -1 if varying.
1033
1034    If a TARGET is specified and we can store in it at no extra cost,
1035    we do so, and return TARGET.
1036    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1037    if they are equally easy.  */
1038
1039 rtx
1040 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1041                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1042                    enum machine_mode mode, enum machine_mode tmode,
1043                    HOST_WIDE_INT total_size)
1044 {
1045   unsigned int unit
1046     = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
1047   unsigned HOST_WIDE_INT offset = bitnum / unit;
1048   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
1049   rtx op0 = str_rtx;
1050   rtx spec_target = target;
1051   rtx spec_target_subreg = 0;
1052   enum machine_mode int_mode;
1053   enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1054   enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1055   enum machine_mode mode1;
1056   int byte_offset;
1057
1058   /* Discount the part of the structure before the desired byte.
1059      We need to know how many bytes are safe to reference after it.  */
1060   if (total_size >= 0)
1061     total_size -= (bitpos / BIGGEST_ALIGNMENT
1062                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
1063
1064   if (tmode == VOIDmode)
1065     tmode = mode;
1066
1067   while (GET_CODE (op0) == SUBREG)
1068     {
1069       bitpos += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1070       if (bitpos > unit)
1071         {
1072           offset += (bitpos / unit);
1073           bitpos %= unit;
1074         }
1075       op0 = SUBREG_REG (op0);
1076     }
1077
1078   if (GET_CODE (op0) == REG
1079       && mode == GET_MODE (op0)
1080       && bitnum == 0
1081       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1082     {
1083       /* We're trying to extract a full register from itself.  */
1084       return op0;
1085     }
1086
1087   /* Use vec_extract patterns for extracting parts of vectors whenever
1088      available.  */
1089   if (VECTOR_MODE_P (GET_MODE (op0))
1090       && GET_CODE (op0) != MEM
1091       && (vec_extract_optab->handlers[(int)GET_MODE (op0)].insn_code
1092           != CODE_FOR_nothing)
1093       && ((bitsize + bitnum) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1094           == bitsize / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1095     {
1096       enum machine_mode outermode = GET_MODE (op0);
1097       enum machine_mode innermode = GET_MODE_INNER (outermode);
1098       int icode = (int) vec_extract_optab->handlers[(int) outermode].insn_code;
1099       int pos = bitnum / GET_MODE_BITSIZE (innermode);
1100       rtx rtxpos = GEN_INT (pos);
1101       rtx src = op0;
1102       rtx dest = NULL, pat, seq;
1103       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1104       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1105       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1106
1107       if (innermode == tmode || innermode == mode)
1108         dest = target;
1109
1110       if (!dest)
1111         dest = gen_reg_rtx (innermode);
1112
1113       start_sequence ();
1114
1115       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1116         dest = copy_to_mode_reg (mode0, dest);
1117
1118       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1119         src = copy_to_mode_reg (mode1, src);
1120
1121       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1122         rtxpos = copy_to_mode_reg (mode1, rtxpos);
1123
1124       /* We could handle this, but we should always be called with a pseudo
1125          for our targets and all insns should take them as outputs.  */
1126       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0)
1127           || ! (*insn_data[icode].operand[1].predicate) (src, mode1)
1128           || ! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1129         abort ();
1130
1131       pat = GEN_FCN (icode) (dest, src, rtxpos);
1132       seq = get_insns ();
1133       end_sequence ();
1134       if (pat)
1135         {
1136           emit_insn (seq);
1137           emit_insn (pat);
1138           return dest;
1139         }
1140     }
1141
1142   /* Make sure we are playing with integral modes.  Pun with subregs
1143      if we aren't.  */
1144   {
1145     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1146     if (imode != GET_MODE (op0))
1147       {
1148         if (GET_CODE (op0) == MEM)
1149           op0 = adjust_address (op0, imode, 0);
1150         else if (imode != BLKmode)
1151           op0 = gen_lowpart (imode, op0);
1152         else
1153           abort ();
1154       }
1155   }
1156
1157   /* We may be accessing data outside the field, which means
1158      we can alias adjacent data.  */
1159   if (GET_CODE (op0) == MEM)
1160     {
1161       op0 = shallow_copy_rtx (op0);
1162       set_mem_alias_set (op0, 0);
1163       set_mem_expr (op0, 0);
1164     }
1165
1166   /* Extraction of a full-word or multi-word value from a structure
1167      in a register or aligned memory can be done with just a SUBREG.
1168      A subword value in the least significant part of a register
1169      can also be extracted with a SUBREG.  For this, we need the
1170      byte offset of the value in op0.  */
1171
1172   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1173
1174   /* If OP0 is a register, BITPOS must count within a word.
1175      But as we have it, it counts within whatever size OP0 now has.
1176      On a bigendian machine, these are not the same, so convert.  */
1177   if (BYTES_BIG_ENDIAN
1178       && GET_CODE (op0) != MEM
1179       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1180     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1181
1182   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1183      If that's wrong, the solution is to test for it and set TARGET to 0
1184      if needed.  */
1185
1186   /* Only scalar integer modes can be converted via subregs.  There is an
1187      additional problem for FP modes here in that they can have a precision
1188      which is different from the size.  mode_for_size uses precision, but
1189      we want a mode based on the size, so we must avoid calling it for FP
1190      modes.  */
1191   mode1  = (SCALAR_INT_MODE_P (tmode)
1192             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1193             : mode);
1194
1195   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1196         && bitpos % BITS_PER_WORD == 0)
1197        || (mode1 != BLKmode
1198            /* ??? The big endian test here is wrong.  This is correct
1199               if the value is in a register, and if mode_for_size is not
1200               the same mode as op0.  This causes us to get unnecessarily
1201               inefficient code from the Thumb port when -mbig-endian.  */
1202            && (BYTES_BIG_ENDIAN
1203                ? bitpos + bitsize == BITS_PER_WORD
1204                : bitpos == 0)))
1205       && ((GET_CODE (op0) != MEM
1206            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1207                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1208            && GET_MODE_SIZE (mode1) != 0
1209            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1210           || (GET_CODE (op0) == MEM
1211               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1212                   || (offset * BITS_PER_UNIT % bitsize == 0
1213                       && MEM_ALIGN (op0) % bitsize == 0)))))
1214     {
1215       if (mode1 != GET_MODE (op0))
1216         {
1217           if (GET_CODE (op0) == SUBREG)
1218             {
1219               if (GET_MODE (SUBREG_REG (op0)) == mode1
1220                   || GET_MODE_CLASS (mode1) == MODE_INT
1221                   || GET_MODE_CLASS (mode1) == MODE_PARTIAL_INT)
1222                 op0 = SUBREG_REG (op0);
1223               else
1224                 /* Else we've got some float mode source being extracted into
1225                    a different float mode destination -- this combination of
1226                    subregs results in Severe Tire Damage.  */
1227                 goto no_subreg_mode_swap;
1228             }
1229           if (GET_CODE (op0) == REG)
1230             op0 = gen_rtx_SUBREG (mode1, op0, byte_offset);
1231           else
1232             op0 = adjust_address (op0, mode1, offset);
1233         }
1234       if (mode1 != mode)
1235         return convert_to_mode (tmode, op0, unsignedp);
1236       return op0;
1237     }
1238  no_subreg_mode_swap:
1239
1240   /* Handle fields bigger than a word.  */
1241
1242   if (bitsize > BITS_PER_WORD)
1243     {
1244       /* Here we transfer the words of the field
1245          in the order least significant first.
1246          This is because the most significant word is the one which may
1247          be less than full.  */
1248
1249       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1250       unsigned int i;
1251
1252       if (target == 0 || GET_CODE (target) != REG)
1253         target = gen_reg_rtx (mode);
1254
1255       /* Indicate for flow that the entire target reg is being set.  */
1256       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1257
1258       for (i = 0; i < nwords; i++)
1259         {
1260           /* If I is 0, use the low-order word in both field and target;
1261              if I is 1, use the next to lowest word; and so on.  */
1262           /* Word number in TARGET to use.  */
1263           unsigned int wordnum
1264             = (WORDS_BIG_ENDIAN
1265                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1266                : i);
1267           /* Offset from start of field in OP0.  */
1268           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1269                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1270                                                 * (int) BITS_PER_WORD))
1271                                      : (int) i * BITS_PER_WORD);
1272           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1273           rtx result_part
1274             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1275                                            bitsize - i * BITS_PER_WORD),
1276                                  bitnum + bit_offset, 1, target_part, mode,
1277                                  word_mode, total_size);
1278
1279           if (target_part == 0)
1280             abort ();
1281
1282           if (result_part != target_part)
1283             emit_move_insn (target_part, result_part);
1284         }
1285
1286       if (unsignedp)
1287         {
1288           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1289              need to be zero'd out.  */
1290           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1291             {
1292               unsigned int i, total_words;
1293
1294               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1295               for (i = nwords; i < total_words; i++)
1296                 emit_move_insn
1297                   (operand_subword (target,
1298                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1299                                     1, VOIDmode),
1300                    const0_rtx);
1301             }
1302           return target;
1303         }
1304
1305       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1306       target = expand_shift (LSHIFT_EXPR, mode, target,
1307                              build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1308                              NULL_RTX, 0);
1309       return expand_shift (RSHIFT_EXPR, mode, target,
1310                            build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1311                            NULL_RTX, 0);
1312     }
1313
1314   /* From here on we know the desired field is smaller than a word.  */
1315
1316   /* Check if there is a correspondingly-sized integer field, so we can
1317      safely extract it as one size of integer, if necessary; then
1318      truncate or extend to the size that is wanted; then use SUBREGs or
1319      convert_to_mode to get one of the modes we really wanted.  */
1320
1321   int_mode = int_mode_for_mode (tmode);
1322   if (int_mode == BLKmode)
1323     int_mode = int_mode_for_mode (mode);
1324   if (int_mode == BLKmode)
1325     abort ();    /* Should probably push op0 out to memory and then
1326                     do a load.  */
1327
1328   /* OFFSET is the number of words or bytes (UNIT says which)
1329      from STR_RTX to the first word or byte containing part of the field.  */
1330
1331   if (GET_CODE (op0) != MEM)
1332     {
1333       if (offset != 0
1334           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1335         {
1336           if (GET_CODE (op0) != REG)
1337             op0 = copy_to_reg (op0);
1338           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1339                                 op0, (offset * UNITS_PER_WORD));
1340         }
1341       offset = 0;
1342     }
1343   else
1344     op0 = protect_from_queue (str_rtx, 1);
1345
1346   /* Now OFFSET is nonzero only for memory operands.  */
1347
1348   if (unsignedp)
1349     {
1350       if (HAVE_extzv
1351           && (GET_MODE_BITSIZE (extzv_mode) >= bitsize)
1352           && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1353                 && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1354         {
1355           unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1356           rtx bitsize_rtx, bitpos_rtx;
1357           rtx last = get_last_insn ();
1358           rtx xop0 = op0;
1359           rtx xtarget = target;
1360           rtx xspec_target = spec_target;
1361           rtx xspec_target_subreg = spec_target_subreg;
1362           rtx pat;
1363           enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1364
1365           if (GET_CODE (xop0) == MEM)
1366             {
1367               int save_volatile_ok = volatile_ok;
1368               volatile_ok = 1;
1369
1370               /* Is the memory operand acceptable?  */
1371               if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1372                      (xop0, GET_MODE (xop0))))
1373                 {
1374                   /* No, load into a reg and extract from there.  */
1375                   enum machine_mode bestmode;
1376
1377                   /* Get the mode to use for inserting into this field.  If
1378                      OP0 is BLKmode, get the smallest mode consistent with the
1379                      alignment. If OP0 is a non-BLKmode object that is no
1380                      wider than MAXMODE, use its mode. Otherwise, use the
1381                      smallest mode containing the field.  */
1382
1383                   if (GET_MODE (xop0) == BLKmode
1384                       || (GET_MODE_SIZE (GET_MODE (op0))
1385                           > GET_MODE_SIZE (maxmode)))
1386                     bestmode = get_best_mode (bitsize, bitnum,
1387                                               MEM_ALIGN (xop0), maxmode,
1388                                               MEM_VOLATILE_P (xop0));
1389                   else
1390                     bestmode = GET_MODE (xop0);
1391
1392                   if (bestmode == VOIDmode
1393                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1394                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1395                     goto extzv_loses;
1396
1397                   /* Compute offset as multiple of this unit,
1398                      counting in bytes.  */
1399                   unit = GET_MODE_BITSIZE (bestmode);
1400                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1401                   xbitpos = bitnum % unit;
1402                   xop0 = adjust_address (xop0, bestmode, xoffset);
1403
1404                   /* Fetch it to a register in that size.  */
1405                   xop0 = force_reg (bestmode, xop0);
1406
1407                   /* XBITPOS counts within UNIT, which is what is expected.  */
1408                 }
1409               else
1410                 /* Get ref to first byte containing part of the field.  */
1411                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1412
1413               volatile_ok = save_volatile_ok;
1414             }
1415
1416           /* If op0 is a register, we need it in MAXMODE (which is usually
1417              SImode). to make it acceptable to the format of extzv.  */
1418           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1419             goto extzv_loses;
1420           if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1421             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1422
1423           /* On big-endian machines, we count bits from the most significant.
1424              If the bit field insn does not, we must invert.  */
1425           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1426             xbitpos = unit - bitsize - xbitpos;
1427
1428           /* Now convert from counting within UNIT to counting in MAXMODE.  */
1429           if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1430             xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1431
1432           unit = GET_MODE_BITSIZE (maxmode);
1433
1434           if (xtarget == 0
1435               || (flag_force_mem && GET_CODE (xtarget) == MEM))
1436             xtarget = xspec_target = gen_reg_rtx (tmode);
1437
1438           if (GET_MODE (xtarget) != maxmode)
1439             {
1440               if (GET_CODE (xtarget) == REG)
1441                 {
1442                   int wider = (GET_MODE_SIZE (maxmode)
1443                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1444                   xtarget = gen_lowpart (maxmode, xtarget);
1445                   if (wider)
1446                     xspec_target_subreg = xtarget;
1447                 }
1448               else
1449                 xtarget = gen_reg_rtx (maxmode);
1450             }
1451
1452           /* If this machine's extzv insists on a register target,
1453              make sure we have one.  */
1454           if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1455                  (xtarget, maxmode)))
1456             xtarget = gen_reg_rtx (maxmode);
1457
1458           bitsize_rtx = GEN_INT (bitsize);
1459           bitpos_rtx = GEN_INT (xbitpos);
1460
1461           pat = gen_extzv (protect_from_queue (xtarget, 1),
1462                            xop0, bitsize_rtx, bitpos_rtx);
1463           if (pat)
1464             {
1465               emit_insn (pat);
1466               target = xtarget;
1467               spec_target = xspec_target;
1468               spec_target_subreg = xspec_target_subreg;
1469             }
1470           else
1471             {
1472               delete_insns_since (last);
1473               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1474                                                 bitpos, target, 1);
1475             }
1476         }
1477       else
1478       extzv_loses:
1479         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1480                                           bitpos, target, 1);
1481     }
1482   else
1483     {
1484       if (HAVE_extv
1485           && (GET_MODE_BITSIZE (extv_mode) >= bitsize)
1486           && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1487                 && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1488         {
1489           int xbitpos = bitpos, xoffset = offset;
1490           rtx bitsize_rtx, bitpos_rtx;
1491           rtx last = get_last_insn ();
1492           rtx xop0 = op0, xtarget = target;
1493           rtx xspec_target = spec_target;
1494           rtx xspec_target_subreg = spec_target_subreg;
1495           rtx pat;
1496           enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1497
1498           if (GET_CODE (xop0) == MEM)
1499             {
1500               /* Is the memory operand acceptable?  */
1501               if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1502                      (xop0, GET_MODE (xop0))))
1503                 {
1504                   /* No, load into a reg and extract from there.  */
1505                   enum machine_mode bestmode;
1506
1507                   /* Get the mode to use for inserting into this field.  If
1508                      OP0 is BLKmode, get the smallest mode consistent with the
1509                      alignment. If OP0 is a non-BLKmode object that is no
1510                      wider than MAXMODE, use its mode. Otherwise, use the
1511                      smallest mode containing the field.  */
1512
1513                   if (GET_MODE (xop0) == BLKmode
1514                       || (GET_MODE_SIZE (GET_MODE (op0))
1515                           > GET_MODE_SIZE (maxmode)))
1516                     bestmode = get_best_mode (bitsize, bitnum,
1517                                               MEM_ALIGN (xop0), maxmode,
1518                                               MEM_VOLATILE_P (xop0));
1519                   else
1520                     bestmode = GET_MODE (xop0);
1521
1522                   if (bestmode == VOIDmode
1523                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1524                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1525                     goto extv_loses;
1526
1527                   /* Compute offset as multiple of this unit,
1528                      counting in bytes.  */
1529                   unit = GET_MODE_BITSIZE (bestmode);
1530                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1531                   xbitpos = bitnum % unit;
1532                   xop0 = adjust_address (xop0, bestmode, xoffset);
1533
1534                   /* Fetch it to a register in that size.  */
1535                   xop0 = force_reg (bestmode, xop0);
1536
1537                   /* XBITPOS counts within UNIT, which is what is expected.  */
1538                 }
1539               else
1540                 /* Get ref to first byte containing part of the field.  */
1541                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1542             }
1543
1544           /* If op0 is a register, we need it in MAXMODE (which is usually
1545              SImode) to make it acceptable to the format of extv.  */
1546           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1547             goto extv_loses;
1548           if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1549             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1550
1551           /* On big-endian machines, we count bits from the most significant.
1552              If the bit field insn does not, we must invert.  */
1553           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1554             xbitpos = unit - bitsize - xbitpos;
1555
1556           /* XBITPOS counts within a size of UNIT.
1557              Adjust to count within a size of MAXMODE.  */
1558           if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1559             xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1560
1561           unit = GET_MODE_BITSIZE (maxmode);
1562
1563           if (xtarget == 0
1564               || (flag_force_mem && GET_CODE (xtarget) == MEM))
1565             xtarget = xspec_target = gen_reg_rtx (tmode);
1566
1567           if (GET_MODE (xtarget) != maxmode)
1568             {
1569               if (GET_CODE (xtarget) == REG)
1570                 {
1571                   int wider = (GET_MODE_SIZE (maxmode)
1572                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1573                   xtarget = gen_lowpart (maxmode, xtarget);
1574                   if (wider)
1575                     xspec_target_subreg = xtarget;
1576                 }
1577               else
1578                 xtarget = gen_reg_rtx (maxmode);
1579             }
1580
1581           /* If this machine's extv insists on a register target,
1582              make sure we have one.  */
1583           if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1584                  (xtarget, maxmode)))
1585             xtarget = gen_reg_rtx (maxmode);
1586
1587           bitsize_rtx = GEN_INT (bitsize);
1588           bitpos_rtx = GEN_INT (xbitpos);
1589
1590           pat = gen_extv (protect_from_queue (xtarget, 1),
1591                           xop0, bitsize_rtx, bitpos_rtx);
1592           if (pat)
1593             {
1594               emit_insn (pat);
1595               target = xtarget;
1596               spec_target = xspec_target;
1597               spec_target_subreg = xspec_target_subreg;
1598             }
1599           else
1600             {
1601               delete_insns_since (last);
1602               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1603                                                 bitpos, target, 0);
1604             }
1605         }
1606       else
1607       extv_loses:
1608         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1609                                           bitpos, target, 0);
1610     }
1611   if (target == spec_target)
1612     return target;
1613   if (target == spec_target_subreg)
1614     return spec_target;
1615   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1616     {
1617       /* If the target mode is floating-point, first convert to the
1618          integer mode of that size and then access it as a floating-point
1619          value via a SUBREG.  */
1620       if (GET_MODE_CLASS (tmode) != MODE_INT
1621           && GET_MODE_CLASS (tmode) != MODE_PARTIAL_INT)
1622         {
1623           target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
1624                                                    MODE_INT, 0),
1625                                     target, unsignedp);
1626           return gen_lowpart (tmode, target);
1627         }
1628       else
1629         return convert_to_mode (tmode, target, unsignedp);
1630     }
1631   return target;
1632 }
1633 \f
1634 /* Extract a bit field using shifts and boolean operations
1635    Returns an rtx to represent the value.
1636    OP0 addresses a register (word) or memory (byte).
1637    BITPOS says which bit within the word or byte the bit field starts in.
1638    OFFSET says how many bytes farther the bit field starts;
1639     it is 0 if OP0 is a register.
1640    BITSIZE says how many bits long the bit field is.
1641     (If OP0 is a register, it may be narrower than a full word,
1642      but BITPOS still counts within a full word,
1643      which is significant on bigendian machines.)
1644
1645    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1646    If TARGET is nonzero, attempts to store the value there
1647    and return TARGET, but this is not guaranteed.
1648    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1649
1650 static rtx
1651 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1652                          unsigned HOST_WIDE_INT offset,
1653                          unsigned HOST_WIDE_INT bitsize,
1654                          unsigned HOST_WIDE_INT bitpos, rtx target,
1655                          int unsignedp)
1656 {
1657   unsigned int total_bits = BITS_PER_WORD;
1658   enum machine_mode mode;
1659
1660   if (GET_CODE (op0) == SUBREG || GET_CODE (op0) == REG)
1661     {
1662       /* Special treatment for a bit field split across two registers.  */
1663       if (bitsize + bitpos > BITS_PER_WORD)
1664         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1665     }
1666   else
1667     {
1668       /* Get the proper mode to use for this field.  We want a mode that
1669          includes the entire field.  If such a mode would be larger than
1670          a word, we won't be doing the extraction the normal way.  */
1671
1672       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1673                             MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1674
1675       if (mode == VOIDmode)
1676         /* The only way this should occur is if the field spans word
1677            boundaries.  */
1678         return extract_split_bit_field (op0, bitsize,
1679                                         bitpos + offset * BITS_PER_UNIT,
1680                                         unsignedp);
1681
1682       total_bits = GET_MODE_BITSIZE (mode);
1683
1684       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1685          be in the range 0 to total_bits-1, and put any excess bytes in
1686          OFFSET.  */
1687       if (bitpos >= total_bits)
1688         {
1689           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1690           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1691                      * BITS_PER_UNIT);
1692         }
1693
1694       /* Get ref to an aligned byte, halfword, or word containing the field.
1695          Adjust BITPOS to be position within a word,
1696          and OFFSET to be the offset of that word.
1697          Then alter OP0 to refer to that word.  */
1698       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1699       offset -= (offset % (total_bits / BITS_PER_UNIT));
1700       op0 = adjust_address (op0, mode, offset);
1701     }
1702
1703   mode = GET_MODE (op0);
1704
1705   if (BYTES_BIG_ENDIAN)
1706     /* BITPOS is the distance between our msb and that of OP0.
1707        Convert it to the distance from the lsb.  */
1708     bitpos = total_bits - bitsize - bitpos;
1709
1710   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1711      We have reduced the big-endian case to the little-endian case.  */
1712
1713   if (unsignedp)
1714     {
1715       if (bitpos)
1716         {
1717           /* If the field does not already start at the lsb,
1718              shift it so it does.  */
1719           tree amount = build_int_2 (bitpos, 0);
1720           /* Maybe propagate the target for the shift.  */
1721           /* But not if we will return it--could confuse integrate.c.  */
1722           rtx subtarget = (target != 0 && GET_CODE (target) == REG
1723                            && !REG_FUNCTION_VALUE_P (target)
1724                            ? target : 0);
1725           if (tmode != mode) subtarget = 0;
1726           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1727         }
1728       /* Convert the value to the desired mode.  */
1729       if (mode != tmode)
1730         op0 = convert_to_mode (tmode, op0, 1);
1731
1732       /* Unless the msb of the field used to be the msb when we shifted,
1733          mask out the upper bits.  */
1734
1735       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1736         return expand_binop (GET_MODE (op0), and_optab, op0,
1737                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1738                              target, 1, OPTAB_LIB_WIDEN);
1739       return op0;
1740     }
1741
1742   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1743      then arithmetic-shift its lsb to the lsb of the word.  */
1744   op0 = force_reg (mode, op0);
1745   if (mode != tmode)
1746     target = 0;
1747
1748   /* Find the narrowest integer mode that contains the field.  */
1749
1750   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1751        mode = GET_MODE_WIDER_MODE (mode))
1752     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1753       {
1754         op0 = convert_to_mode (mode, op0, 0);
1755         break;
1756       }
1757
1758   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1759     {
1760       tree amount
1761         = build_int_2 (GET_MODE_BITSIZE (mode) - (bitsize + bitpos), 0);
1762       /* Maybe propagate the target for the shift.  */
1763       /* But not if we will return the result--could confuse integrate.c.  */
1764       rtx subtarget = (target != 0 && GET_CODE (target) == REG
1765                        && ! REG_FUNCTION_VALUE_P (target)
1766                        ? target : 0);
1767       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1768     }
1769
1770   return expand_shift (RSHIFT_EXPR, mode, op0,
1771                        build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1772                        target, 0);
1773 }
1774 \f
1775 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1776    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1777    complement of that if COMPLEMENT.  The mask is truncated if
1778    necessary to the width of mode MODE.  The mask is zero-extended if
1779    BITSIZE+BITPOS is too small for MODE.  */
1780
1781 static rtx
1782 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1783 {
1784   HOST_WIDE_INT masklow, maskhigh;
1785
1786   if (bitsize == 0)
1787     masklow = 0;
1788   else if (bitpos < HOST_BITS_PER_WIDE_INT)
1789     masklow = (HOST_WIDE_INT) -1 << bitpos;
1790   else
1791     masklow = 0;
1792
1793   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1794     masklow &= ((unsigned HOST_WIDE_INT) -1
1795                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1796
1797   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1798     maskhigh = -1;
1799   else
1800     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1801
1802   if (bitsize == 0)
1803     maskhigh = 0;
1804   else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1805     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1806                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1807   else
1808     maskhigh = 0;
1809
1810   if (complement)
1811     {
1812       maskhigh = ~maskhigh;
1813       masklow = ~masklow;
1814     }
1815
1816   return immed_double_const (masklow, maskhigh, mode);
1817 }
1818
1819 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1820    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1821
1822 static rtx
1823 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1824 {
1825   unsigned HOST_WIDE_INT v = INTVAL (value);
1826   HOST_WIDE_INT low, high;
1827
1828   if (bitsize < HOST_BITS_PER_WIDE_INT)
1829     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1830
1831   if (bitpos < HOST_BITS_PER_WIDE_INT)
1832     {
1833       low = v << bitpos;
1834       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1835     }
1836   else
1837     {
1838       low = 0;
1839       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1840     }
1841
1842   return immed_double_const (low, high, mode);
1843 }
1844 \f
1845 /* Extract a bit field that is split across two words
1846    and return an RTX for the result.
1847
1848    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1849    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1850    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1851
1852 static rtx
1853 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1854                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1855 {
1856   unsigned int unit;
1857   unsigned int bitsdone = 0;
1858   rtx result = NULL_RTX;
1859   int first = 1;
1860
1861   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1862      much at a time.  */
1863   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1864     unit = BITS_PER_WORD;
1865   else
1866     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1867
1868   while (bitsdone < bitsize)
1869     {
1870       unsigned HOST_WIDE_INT thissize;
1871       rtx part, word;
1872       unsigned HOST_WIDE_INT thispos;
1873       unsigned HOST_WIDE_INT offset;
1874
1875       offset = (bitpos + bitsdone) / unit;
1876       thispos = (bitpos + bitsdone) % unit;
1877
1878       /* THISSIZE must not overrun a word boundary.  Otherwise,
1879          extract_fixed_bit_field will call us again, and we will mutually
1880          recurse forever.  */
1881       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1882       thissize = MIN (thissize, unit - thispos);
1883
1884       /* If OP0 is a register, then handle OFFSET here.
1885
1886          When handling multiword bitfields, extract_bit_field may pass
1887          down a word_mode SUBREG of a larger REG for a bitfield that actually
1888          crosses a word boundary.  Thus, for a SUBREG, we must find
1889          the current word starting from the base register.  */
1890       if (GET_CODE (op0) == SUBREG)
1891         {
1892           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1893           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1894                                         GET_MODE (SUBREG_REG (op0)));
1895           offset = 0;
1896         }
1897       else if (GET_CODE (op0) == REG)
1898         {
1899           word = operand_subword_force (op0, offset, GET_MODE (op0));
1900           offset = 0;
1901         }
1902       else
1903         word = op0;
1904
1905       /* Extract the parts in bit-counting order,
1906          whose meaning is determined by BYTES_PER_UNIT.
1907          OFFSET is in UNITs, and UNIT is in bits.
1908          extract_fixed_bit_field wants offset in bytes.  */
1909       part = extract_fixed_bit_field (word_mode, word,
1910                                       offset * unit / BITS_PER_UNIT,
1911                                       thissize, thispos, 0, 1);
1912       bitsdone += thissize;
1913
1914       /* Shift this part into place for the result.  */
1915       if (BYTES_BIG_ENDIAN)
1916         {
1917           if (bitsize != bitsdone)
1918             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1919                                  build_int_2 (bitsize - bitsdone, 0), 0, 1);
1920         }
1921       else
1922         {
1923           if (bitsdone != thissize)
1924             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1925                                  build_int_2 (bitsdone - thissize, 0), 0, 1);
1926         }
1927
1928       if (first)
1929         result = part;
1930       else
1931         /* Combine the parts with bitwise or.  This works
1932            because we extracted each part as an unsigned bit field.  */
1933         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1934                                OPTAB_LIB_WIDEN);
1935
1936       first = 0;
1937     }
1938
1939   /* Unsigned bit field: we are done.  */
1940   if (unsignedp)
1941     return result;
1942   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1943   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1944                          build_int_2 (BITS_PER_WORD - bitsize, 0),
1945                          NULL_RTX, 0);
1946   return expand_shift (RSHIFT_EXPR, word_mode, result,
1947                        build_int_2 (BITS_PER_WORD - bitsize, 0), NULL_RTX, 0);
1948 }
1949 \f
1950 /* Add INC into TARGET.  */
1951
1952 void
1953 expand_inc (rtx target, rtx inc)
1954 {
1955   rtx value = expand_binop (GET_MODE (target), add_optab,
1956                             target, inc,
1957                             target, 0, OPTAB_LIB_WIDEN);
1958   if (value != target)
1959     emit_move_insn (target, value);
1960 }
1961
1962 /* Subtract DEC from TARGET.  */
1963
1964 void
1965 expand_dec (rtx target, rtx dec)
1966 {
1967   rtx value = expand_binop (GET_MODE (target), sub_optab,
1968                             target, dec,
1969                             target, 0, OPTAB_LIB_WIDEN);
1970   if (value != target)
1971     emit_move_insn (target, value);
1972 }
1973 \f
1974 /* Output a shift instruction for expression code CODE,
1975    with SHIFTED being the rtx for the value to shift,
1976    and AMOUNT the tree for the amount to shift by.
1977    Store the result in the rtx TARGET, if that is convenient.
1978    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
1979    Return the rtx for where the value is.  */
1980
1981 rtx
1982 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
1983               tree amount, rtx target, int unsignedp)
1984 {
1985   rtx op1, temp = 0;
1986   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
1987   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
1988   int try;
1989
1990   /* Previously detected shift-counts computed by NEGATE_EXPR
1991      and shifted in the other direction; but that does not work
1992      on all machines.  */
1993
1994   op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
1995
1996 #ifdef SHIFT_COUNT_TRUNCATED
1997   if (SHIFT_COUNT_TRUNCATED)
1998     {
1999       if (GET_CODE (op1) == CONST_INT
2000           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2001               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2002         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2003                        % GET_MODE_BITSIZE (mode));
2004       else if (GET_CODE (op1) == SUBREG
2005                && subreg_lowpart_p (op1))
2006         op1 = SUBREG_REG (op1);
2007     }
2008 #endif
2009
2010   if (op1 == const0_rtx)
2011     return shifted;
2012
2013   for (try = 0; temp == 0 && try < 3; try++)
2014     {
2015       enum optab_methods methods;
2016
2017       if (try == 0)
2018         methods = OPTAB_DIRECT;
2019       else if (try == 1)
2020         methods = OPTAB_WIDEN;
2021       else
2022         methods = OPTAB_LIB_WIDEN;
2023
2024       if (rotate)
2025         {
2026           /* Widening does not work for rotation.  */
2027           if (methods == OPTAB_WIDEN)
2028             continue;
2029           else if (methods == OPTAB_LIB_WIDEN)
2030             {
2031               /* If we have been unable to open-code this by a rotation,
2032                  do it as the IOR of two shifts.  I.e., to rotate A
2033                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2034                  where C is the bitsize of A.
2035
2036                  It is theoretically possible that the target machine might
2037                  not be able to perform either shift and hence we would
2038                  be making two libcalls rather than just the one for the
2039                  shift (similarly if IOR could not be done).  We will allow
2040                  this extremely unlikely lossage to avoid complicating the
2041                  code below.  */
2042
2043               rtx subtarget = target == shifted ? 0 : target;
2044               rtx temp1;
2045               tree type = TREE_TYPE (amount);
2046               tree new_amount = make_tree (type, op1);
2047               tree other_amount
2048                 = fold (build (MINUS_EXPR, type,
2049                                convert (type,
2050                                         build_int_2 (GET_MODE_BITSIZE (mode),
2051                                                      0)),
2052                                amount));
2053
2054               shifted = force_reg (mode, shifted);
2055
2056               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2057                                    mode, shifted, new_amount, subtarget, 1);
2058               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2059                                     mode, shifted, other_amount, 0, 1);
2060               return expand_binop (mode, ior_optab, temp, temp1, target,
2061                                    unsignedp, methods);
2062             }
2063
2064           temp = expand_binop (mode,
2065                                left ? rotl_optab : rotr_optab,
2066                                shifted, op1, target, unsignedp, methods);
2067
2068           /* If we don't have the rotate, but we are rotating by a constant
2069              that is in range, try a rotate in the opposite direction.  */
2070
2071           if (temp == 0 && GET_CODE (op1) == CONST_INT
2072               && INTVAL (op1) > 0
2073               && (unsigned int) INTVAL (op1) < GET_MODE_BITSIZE (mode))
2074             temp = expand_binop (mode,
2075                                  left ? rotr_optab : rotl_optab,
2076                                  shifted,
2077                                  GEN_INT (GET_MODE_BITSIZE (mode)
2078                                           - INTVAL (op1)),
2079                                  target, unsignedp, methods);
2080         }
2081       else if (unsignedp)
2082         temp = expand_binop (mode,
2083                              left ? ashl_optab : lshr_optab,
2084                              shifted, op1, target, unsignedp, methods);
2085
2086       /* Do arithmetic shifts.
2087          Also, if we are going to widen the operand, we can just as well
2088          use an arithmetic right-shift instead of a logical one.  */
2089       if (temp == 0 && ! rotate
2090           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2091         {
2092           enum optab_methods methods1 = methods;
2093
2094           /* If trying to widen a log shift to an arithmetic shift,
2095              don't accept an arithmetic shift of the same size.  */
2096           if (unsignedp)
2097             methods1 = OPTAB_MUST_WIDEN;
2098
2099           /* Arithmetic shift */
2100
2101           temp = expand_binop (mode,
2102                                left ? ashl_optab : ashr_optab,
2103                                shifted, op1, target, unsignedp, methods1);
2104         }
2105
2106       /* We used to try extzv here for logical right shifts, but that was
2107          only useful for one machine, the VAX, and caused poor code
2108          generation there for lshrdi3, so the code was deleted and a
2109          define_expand for lshrsi3 was added to vax.md.  */
2110     }
2111
2112   if (temp == 0)
2113     abort ();
2114   return temp;
2115 }
2116 \f
2117 enum alg_code { alg_zero, alg_m, alg_shift,
2118                   alg_add_t_m2, alg_sub_t_m2,
2119                   alg_add_factor, alg_sub_factor,
2120                   alg_add_t2_m, alg_sub_t2_m,
2121                   alg_add, alg_subtract, alg_factor, alg_shiftop };
2122
2123 /* This structure records a sequence of operations.
2124    `ops' is the number of operations recorded.
2125    `cost' is their total cost.
2126    The operations are stored in `op' and the corresponding
2127    logarithms of the integer coefficients in `log'.
2128
2129    These are the operations:
2130    alg_zero             total := 0;
2131    alg_m                total := multiplicand;
2132    alg_shift            total := total * coeff
2133    alg_add_t_m2         total := total + multiplicand * coeff;
2134    alg_sub_t_m2         total := total - multiplicand * coeff;
2135    alg_add_factor       total := total * coeff + total;
2136    alg_sub_factor       total := total * coeff - total;
2137    alg_add_t2_m         total := total * coeff + multiplicand;
2138    alg_sub_t2_m         total := total * coeff - multiplicand;
2139
2140    The first operand must be either alg_zero or alg_m.  */
2141
2142 struct algorithm
2143 {
2144   short cost;
2145   short ops;
2146   /* The size of the OP and LOG fields are not directly related to the
2147      word size, but the worst-case algorithms will be if we have few
2148      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2149      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2150      in total wordsize operations.  */
2151   enum alg_code op[MAX_BITS_PER_WORD];
2152   char log[MAX_BITS_PER_WORD];
2153 };
2154
2155 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, int);
2156 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2157                                                  int, unsigned HOST_WIDE_INT *,
2158                                                  int *, int *);
2159 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2160 /* Compute and return the best algorithm for multiplying by T.
2161    The algorithm must cost less than cost_limit
2162    If retval.cost >= COST_LIMIT, no algorithm was found and all
2163    other field of the returned struct are undefined.  */
2164
2165 static void
2166 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2167             int cost_limit)
2168 {
2169   int m;
2170   struct algorithm *alg_in, *best_alg;
2171   int cost;
2172   unsigned HOST_WIDE_INT q;
2173
2174   /* Indicate that no algorithm is yet found.  If no algorithm
2175      is found, this value will be returned and indicate failure.  */
2176   alg_out->cost = cost_limit;
2177
2178   if (cost_limit <= 0)
2179     return;
2180
2181   /* t == 1 can be done in zero cost.  */
2182   if (t == 1)
2183     {
2184       alg_out->ops = 1;
2185       alg_out->cost = 0;
2186       alg_out->op[0] = alg_m;
2187       return;
2188     }
2189
2190   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2191      fail now.  */
2192   if (t == 0)
2193     {
2194       if (zero_cost >= cost_limit)
2195         return;
2196       else
2197         {
2198           alg_out->ops = 1;
2199           alg_out->cost = zero_cost;
2200           alg_out->op[0] = alg_zero;
2201           return;
2202         }
2203     }
2204
2205   /* We'll be needing a couple extra algorithm structures now.  */
2206
2207   alg_in = alloca (sizeof (struct algorithm));
2208   best_alg = alloca (sizeof (struct algorithm));
2209
2210   /* If we have a group of zero bits at the low-order part of T, try
2211      multiplying by the remaining bits and then doing a shift.  */
2212
2213   if ((t & 1) == 0)
2214     {
2215       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2216       if (m < BITS_PER_WORD)
2217         {
2218           q = t >> m;
2219           cost = shift_cost[m];
2220           synth_mult (alg_in, q, cost_limit - cost);
2221
2222           cost += alg_in->cost;
2223           if (cost < cost_limit)
2224             {
2225               struct algorithm *x;
2226               x = alg_in, alg_in = best_alg, best_alg = x;
2227               best_alg->log[best_alg->ops] = m;
2228               best_alg->op[best_alg->ops] = alg_shift;
2229               cost_limit = cost;
2230             }
2231         }
2232     }
2233
2234   /* If we have an odd number, add or subtract one.  */
2235   if ((t & 1) != 0)
2236     {
2237       unsigned HOST_WIDE_INT w;
2238
2239       for (w = 1; (w & t) != 0; w <<= 1)
2240         ;
2241       /* If T was -1, then W will be zero after the loop.  This is another
2242          case where T ends with ...111.  Handling this with (T + 1) and
2243          subtract 1 produces slightly better code and results in algorithm
2244          selection much faster than treating it like the ...0111 case
2245          below.  */
2246       if (w == 0
2247           || (w > 2
2248               /* Reject the case where t is 3.
2249                  Thus we prefer addition in that case.  */
2250               && t != 3))
2251         {
2252           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2253
2254           cost = add_cost;
2255           synth_mult (alg_in, t + 1, cost_limit - cost);
2256
2257           cost += alg_in->cost;
2258           if (cost < cost_limit)
2259             {
2260               struct algorithm *x;
2261               x = alg_in, alg_in = best_alg, best_alg = x;
2262               best_alg->log[best_alg->ops] = 0;
2263               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2264               cost_limit = cost;
2265             }
2266         }
2267       else
2268         {
2269           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2270
2271           cost = add_cost;
2272           synth_mult (alg_in, t - 1, cost_limit - cost);
2273
2274           cost += alg_in->cost;
2275           if (cost < cost_limit)
2276             {
2277               struct algorithm *x;
2278               x = alg_in, alg_in = best_alg, best_alg = x;
2279               best_alg->log[best_alg->ops] = 0;
2280               best_alg->op[best_alg->ops] = alg_add_t_m2;
2281               cost_limit = cost;
2282             }
2283         }
2284     }
2285
2286   /* Look for factors of t of the form
2287      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2288      If we find such a factor, we can multiply by t using an algorithm that
2289      multiplies by q, shift the result by m and add/subtract it to itself.
2290
2291      We search for large factors first and loop down, even if large factors
2292      are less probable than small; if we find a large factor we will find a
2293      good sequence quickly, and therefore be able to prune (by decreasing
2294      COST_LIMIT) the search.  */
2295
2296   for (m = floor_log2 (t - 1); m >= 2; m--)
2297     {
2298       unsigned HOST_WIDE_INT d;
2299
2300       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2301       if (t % d == 0 && t > d && m < BITS_PER_WORD)
2302         {
2303           cost = MIN (shiftadd_cost[m], add_cost + shift_cost[m]);
2304           synth_mult (alg_in, t / d, cost_limit - cost);
2305
2306           cost += alg_in->cost;
2307           if (cost < cost_limit)
2308             {
2309               struct algorithm *x;
2310               x = alg_in, alg_in = best_alg, best_alg = x;
2311               best_alg->log[best_alg->ops] = m;
2312               best_alg->op[best_alg->ops] = alg_add_factor;
2313               cost_limit = cost;
2314             }
2315           /* Other factors will have been taken care of in the recursion.  */
2316           break;
2317         }
2318
2319       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2320       if (t % d == 0 && t > d && m < BITS_PER_WORD)
2321         {
2322           cost = MIN (shiftsub_cost[m], add_cost + shift_cost[m]);
2323           synth_mult (alg_in, t / d, cost_limit - cost);
2324
2325           cost += alg_in->cost;
2326           if (cost < cost_limit)
2327             {
2328               struct algorithm *x;
2329               x = alg_in, alg_in = best_alg, best_alg = x;
2330               best_alg->log[best_alg->ops] = m;
2331               best_alg->op[best_alg->ops] = alg_sub_factor;
2332               cost_limit = cost;
2333             }
2334           break;
2335         }
2336     }
2337
2338   /* Try shift-and-add (load effective address) instructions,
2339      i.e. do a*3, a*5, a*9.  */
2340   if ((t & 1) != 0)
2341     {
2342       q = t - 1;
2343       q = q & -q;
2344       m = exact_log2 (q);
2345       if (m >= 0 && m < BITS_PER_WORD)
2346         {
2347           cost = shiftadd_cost[m];
2348           synth_mult (alg_in, (t - 1) >> m, cost_limit - cost);
2349
2350           cost += alg_in->cost;
2351           if (cost < cost_limit)
2352             {
2353               struct algorithm *x;
2354               x = alg_in, alg_in = best_alg, best_alg = x;
2355               best_alg->log[best_alg->ops] = m;
2356               best_alg->op[best_alg->ops] = alg_add_t2_m;
2357               cost_limit = cost;
2358             }
2359         }
2360
2361       q = t + 1;
2362       q = q & -q;
2363       m = exact_log2 (q);
2364       if (m >= 0 && m < BITS_PER_WORD)
2365         {
2366           cost = shiftsub_cost[m];
2367           synth_mult (alg_in, (t + 1) >> m, cost_limit - cost);
2368
2369           cost += alg_in->cost;
2370           if (cost < cost_limit)
2371             {
2372               struct algorithm *x;
2373               x = alg_in, alg_in = best_alg, best_alg = x;
2374               best_alg->log[best_alg->ops] = m;
2375               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2376               cost_limit = cost;
2377             }
2378         }
2379     }
2380
2381   /* If cost_limit has not decreased since we stored it in alg_out->cost,
2382      we have not found any algorithm.  */
2383   if (cost_limit == alg_out->cost)
2384     return;
2385
2386   /* If we are getting a too long sequence for `struct algorithm'
2387      to record, make this search fail.  */
2388   if (best_alg->ops == MAX_BITS_PER_WORD)
2389     return;
2390
2391   /* Copy the algorithm from temporary space to the space at alg_out.
2392      We avoid using structure assignment because the majority of
2393      best_alg is normally undefined, and this is a critical function.  */
2394   alg_out->ops = best_alg->ops + 1;
2395   alg_out->cost = cost_limit;
2396   memcpy (alg_out->op, best_alg->op,
2397           alg_out->ops * sizeof *alg_out->op);
2398   memcpy (alg_out->log, best_alg->log,
2399           alg_out->ops * sizeof *alg_out->log);
2400 }
2401 \f
2402 /* Perform a multiplication and return an rtx for the result.
2403    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2404    TARGET is a suggestion for where to store the result (an rtx).
2405
2406    We check specially for a constant integer as OP1.
2407    If you want this check for OP0 as well, then before calling
2408    you should swap the two operands if OP0 would be constant.  */
2409
2410 rtx
2411 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
2412              int unsignedp)
2413 {
2414   rtx const_op1 = op1;
2415
2416   /* synth_mult does an `unsigned int' multiply.  As long as the mode is
2417      less than or equal in size to `unsigned int' this doesn't matter.
2418      If the mode is larger than `unsigned int', then synth_mult works only
2419      if the constant value exactly fits in an `unsigned int' without any
2420      truncation.  This means that multiplying by negative values does
2421      not work; results are off by 2^32 on a 32 bit machine.  */
2422
2423   /* If we are multiplying in DImode, it may still be a win
2424      to try to work with shifts and adds.  */
2425   if (GET_CODE (op1) == CONST_DOUBLE
2426       && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2427       && HOST_BITS_PER_INT >= BITS_PER_WORD
2428       && CONST_DOUBLE_HIGH (op1) == 0)
2429     const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2430   else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2431            && GET_CODE (op1) == CONST_INT
2432            && INTVAL (op1) < 0)
2433     const_op1 = 0;
2434
2435   /* We used to test optimize here, on the grounds that it's better to
2436      produce a smaller program when -O is not used.
2437      But this causes such a terrible slowdown sometimes
2438      that it seems better to use synth_mult always.  */
2439
2440   if (const_op1 && GET_CODE (const_op1) == CONST_INT
2441       && (unsignedp || ! flag_trapv))
2442     {
2443       struct algorithm alg;
2444       struct algorithm alg2;
2445       HOST_WIDE_INT val = INTVAL (op1);
2446       HOST_WIDE_INT val_so_far;
2447       rtx insn;
2448       int mult_cost;
2449       enum {basic_variant, negate_variant, add_variant} variant = basic_variant;
2450
2451       /* op0 must be register to make mult_cost match the precomputed
2452          shiftadd_cost array.  */
2453       op0 = force_reg (mode, op0);
2454
2455       /* Try to do the computation three ways: multiply by the negative of OP1
2456          and then negate, do the multiplication directly, or do multiplication
2457          by OP1 - 1.  */
2458
2459       mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
2460       mult_cost = MIN (12 * add_cost, mult_cost);
2461
2462       synth_mult (&alg, val, mult_cost);
2463
2464       /* This works only if the inverted value actually fits in an
2465          `unsigned int' */
2466       if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2467         {
2468           synth_mult (&alg2, - val,
2469                       (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
2470           if (alg2.cost + negate_cost < alg.cost)
2471             alg = alg2, variant = negate_variant;
2472         }
2473
2474       /* This proves very useful for division-by-constant.  */
2475       synth_mult (&alg2, val - 1,
2476                   (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost);
2477       if (alg2.cost + add_cost < alg.cost)
2478         alg = alg2, variant = add_variant;
2479
2480       if (alg.cost < mult_cost)
2481         {
2482           /* We found something cheaper than a multiply insn.  */
2483           int opno;
2484           rtx accum, tem;
2485           enum machine_mode nmode;
2486
2487           op0 = protect_from_queue (op0, 0);
2488
2489           /* Avoid referencing memory over and over.
2490              For speed, but also for correctness when mem is volatile.  */
2491           if (GET_CODE (op0) == MEM)
2492             op0 = force_reg (mode, op0);
2493
2494           /* ACCUM starts out either as OP0 or as a zero, depending on
2495              the first operation.  */
2496
2497           if (alg.op[0] == alg_zero)
2498             {
2499               accum = copy_to_mode_reg (mode, const0_rtx);
2500               val_so_far = 0;
2501             }
2502           else if (alg.op[0] == alg_m)
2503             {
2504               accum = copy_to_mode_reg (mode, op0);
2505               val_so_far = 1;
2506             }
2507           else
2508             abort ();
2509
2510           for (opno = 1; opno < alg.ops; opno++)
2511             {
2512               int log = alg.log[opno];
2513               int preserve = preserve_subexpressions_p ();
2514               rtx shift_subtarget = preserve ? 0 : accum;
2515               rtx add_target
2516                 = (opno == alg.ops - 1 && target != 0 && variant != add_variant
2517                    && ! preserve)
2518                   ? target : 0;
2519               rtx accum_target = preserve ? 0 : accum;
2520
2521               switch (alg.op[opno])
2522                 {
2523                 case alg_shift:
2524                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2525                                         build_int_2 (log, 0), NULL_RTX, 0);
2526                   val_so_far <<= log;
2527                   break;
2528
2529                 case alg_add_t_m2:
2530                   tem = expand_shift (LSHIFT_EXPR, mode, op0,
2531                                       build_int_2 (log, 0), NULL_RTX, 0);
2532                   accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2533                                          add_target
2534                                          ? add_target : accum_target);
2535                   val_so_far += (HOST_WIDE_INT) 1 << log;
2536                   break;
2537
2538                 case alg_sub_t_m2:
2539                   tem = expand_shift (LSHIFT_EXPR, mode, op0,
2540                                       build_int_2 (log, 0), NULL_RTX, 0);
2541                   accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2542                                          add_target
2543                                          ? add_target : accum_target);
2544                   val_so_far -= (HOST_WIDE_INT) 1 << log;
2545                   break;
2546
2547                 case alg_add_t2_m:
2548                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2549                                         build_int_2 (log, 0), shift_subtarget,
2550                                         0);
2551                   accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2552                                          add_target
2553                                          ? add_target : accum_target);
2554                   val_so_far = (val_so_far << log) + 1;
2555                   break;
2556
2557                 case alg_sub_t2_m:
2558                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2559                                         build_int_2 (log, 0), shift_subtarget,
2560                                         0);
2561                   accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2562                                          add_target
2563                                          ? add_target : accum_target);
2564                   val_so_far = (val_so_far << log) - 1;
2565                   break;
2566
2567                 case alg_add_factor:
2568                   tem = expand_shift (LSHIFT_EXPR, mode, accum,
2569                                       build_int_2 (log, 0), NULL_RTX, 0);
2570                   accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2571                                          add_target
2572                                          ? add_target : accum_target);
2573                   val_so_far += val_so_far << log;
2574                   break;
2575
2576                 case alg_sub_factor:
2577                   tem = expand_shift (LSHIFT_EXPR, mode, accum,
2578                                       build_int_2 (log, 0), NULL_RTX, 0);
2579                   accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2580                                          (add_target ? add_target
2581                                           : preserve ? 0 : tem));
2582                   val_so_far = (val_so_far << log) - val_so_far;
2583                   break;
2584
2585                 default:
2586                   abort ();
2587                 }
2588
2589               /* Write a REG_EQUAL note on the last insn so that we can cse
2590                  multiplication sequences.  Note that if ACCUM is a SUBREG,
2591                  we've set the inner register and must properly indicate
2592                  that.  */
2593
2594               tem = op0, nmode = mode;
2595               if (GET_CODE (accum) == SUBREG)
2596                 {
2597                   nmode = GET_MODE (SUBREG_REG (accum));
2598                   tem = gen_lowpart (nmode, op0);
2599                 }
2600
2601               insn = get_last_insn ();
2602               set_unique_reg_note (insn,
2603                                    REG_EQUAL,
2604                                    gen_rtx_MULT (nmode, tem,
2605                                                  GEN_INT (val_so_far)));
2606             }
2607
2608           if (variant == negate_variant)
2609             {
2610               val_so_far = - val_so_far;
2611               accum = expand_unop (mode, neg_optab, accum, target, 0);
2612             }
2613           else if (variant == add_variant)
2614             {
2615               val_so_far = val_so_far + 1;
2616               accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2617             }
2618
2619           if (val != val_so_far)
2620             abort ();
2621
2622           return accum;
2623         }
2624     }
2625
2626   if (GET_CODE (op0) == CONST_DOUBLE)
2627     {
2628       rtx temp = op0;
2629       op0 = op1;
2630       op1 = temp;
2631     }
2632
2633   /* Expand x*2.0 as x+x.  */
2634   if (GET_CODE (op1) == CONST_DOUBLE
2635       && GET_MODE_CLASS (mode) == MODE_FLOAT)
2636     {
2637       REAL_VALUE_TYPE d;
2638       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
2639
2640       if (REAL_VALUES_EQUAL (d, dconst2))
2641         {
2642           op0 = force_reg (GET_MODE (op0), op0);
2643           return expand_binop (mode, add_optab, op0, op0,
2644                                target, unsignedp, OPTAB_LIB_WIDEN);
2645         }
2646     }
2647
2648   /* This used to use umul_optab if unsigned, but for non-widening multiply
2649      there is no difference between signed and unsigned.  */
2650   op0 = expand_binop (mode,
2651                       ! unsignedp
2652                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
2653                       ? smulv_optab : smul_optab,
2654                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2655   if (op0 == 0)
2656     abort ();
2657   return op0;
2658 }
2659 \f
2660 /* Return the smallest n such that 2**n >= X.  */
2661
2662 int
2663 ceil_log2 (unsigned HOST_WIDE_INT x)
2664 {
2665   return floor_log2 (x - 1) + 1;
2666 }
2667
2668 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2669    replace division by D, and put the least significant N bits of the result
2670    in *MULTIPLIER_PTR and return the most significant bit.
2671
2672    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
2673    needed precision is in PRECISION (should be <= N).
2674
2675    PRECISION should be as small as possible so this function can choose
2676    multiplier more freely.
2677
2678    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
2679    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2680
2681    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2682    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
2683
2684 static
2685 unsigned HOST_WIDE_INT
2686 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
2687                    unsigned HOST_WIDE_INT *multiplier_ptr,
2688                    int *post_shift_ptr, int *lgup_ptr)
2689 {
2690   HOST_WIDE_INT mhigh_hi, mlow_hi;
2691   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
2692   int lgup, post_shift;
2693   int pow, pow2;
2694   unsigned HOST_WIDE_INT nl, dummy1;
2695   HOST_WIDE_INT nh, dummy2;
2696
2697   /* lgup = ceil(log2(divisor)); */
2698   lgup = ceil_log2 (d);
2699
2700   if (lgup > n)
2701     abort ();
2702
2703   pow = n + lgup;
2704   pow2 = n + lgup - precision;
2705
2706   if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2707     {
2708       /* We could handle this with some effort, but this case is much better
2709          handled directly with a scc insn, so rely on caller using that.  */
2710       abort ();
2711     }
2712
2713   /* mlow = 2^(N + lgup)/d */
2714  if (pow >= HOST_BITS_PER_WIDE_INT)
2715     {
2716       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2717       nl = 0;
2718     }
2719   else
2720     {
2721       nh = 0;
2722       nl = (unsigned HOST_WIDE_INT) 1 << pow;
2723     }
2724   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2725                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2726
2727   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
2728   if (pow2 >= HOST_BITS_PER_WIDE_INT)
2729     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2730   else
2731     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2732   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2733                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2734
2735   if (mhigh_hi && nh - d >= d)
2736     abort ();
2737   if (mhigh_hi > 1 || mlow_hi > 1)
2738     abort ();
2739   /* Assert that mlow < mhigh.  */
2740   if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2741     abort ();
2742
2743   /* If precision == N, then mlow, mhigh exceed 2^N
2744      (but they do not exceed 2^(N+1)).  */
2745
2746   /* Reduce to lowest terms.  */
2747   for (post_shift = lgup; post_shift > 0; post_shift--)
2748     {
2749       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2750       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2751       if (ml_lo >= mh_lo)
2752         break;
2753
2754       mlow_hi = 0;
2755       mlow_lo = ml_lo;
2756       mhigh_hi = 0;
2757       mhigh_lo = mh_lo;
2758     }
2759
2760   *post_shift_ptr = post_shift;
2761   *lgup_ptr = lgup;
2762   if (n < HOST_BITS_PER_WIDE_INT)
2763     {
2764       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2765       *multiplier_ptr = mhigh_lo & mask;
2766       return mhigh_lo >= mask;
2767     }
2768   else
2769     {
2770       *multiplier_ptr = mhigh_lo;
2771       return mhigh_hi;
2772     }
2773 }
2774
2775 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
2776    congruent to 1 (mod 2**N).  */
2777
2778 static unsigned HOST_WIDE_INT
2779 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
2780 {
2781   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
2782
2783   /* The algorithm notes that the choice y = x satisfies
2784      x*y == 1 mod 2^3, since x is assumed odd.
2785      Each iteration doubles the number of bits of significance in y.  */
2786
2787   unsigned HOST_WIDE_INT mask;
2788   unsigned HOST_WIDE_INT y = x;
2789   int nbit = 3;
2790
2791   mask = (n == HOST_BITS_PER_WIDE_INT
2792           ? ~(unsigned HOST_WIDE_INT) 0
2793           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2794
2795   while (nbit < n)
2796     {
2797       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
2798       nbit *= 2;
2799     }
2800   return y;
2801 }
2802
2803 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
2804    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
2805    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
2806    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
2807    become signed.
2808
2809    The result is put in TARGET if that is convenient.
2810
2811    MODE is the mode of operation.  */
2812
2813 rtx
2814 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
2815                              rtx op1, rtx target, int unsignedp)
2816 {
2817   rtx tem;
2818   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
2819
2820   tem = expand_shift (RSHIFT_EXPR, mode, op0,
2821                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2822                       NULL_RTX, 0);
2823   tem = expand_and (mode, tem, op1, NULL_RTX);
2824   adj_operand
2825     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2826                      adj_operand);
2827
2828   tem = expand_shift (RSHIFT_EXPR, mode, op1,
2829                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2830                       NULL_RTX, 0);
2831   tem = expand_and (mode, tem, op0, NULL_RTX);
2832   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2833                           target);
2834
2835   return target;
2836 }
2837
2838 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
2839    in TARGET if that is convenient, and return where the result is.  If the
2840    operation can not be performed, 0 is returned.
2841
2842    MODE is the mode of operation and result.
2843
2844    UNSIGNEDP nonzero means unsigned multiply.
2845
2846    MAX_COST is the total allowed cost for the expanded RTL.  */
2847
2848 rtx
2849 expand_mult_highpart (enum machine_mode mode, rtx op0,
2850                       unsigned HOST_WIDE_INT cnst1, rtx target,
2851                       int unsignedp, int max_cost)
2852 {
2853   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
2854   optab mul_highpart_optab;
2855   optab moptab;
2856   rtx tem;
2857   int size = GET_MODE_BITSIZE (mode);
2858   rtx op1, wide_op1;
2859
2860   /* We can't support modes wider than HOST_BITS_PER_INT.  */
2861   if (size > HOST_BITS_PER_WIDE_INT)
2862     abort ();
2863
2864   op1 = gen_int_mode (cnst1, mode);
2865
2866   wide_op1
2867     = immed_double_const (cnst1,
2868                           (unsignedp
2869                            ? (HOST_WIDE_INT) 0
2870                            : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))),
2871                           wider_mode);
2872
2873   /* expand_mult handles constant multiplication of word_mode
2874      or narrower.  It does a poor job for large modes.  */
2875   if (size < BITS_PER_WORD
2876       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2877     {
2878       /* We have to do this, since expand_binop doesn't do conversion for
2879          multiply.  Maybe change expand_binop to handle widening multiply?  */
2880       op0 = convert_to_mode (wider_mode, op0, unsignedp);
2881
2882       /* We know that this can't have signed overflow, so pretend this is
2883          an unsigned multiply.  */
2884       tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, 0);
2885       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2886                           build_int_2 (size, 0), NULL_RTX, 1);
2887       return convert_modes (mode, wider_mode, tem, unsignedp);
2888     }
2889
2890   if (target == 0)
2891     target = gen_reg_rtx (mode);
2892
2893   /* Firstly, try using a multiplication insn that only generates the needed
2894      high part of the product, and in the sign flavor of unsignedp.  */
2895   if (mul_highpart_cost[(int) mode] < max_cost)
2896     {
2897       mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
2898       target = expand_binop (mode, mul_highpart_optab,
2899                              op0, op1, target, unsignedp, OPTAB_DIRECT);
2900       if (target)
2901         return target;
2902     }
2903
2904   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
2905      Need to adjust the result after the multiplication.  */
2906   if (size - 1 < BITS_PER_WORD
2907       && (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost
2908           < max_cost))
2909     {
2910       mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
2911       target = expand_binop (mode, mul_highpart_optab,
2912                              op0, op1, target, unsignedp, OPTAB_DIRECT);
2913       if (target)
2914         /* We used the wrong signedness.  Adjust the result.  */
2915         return expand_mult_highpart_adjust (mode, target, op0,
2916                                             op1, target, unsignedp);
2917     }
2918
2919   /* Try widening multiplication.  */
2920   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
2921   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2922       && mul_widen_cost[(int) wider_mode] < max_cost)
2923     {
2924       op1 = force_reg (mode, op1);
2925       goto try;
2926     }
2927
2928   /* Try widening the mode and perform a non-widening multiplication.  */
2929   moptab = smul_optab;
2930   if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2931       && size - 1 < BITS_PER_WORD
2932       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2933     {
2934       op1 = wide_op1;
2935       goto try;
2936     }
2937
2938   /* Try widening multiplication of opposite signedness, and adjust.  */
2939   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
2940   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2941       && size - 1 < BITS_PER_WORD
2942       && (mul_widen_cost[(int) wider_mode]
2943           + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
2944     {
2945       rtx regop1 = force_reg (mode, op1);
2946       tem = expand_binop (wider_mode, moptab, op0, regop1,
2947                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
2948       if (tem != 0)
2949         {
2950           /* Extract the high half of the just generated product.  */
2951           tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2952                               build_int_2 (size, 0), NULL_RTX, 1);
2953           tem = convert_modes (mode, wider_mode, tem, unsignedp);
2954           /* We used the wrong signedness.  Adjust the result.  */
2955           return expand_mult_highpart_adjust (mode, tem, op0, op1,
2956                                               target, unsignedp);
2957         }
2958     }
2959
2960   return 0;
2961
2962  try:
2963   /* Pass NULL_RTX as target since TARGET has wrong mode.  */
2964   tem = expand_binop (wider_mode, moptab, op0, op1,
2965                       NULL_RTX, unsignedp, OPTAB_WIDEN);
2966   if (tem == 0)
2967     return 0;
2968
2969   /* Extract the high half of the just generated product.  */
2970   if (mode == word_mode)
2971     {
2972       return gen_highpart (mode, tem);
2973     }
2974   else
2975     {
2976       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2977                           build_int_2 (size, 0), NULL_RTX, 1);
2978       return convert_modes (mode, wider_mode, tem, unsignedp);
2979     }
2980 }
2981 \f
2982 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
2983    if that is convenient, and returning where the result is.
2984    You may request either the quotient or the remainder as the result;
2985    specify REM_FLAG nonzero to get the remainder.
2986
2987    CODE is the expression code for which kind of division this is;
2988    it controls how rounding is done.  MODE is the machine mode to use.
2989    UNSIGNEDP nonzero means do unsigned division.  */
2990
2991 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
2992    and then correct it by or'ing in missing high bits
2993    if result of ANDI is nonzero.
2994    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
2995    This could optimize to a bfexts instruction.
2996    But C doesn't use these operations, so their optimizations are
2997    left for later.  */
2998 /* ??? For modulo, we don't actually need the highpart of the first product,
2999    the low part will do nicely.  And for small divisors, the second multiply
3000    can also be a low-part only multiply or even be completely left out.
3001    E.g. to calculate the remainder of a division by 3 with a 32 bit
3002    multiply, multiply with 0x55555556 and extract the upper two bits;
3003    the result is exact for inputs up to 0x1fffffff.
3004    The input range can be reduced by using cross-sum rules.
3005    For odd divisors >= 3, the following table gives right shift counts
3006    so that if a number is shifted by an integer multiple of the given
3007    amount, the remainder stays the same:
3008    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3009    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3010    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3011    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3012    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3013
3014    Cross-sum rules for even numbers can be derived by leaving as many bits
3015    to the right alone as the divisor has zeros to the right.
3016    E.g. if x is an unsigned 32 bit number:
3017    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3018    */
3019
3020 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
3021
3022 rtx
3023 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3024                rtx op0, rtx op1, rtx target, int unsignedp)
3025 {
3026   enum machine_mode compute_mode;
3027   rtx tquotient;
3028   rtx quotient = 0, remainder = 0;
3029   rtx last;
3030   int size;
3031   rtx insn, set;
3032   optab optab1, optab2;
3033   int op1_is_constant, op1_is_pow2 = 0;
3034   int max_cost, extra_cost;
3035   static HOST_WIDE_INT last_div_const = 0;
3036   static HOST_WIDE_INT ext_op1;
3037
3038   op1_is_constant = GET_CODE (op1) == CONST_INT;
3039   if (op1_is_constant)
3040     {
3041       ext_op1 = INTVAL (op1);
3042       if (unsignedp)
3043         ext_op1 &= GET_MODE_MASK (mode);
3044       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3045                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3046     }
3047
3048   /*
3049      This is the structure of expand_divmod:
3050
3051      First comes code to fix up the operands so we can perform the operations
3052      correctly and efficiently.
3053
3054      Second comes a switch statement with code specific for each rounding mode.
3055      For some special operands this code emits all RTL for the desired
3056      operation, for other cases, it generates only a quotient and stores it in
3057      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3058      to indicate that it has not done anything.
3059
3060      Last comes code that finishes the operation.  If QUOTIENT is set and
3061      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3062      QUOTIENT is not set, it is computed using trunc rounding.
3063
3064      We try to generate special code for division and remainder when OP1 is a
3065      constant.  If |OP1| = 2**n we can use shifts and some other fast
3066      operations.  For other values of OP1, we compute a carefully selected
3067      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3068      by m.
3069
3070      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3071      half of the product.  Different strategies for generating the product are
3072      implemented in expand_mult_highpart.
3073
3074      If what we actually want is the remainder, we generate that by another
3075      by-constant multiplication and a subtraction.  */
3076
3077   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3078      code below will malfunction if we are, so check here and handle
3079      the special case if so.  */
3080   if (op1 == const1_rtx)
3081     return rem_flag ? const0_rtx : op0;
3082
3083     /* When dividing by -1, we could get an overflow.
3084      negv_optab can handle overflows.  */
3085   if (! unsignedp && op1 == constm1_rtx)
3086     {
3087       if (rem_flag)
3088         return const0_rtx;
3089       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3090                           ? negv_optab : neg_optab, op0, target, 0);
3091     }
3092
3093   if (target
3094       /* Don't use the function value register as a target
3095          since we have to read it as well as write it,
3096          and function-inlining gets confused by this.  */
3097       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3098           /* Don't clobber an operand while doing a multi-step calculation.  */
3099           || ((rem_flag || op1_is_constant)
3100               && (reg_mentioned_p (target, op0)
3101                   || (GET_CODE (op0) == MEM && GET_CODE (target) == MEM)))
3102           || reg_mentioned_p (target, op1)
3103           || (GET_CODE (op1) == MEM && GET_CODE (target) == MEM)))
3104     target = 0;
3105
3106   /* Get the mode in which to perform this computation.  Normally it will
3107      be MODE, but sometimes we can't do the desired operation in MODE.
3108      If so, pick a wider mode in which we can do the operation.  Convert
3109      to that mode at the start to avoid repeated conversions.
3110
3111      First see what operations we need.  These depend on the expression
3112      we are evaluating.  (We assume that divxx3 insns exist under the
3113      same conditions that modxx3 insns and that these insns don't normally
3114      fail.  If these assumptions are not correct, we may generate less
3115      efficient code in some cases.)
3116
3117      Then see if we find a mode in which we can open-code that operation
3118      (either a division, modulus, or shift).  Finally, check for the smallest
3119      mode for which we can do the operation with a library call.  */
3120
3121   /* We might want to refine this now that we have division-by-constant
3122      optimization.  Since expand_mult_highpart tries so many variants, it is
3123      not straightforward to generalize this.  Maybe we should make an array
3124      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3125
3126   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3127             ? (unsignedp ? lshr_optab : ashr_optab)
3128             : (unsignedp ? udiv_optab : sdiv_optab));
3129   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3130             ? optab1
3131             : (unsignedp ? udivmod_optab : sdivmod_optab));
3132
3133   for (compute_mode = mode; compute_mode != VOIDmode;
3134        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3135     if (optab1->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing
3136         || optab2->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing)
3137       break;
3138
3139   if (compute_mode == VOIDmode)
3140     for (compute_mode = mode; compute_mode != VOIDmode;
3141          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3142       if (optab1->handlers[(int) compute_mode].libfunc
3143           || optab2->handlers[(int) compute_mode].libfunc)
3144         break;
3145
3146   /* If we still couldn't find a mode, use MODE, but we'll probably abort
3147      in expand_binop.  */
3148   if (compute_mode == VOIDmode)
3149     compute_mode = mode;
3150
3151   if (target && GET_MODE (target) == compute_mode)
3152     tquotient = target;
3153   else
3154     tquotient = gen_reg_rtx (compute_mode);
3155
3156   size = GET_MODE_BITSIZE (compute_mode);
3157 #if 0
3158   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3159      (mode), and thereby get better code when OP1 is a constant.  Do that
3160      later.  It will require going over all usages of SIZE below.  */
3161   size = GET_MODE_BITSIZE (mode);
3162 #endif
3163
3164   /* Only deduct something for a REM if the last divide done was
3165      for a different constant.   Then set the constant of the last
3166      divide.  */
3167   max_cost = div_cost[(int) compute_mode]
3168     - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3169                       && INTVAL (op1) == last_div_const)
3170        ? mul_cost[(int) compute_mode] + add_cost : 0);
3171
3172   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3173
3174   /* Now convert to the best mode to use.  */
3175   if (compute_mode != mode)
3176     {
3177       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3178       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3179
3180       /* convert_modes may have placed op1 into a register, so we
3181          must recompute the following.  */
3182       op1_is_constant = GET_CODE (op1) == CONST_INT;
3183       op1_is_pow2 = (op1_is_constant
3184                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3185                           || (! unsignedp
3186                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3187     }
3188
3189   /* If one of the operands is a volatile MEM, copy it into a register.  */
3190
3191   if (GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0))
3192     op0 = force_reg (compute_mode, op0);
3193   if (GET_CODE (op1) == MEM && MEM_VOLATILE_P (op1))
3194     op1 = force_reg (compute_mode, op1);
3195
3196   /* If we need the remainder or if OP1 is constant, we need to
3197      put OP0 in a register in case it has any queued subexpressions.  */
3198   if (rem_flag || op1_is_constant)
3199     op0 = force_reg (compute_mode, op0);
3200
3201   last = get_last_insn ();
3202
3203   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3204   if (unsignedp)
3205     {
3206       if (code == FLOOR_DIV_EXPR)
3207         code = TRUNC_DIV_EXPR;
3208       if (code == FLOOR_MOD_EXPR)
3209         code = TRUNC_MOD_EXPR;
3210       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3211         code = TRUNC_DIV_EXPR;
3212     }
3213
3214   if (op1 != const0_rtx)
3215     switch (code)
3216       {
3217       case TRUNC_MOD_EXPR:
3218       case TRUNC_DIV_EXPR:
3219         if (op1_is_constant)
3220           {
3221             if (unsignedp)
3222               {
3223                 unsigned HOST_WIDE_INT mh, ml;
3224                 int pre_shift, post_shift;
3225                 int dummy;
3226                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3227                                             & GET_MODE_MASK (compute_mode));
3228
3229                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3230                   {
3231                     pre_shift = floor_log2 (d);
3232                     if (rem_flag)
3233                       {
3234                         remainder
3235                           = expand_binop (compute_mode, and_optab, op0,
3236                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3237                                           remainder, 1,
3238                                           OPTAB_LIB_WIDEN);
3239                         if (remainder)
3240                           return gen_lowpart (mode, remainder);
3241                       }
3242                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3243                                              build_int_2 (pre_shift, 0),
3244                                              tquotient, 1);
3245                   }
3246                 else if (size <= HOST_BITS_PER_WIDE_INT)
3247                   {
3248                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3249                       {
3250                         /* Most significant bit of divisor is set; emit an scc
3251                            insn.  */
3252                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3253                                                     compute_mode, 1, 1);
3254                         if (quotient == 0)
3255                           goto fail1;
3256                       }
3257                     else
3258                       {
3259                         /* Find a suitable multiplier and right shift count
3260                            instead of multiplying with D.  */
3261
3262                         mh = choose_multiplier (d, size, size,
3263                                                 &ml, &post_shift, &dummy);
3264
3265                         /* If the suggested multiplier is more than SIZE bits,
3266                            we can do better for even divisors, using an
3267                            initial right shift.  */
3268                         if (mh != 0 && (d & 1) == 0)
3269                           {
3270                             pre_shift = floor_log2 (d & -d);
3271                             mh = choose_multiplier (d >> pre_shift, size,
3272                                                     size - pre_shift,
3273                                                     &ml, &post_shift, &dummy);
3274                             if (mh)
3275                               abort ();
3276                           }
3277                         else
3278                           pre_shift = 0;
3279
3280                         if (mh != 0)
3281                           {
3282                             rtx t1, t2, t3, t4;
3283
3284                             if (post_shift - 1 >= BITS_PER_WORD)
3285                               goto fail1;
3286
3287                             extra_cost = (shift_cost[post_shift - 1]
3288                                           + shift_cost[1] + 2 * add_cost);
3289                             t1 = expand_mult_highpart (compute_mode, op0, ml,
3290                                                        NULL_RTX, 1,
3291                                                        max_cost - extra_cost);
3292                             if (t1 == 0)
3293                               goto fail1;
3294                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
3295                                                                op0, t1),
3296                                                 NULL_RTX);
3297                             t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3298                                                build_int_2 (1, 0), NULL_RTX,1);
3299                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
3300                                                               t1, t3),
3301                                                 NULL_RTX);
3302                             quotient
3303                               = expand_shift (RSHIFT_EXPR, compute_mode, t4,
3304                                               build_int_2 (post_shift - 1, 0),
3305                                               tquotient, 1);
3306                           }
3307                         else
3308                           {
3309                             rtx t1, t2;
3310
3311                             if (pre_shift >= BITS_PER_WORD
3312                                 || post_shift >= BITS_PER_WORD)
3313                               goto fail1;
3314
3315                             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3316                                                build_int_2 (pre_shift, 0),
3317                                                NULL_RTX, 1);
3318                             extra_cost = (shift_cost[pre_shift]
3319                                           + shift_cost[post_shift]);
3320                             t2 = expand_mult_highpart (compute_mode, t1, ml,
3321                                                        NULL_RTX, 1,
3322                                                        max_cost - extra_cost);
3323                             if (t2 == 0)
3324                               goto fail1;
3325                             quotient
3326                               = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3327                                               build_int_2 (post_shift, 0),
3328                                               tquotient, 1);
3329                           }
3330                       }
3331                   }
3332                 else            /* Too wide mode to use tricky code */
3333                   break;
3334
3335                 insn = get_last_insn ();
3336                 if (insn != last
3337                     && (set = single_set (insn)) != 0
3338                     && SET_DEST (set) == quotient)
3339                   set_unique_reg_note (insn,
3340                                        REG_EQUAL,
3341                                        gen_rtx_UDIV (compute_mode, op0, op1));
3342               }
3343             else                /* TRUNC_DIV, signed */
3344               {
3345                 unsigned HOST_WIDE_INT ml;
3346                 int lgup, post_shift;
3347                 HOST_WIDE_INT d = INTVAL (op1);
3348                 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3349
3350                 /* n rem d = n rem -d */
3351                 if (rem_flag && d < 0)
3352                   {
3353                     d = abs_d;
3354                     op1 = gen_int_mode (abs_d, compute_mode);
3355                   }
3356
3357                 if (d == 1)
3358                   quotient = op0;
3359                 else if (d == -1)
3360                   quotient = expand_unop (compute_mode, neg_optab, op0,
3361                                           tquotient, 0);
3362                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3363                   {
3364                     /* This case is not handled correctly below.  */
3365                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
3366                                                 compute_mode, 1, 1);
3367                     if (quotient == 0)
3368                       goto fail1;
3369                   }
3370                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3371                          && (rem_flag ? smod_pow2_cheap : sdiv_pow2_cheap)
3372                          /* ??? The cheap metric is computed only for
3373                             word_mode.  If this operation is wider, this may
3374                             not be so.  Assume true if the optab has an
3375                             expander for this mode.  */
3376                          && (((rem_flag ? smod_optab : sdiv_optab)
3377                               ->handlers[(int) compute_mode].insn_code
3378                               != CODE_FOR_nothing)
3379                              || (sdivmod_optab->handlers[(int) compute_mode]
3380                                  .insn_code != CODE_FOR_nothing)))
3381                   ;
3382                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3383                   {
3384                     lgup = floor_log2 (abs_d);
3385                     if (BRANCH_COST < 1 || (abs_d != 2 && BRANCH_COST < 3))
3386                       {
3387                         rtx label = gen_label_rtx ();
3388                         rtx t1;
3389
3390                         t1 = copy_to_mode_reg (compute_mode, op0);
3391                         do_cmp_and_jump (t1, const0_rtx, GE,
3392                                          compute_mode, label);
3393                         expand_inc (t1, gen_int_mode (abs_d - 1,
3394                                                       compute_mode));
3395                         emit_label (label);
3396                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3397                                                  build_int_2 (lgup, 0),
3398                                                  tquotient, 0);
3399                       }
3400                     else
3401                       {
3402                         rtx t1, t2, t3;
3403                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3404                                            build_int_2 (size - 1, 0),
3405                                            NULL_RTX, 0);
3406                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3407                                            build_int_2 (size - lgup, 0),
3408                                            NULL_RTX, 1);
3409                         t3 = force_operand (gen_rtx_PLUS (compute_mode,
3410                                                           op0, t2),
3411                                             NULL_RTX);
3412                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3413                                                  build_int_2 (lgup, 0),
3414                                                  tquotient, 0);
3415                       }
3416
3417                     /* We have computed OP0 / abs(OP1).  If OP1 is negative, negate
3418                        the quotient.  */
3419                     if (d < 0)
3420                       {
3421                         insn = get_last_insn ();
3422                         if (insn != last
3423                             && (set = single_set (insn)) != 0
3424                             && SET_DEST (set) == quotient
3425                             && abs_d < ((unsigned HOST_WIDE_INT) 1
3426                                         << (HOST_BITS_PER_WIDE_INT - 1)))
3427                           set_unique_reg_note (insn,
3428                                                REG_EQUAL,
3429                                                gen_rtx_DIV (compute_mode,
3430                                                             op0,
3431                                                             GEN_INT
3432                                                             (trunc_int_for_mode
3433                                                              (abs_d,
3434                                                               compute_mode))));
3435
3436                         quotient = expand_unop (compute_mode, neg_optab,
3437                                                 quotient, quotient, 0);
3438                       }
3439                   }
3440                 else if (size <= HOST_BITS_PER_WIDE_INT)
3441                   {
3442                     choose_multiplier (abs_d, size, size - 1,
3443                                        &ml, &post_shift, &lgup);
3444                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3445                       {
3446                         rtx t1, t2, t3;
3447
3448                         if (post_shift >= BITS_PER_WORD
3449                             || size - 1 >= BITS_PER_WORD)
3450                           goto fail1;
3451
3452                         extra_cost = (shift_cost[post_shift]
3453                                       + shift_cost[size - 1] + add_cost);
3454                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3455                                                    NULL_RTX, 0,
3456                                                    max_cost - extra_cost);
3457                         if (t1 == 0)
3458                           goto fail1;
3459                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3460                                            build_int_2 (post_shift, 0), NULL_RTX, 0);
3461                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3462                                            build_int_2 (size - 1, 0), NULL_RTX, 0);
3463                         if (d < 0)
3464                           quotient
3465                             = force_operand (gen_rtx_MINUS (compute_mode,
3466                                                             t3, t2),
3467                                              tquotient);
3468                         else
3469                           quotient
3470                             = force_operand (gen_rtx_MINUS (compute_mode,
3471                                                             t2, t3),
3472                                              tquotient);
3473                       }
3474                     else
3475                       {
3476                         rtx t1, t2, t3, t4;
3477
3478                         if (post_shift >= BITS_PER_WORD
3479                             || size - 1 >= BITS_PER_WORD)
3480                           goto fail1;
3481
3482                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3483                         extra_cost = (shift_cost[post_shift]
3484                                       + shift_cost[size - 1] + 2 * add_cost);
3485                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3486                                                    NULL_RTX, 0,
3487                                                    max_cost - extra_cost);
3488                         if (t1 == 0)
3489                           goto fail1;
3490                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
3491                                                           t1, op0),
3492                                             NULL_RTX);
3493                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3494                                            build_int_2 (post_shift, 0),
3495                                            NULL_RTX, 0);
3496                         t4 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3497                                            build_int_2 (size - 1, 0),
3498                                            NULL_RTX, 0);
3499                         if (d < 0)
3500                           quotient
3501                             = force_operand (gen_rtx_MINUS (compute_mode,
3502                                                             t4, t3),
3503                                              tquotient);
3504                         else
3505                           quotient
3506                             = force_operand (gen_rtx_MINUS (compute_mode,
3507                                                             t3, t4),
3508                                              tquotient);
3509                       }
3510                   }
3511                 else            /* Too wide mode to use tricky code */
3512                   break;
3513
3514                 insn = get_last_insn ();
3515                 if (insn != last
3516                     && (set = single_set (insn)) != 0
3517                     && SET_DEST (set) == quotient)
3518                   set_unique_reg_note (insn,
3519                                        REG_EQUAL,
3520                                        gen_rtx_DIV (compute_mode, op0, op1));
3521               }
3522             break;
3523           }
3524       fail1:
3525         delete_insns_since (last);
3526         break;
3527
3528       case FLOOR_DIV_EXPR:
3529       case FLOOR_MOD_EXPR:
3530       /* We will come here only for signed operations.  */
3531         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3532           {
3533             unsigned HOST_WIDE_INT mh, ml;
3534             int pre_shift, lgup, post_shift;
3535             HOST_WIDE_INT d = INTVAL (op1);
3536
3537             if (d > 0)
3538               {
3539                 /* We could just as easily deal with negative constants here,
3540                    but it does not seem worth the trouble for GCC 2.6.  */
3541                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3542                   {
3543                     pre_shift = floor_log2 (d);
3544                     if (rem_flag)
3545                       {
3546                         remainder = expand_binop (compute_mode, and_optab, op0,
3547                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3548                                                   remainder, 0, OPTAB_LIB_WIDEN);
3549                         if (remainder)
3550                           return gen_lowpart (mode, remainder);
3551                       }
3552                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3553                                              build_int_2 (pre_shift, 0),
3554                                              tquotient, 0);
3555                   }
3556                 else
3557                   {
3558                     rtx t1, t2, t3, t4;
3559
3560                     mh = choose_multiplier (d, size, size - 1,
3561                                             &ml, &post_shift, &lgup);
3562                     if (mh)
3563                       abort ();
3564
3565                     if (post_shift < BITS_PER_WORD
3566                         && size - 1 < BITS_PER_WORD)
3567                       {
3568                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3569                                            build_int_2 (size - 1, 0),
3570                                            NULL_RTX, 0);
3571                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3572                                            NULL_RTX, 0, OPTAB_WIDEN);
3573                         extra_cost = (shift_cost[post_shift]
3574                                       + shift_cost[size - 1] + 2 * add_cost);
3575                         t3 = expand_mult_highpart (compute_mode, t2, ml,
3576                                                    NULL_RTX, 1,
3577                                                    max_cost - extra_cost);
3578                         if (t3 != 0)
3579                           {
3580                             t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3581                                                build_int_2 (post_shift, 0),
3582                                                NULL_RTX, 1);
3583                             quotient = expand_binop (compute_mode, xor_optab,
3584                                                      t4, t1, tquotient, 0,
3585                                                      OPTAB_WIDEN);
3586                           }
3587                       }
3588                   }
3589               }
3590             else
3591               {
3592                 rtx nsign, t1, t2, t3, t4;
3593                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
3594                                                   op0, constm1_rtx), NULL_RTX);
3595                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3596                                    0, OPTAB_WIDEN);
3597                 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3598                                       build_int_2 (size - 1, 0), NULL_RTX, 0);
3599                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
3600                                     NULL_RTX);
3601                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3602                                     NULL_RTX, 0);
3603                 if (t4)
3604                   {
3605                     rtx t5;
3606                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3607                                       NULL_RTX, 0);
3608                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
3609                                                             t4, t5),
3610                                               tquotient);
3611                   }
3612               }
3613           }
3614
3615         if (quotient != 0)
3616           break;
3617         delete_insns_since (last);
3618
3619         /* Try using an instruction that produces both the quotient and
3620            remainder, using truncation.  We can easily compensate the quotient
3621            or remainder to get floor rounding, once we have the remainder.
3622            Notice that we compute also the final remainder value here,
3623            and return the result right away.  */
3624         if (target == 0 || GET_MODE (target) != compute_mode)
3625           target = gen_reg_rtx (compute_mode);
3626
3627         if (rem_flag)
3628           {
3629             remainder
3630               = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3631             quotient = gen_reg_rtx (compute_mode);
3632           }
3633         else
3634           {
3635             quotient
3636               = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3637             remainder = gen_reg_rtx (compute_mode);
3638           }
3639
3640         if (expand_twoval_binop (sdivmod_optab, op0, op1,
3641                                  quotient, remainder, 0))
3642           {
3643             /* This could be computed with a branch-less sequence.
3644                Save that for later.  */
3645             rtx tem;
3646             rtx label = gen_label_rtx ();
3647             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
3648             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3649                                 NULL_RTX, 0, OPTAB_WIDEN);
3650             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
3651             expand_dec (quotient, const1_rtx);
3652             expand_inc (remainder, op1);
3653             emit_label (label);
3654             return gen_lowpart (mode, rem_flag ? remainder : quotient);
3655           }
3656
3657         /* No luck with division elimination or divmod.  Have to do it
3658            by conditionally adjusting op0 *and* the result.  */
3659         {
3660           rtx label1, label2, label3, label4, label5;
3661           rtx adjusted_op0;
3662           rtx tem;
3663
3664           quotient = gen_reg_rtx (compute_mode);
3665           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3666           label1 = gen_label_rtx ();
3667           label2 = gen_label_rtx ();
3668           label3 = gen_label_rtx ();
3669           label4 = gen_label_rtx ();
3670           label5 = gen_label_rtx ();
3671           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3672           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
3673           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3674                               quotient, 0, OPTAB_LIB_WIDEN);
3675           if (tem != quotient)
3676             emit_move_insn (quotient, tem);
3677           emit_jump_insn (gen_jump (label5));
3678           emit_barrier ();
3679           emit_label (label1);
3680           expand_inc (adjusted_op0, const1_rtx);
3681           emit_jump_insn (gen_jump (label4));
3682           emit_barrier ();
3683           emit_label (label2);
3684           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
3685           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3686                               quotient, 0, OPTAB_LIB_WIDEN);
3687           if (tem != quotient)
3688             emit_move_insn (quotient, tem);
3689           emit_jump_insn (gen_jump (label5));
3690           emit_barrier ();
3691           emit_label (label3);
3692           expand_dec (adjusted_op0, const1_rtx);
3693           emit_label (label4);
3694           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3695                               quotient, 0, OPTAB_LIB_WIDEN);
3696           if (tem != quotient)
3697             emit_move_insn (quotient, tem);
3698           expand_dec (quotient, const1_rtx);
3699           emit_label (label5);
3700         }
3701         break;
3702
3703       case CEIL_DIV_EXPR:
3704       case CEIL_MOD_EXPR:
3705         if (unsignedp)
3706           {
3707             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
3708               {
3709                 rtx t1, t2, t3;
3710                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3711                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3712                                    build_int_2 (floor_log2 (d), 0),
3713                                    tquotient, 1);
3714                 t2 = expand_binop (compute_mode, and_optab, op0,
3715                                    GEN_INT (d - 1),
3716                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3717                 t3 = gen_reg_rtx (compute_mode);
3718                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3719                                       compute_mode, 1, 1);
3720                 if (t3 == 0)
3721                   {
3722                     rtx lab;
3723                     lab = gen_label_rtx ();
3724                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3725                     expand_inc (t1, const1_rtx);
3726                     emit_label (lab);
3727                     quotient = t1;
3728                   }
3729                 else
3730                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3731                                                           t1, t3),
3732                                             tquotient);
3733                 break;
3734               }
3735
3736             /* Try using an instruction that produces both the quotient and
3737                remainder, using truncation.  We can easily compensate the
3738                quotient or remainder to get ceiling rounding, once we have the
3739                remainder.  Notice that we compute also the final remainder
3740                value here, and return the result right away.  */
3741             if (target == 0 || GET_MODE (target) != compute_mode)
3742               target = gen_reg_rtx (compute_mode);
3743
3744             if (rem_flag)
3745               {
3746                 remainder = (GET_CODE (target) == REG
3747                              ? target : gen_reg_rtx (compute_mode));
3748                 quotient = gen_reg_rtx (compute_mode);
3749               }
3750             else
3751               {
3752                 quotient = (GET_CODE (target) == REG
3753                             ? target : gen_reg_rtx (compute_mode));
3754                 remainder = gen_reg_rtx (compute_mode);
3755               }
3756
3757             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
3758                                      remainder, 1))
3759               {
3760                 /* This could be computed with a branch-less sequence.
3761                    Save that for later.  */
3762                 rtx label = gen_label_rtx ();
3763                 do_cmp_and_jump (remainder, const0_rtx, EQ,
3764                                  compute_mode, label);
3765                 expand_inc (quotient, const1_rtx);
3766                 expand_dec (remainder, op1);
3767                 emit_label (label);
3768                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3769               }
3770
3771             /* No luck with division elimination or divmod.  Have to do it
3772                by conditionally adjusting op0 *and* the result.  */
3773             {
3774               rtx label1, label2;
3775               rtx adjusted_op0, tem;
3776
3777               quotient = gen_reg_rtx (compute_mode);
3778               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3779               label1 = gen_label_rtx ();
3780               label2 = gen_label_rtx ();
3781               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
3782                                compute_mode, label1);
3783               emit_move_insn  (quotient, const0_rtx);
3784               emit_jump_insn (gen_jump (label2));
3785               emit_barrier ();
3786               emit_label (label1);
3787               expand_dec (adjusted_op0, const1_rtx);
3788               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
3789                                   quotient, 1, OPTAB_LIB_WIDEN);
3790               if (tem != quotient)
3791                 emit_move_insn (quotient, tem);
3792               expand_inc (quotient, const1_rtx);
3793               emit_label (label2);
3794             }
3795           }
3796         else /* signed */
3797           {
3798             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3799                 && INTVAL (op1) >= 0)
3800               {
3801                 /* This is extremely similar to the code for the unsigned case
3802                    above.  For 2.7 we should merge these variants, but for
3803                    2.6.1 I don't want to touch the code for unsigned since that
3804                    get used in C.  The signed case will only be used by other
3805                    languages (Ada).  */
3806
3807                 rtx t1, t2, t3;
3808                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3809                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3810                                    build_int_2 (floor_log2 (d), 0),
3811                                    tquotient, 0);
3812                 t2 = expand_binop (compute_mode, and_optab, op0,
3813                                    GEN_INT (d - 1),
3814                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3815                 t3 = gen_reg_rtx (compute_mode);
3816                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3817                                       compute_mode, 1, 1);
3818                 if (t3 == 0)
3819                   {
3820                     rtx lab;
3821                     lab = gen_label_rtx ();
3822                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3823                     expand_inc (t1, const1_rtx);
3824                     emit_label (lab);
3825                     quotient = t1;
3826                   }
3827                 else
3828                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3829                                                           t1, t3),
3830                                             tquotient);
3831                 break;
3832               }
3833
3834             /* Try using an instruction that produces both the quotient and
3835                remainder, using truncation.  We can easily compensate the
3836                quotient or remainder to get ceiling rounding, once we have the
3837                remainder.  Notice that we compute also the final remainder
3838                value here, and return the result right away.  */
3839             if (target == 0 || GET_MODE (target) != compute_mode)
3840               target = gen_reg_rtx (compute_mode);
3841             if (rem_flag)
3842               {
3843                 remainder= (GET_CODE (target) == REG
3844                             ? target : gen_reg_rtx (compute_mode));
3845                 quotient = gen_reg_rtx (compute_mode);
3846               }
3847             else
3848               {
3849                 quotient = (GET_CODE (target) == REG
3850                             ? target : gen_reg_rtx (compute_mode));
3851                 remainder = gen_reg_rtx (compute_mode);
3852               }
3853
3854             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
3855                                      remainder, 0))
3856               {
3857                 /* This could be computed with a branch-less sequence.
3858                    Save that for later.  */
3859                 rtx tem;
3860                 rtx label = gen_label_rtx ();
3861                 do_cmp_and_jump (remainder, const0_rtx, EQ,
3862                                  compute_mode, label);
3863                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3864                                     NULL_RTX, 0, OPTAB_WIDEN);
3865                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
3866                 expand_inc (quotient, const1_rtx);
3867                 expand_dec (remainder, op1);
3868                 emit_label (label);
3869                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3870               }
3871
3872             /* No luck with division elimination or divmod.  Have to do it
3873                by conditionally adjusting op0 *and* the result.  */
3874             {
3875               rtx label1, label2, label3, label4, label5;
3876               rtx adjusted_op0;
3877               rtx tem;
3878
3879               quotient = gen_reg_rtx (compute_mode);
3880               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3881               label1 = gen_label_rtx ();
3882               label2 = gen_label_rtx ();
3883               label3 = gen_label_rtx ();
3884               label4 = gen_label_rtx ();
3885               label5 = gen_label_rtx ();
3886               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3887               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
3888                                compute_mode, label1);
3889               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3890                                   quotient, 0, OPTAB_LIB_WIDEN);
3891               if (tem != quotient)
3892                 emit_move_insn (quotient, tem);
3893               emit_jump_insn (gen_jump (label5));
3894               emit_barrier ();
3895               emit_label (label1);
3896               expand_dec (adjusted_op0, const1_rtx);
3897               emit_jump_insn (gen_jump (label4));
3898               emit_barrier ();
3899               emit_label (label2);
3900               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
3901                                compute_mode, label3);
3902               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3903                                   quotient, 0, OPTAB_LIB_WIDEN);
3904               if (tem != quotient)
3905                 emit_move_insn (quotient, tem);
3906               emit_jump_insn (gen_jump (label5));
3907               emit_barrier ();
3908               emit_label (label3);
3909               expand_inc (adjusted_op0, const1_rtx);
3910               emit_label (label4);
3911               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3912                                   quotient, 0, OPTAB_LIB_WIDEN);
3913               if (tem != quotient)
3914                 emit_move_insn (quotient, tem);
3915               expand_inc (quotient, const1_rtx);
3916               emit_label (label5);
3917             }
3918           }
3919         break;
3920
3921       case EXACT_DIV_EXPR:
3922         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3923           {
3924             HOST_WIDE_INT d = INTVAL (op1);
3925             unsigned HOST_WIDE_INT ml;
3926             int pre_shift;
3927             rtx t1;
3928
3929             pre_shift = floor_log2 (d & -d);
3930             ml = invert_mod2n (d >> pre_shift, size);
3931             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3932                                build_int_2 (pre_shift, 0), NULL_RTX, unsignedp);
3933             quotient = expand_mult (compute_mode, t1,
3934                                     gen_int_mode (ml, compute_mode),
3935                                     NULL_RTX, 1);
3936
3937             insn = get_last_insn ();
3938             set_unique_reg_note (insn,
3939                                  REG_EQUAL,
3940                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
3941                                                  compute_mode,
3942                                                  op0, op1));
3943           }
3944         break;
3945
3946       case ROUND_DIV_EXPR:
3947       case ROUND_MOD_EXPR:
3948         if (unsignedp)
3949           {
3950             rtx tem;
3951             rtx label;
3952             label = gen_label_rtx ();
3953             quotient = gen_reg_rtx (compute_mode);
3954             remainder = gen_reg_rtx (compute_mode);
3955             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
3956               {
3957                 rtx tem;
3958                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
3959                                          quotient, 1, OPTAB_LIB_WIDEN);
3960                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
3961                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3962                                           remainder, 1, OPTAB_LIB_WIDEN);
3963               }
3964             tem = plus_constant (op1, -1);
3965             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3966                                 build_int_2 (1, 0), NULL_RTX, 1);
3967             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
3968             expand_inc (quotient, const1_rtx);
3969             expand_dec (remainder, op1);
3970             emit_label (label);
3971           }
3972         else
3973           {
3974             rtx abs_rem, abs_op1, tem, mask;
3975             rtx label;
3976             label = gen_label_rtx ();
3977             quotient = gen_reg_rtx (compute_mode);
3978             remainder = gen_reg_rtx (compute_mode);
3979             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
3980               {
3981                 rtx tem;
3982                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
3983                                          quotient, 0, OPTAB_LIB_WIDEN);
3984                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
3985                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3986                                           remainder, 0, OPTAB_LIB_WIDEN);
3987               }
3988             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
3989             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
3990             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
3991                                 build_int_2 (1, 0), NULL_RTX, 1);
3992             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
3993             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3994                                 NULL_RTX, 0, OPTAB_WIDEN);
3995             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3996                                 build_int_2 (size - 1, 0), NULL_RTX, 0);
3997             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
3998                                 NULL_RTX, 0, OPTAB_WIDEN);
3999             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4000                                 NULL_RTX, 0, OPTAB_WIDEN);
4001             expand_inc (quotient, tem);
4002             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4003                                 NULL_RTX, 0, OPTAB_WIDEN);
4004             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4005                                 NULL_RTX, 0, OPTAB_WIDEN);
4006             expand_dec (remainder, tem);
4007             emit_label (label);
4008           }
4009         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4010
4011       default:
4012         abort ();
4013       }
4014
4015   if (quotient == 0)
4016     {
4017       if (target && GET_MODE (target) != compute_mode)
4018         target = 0;
4019
4020       if (rem_flag)
4021         {
4022           /* Try to produce the remainder without producing the quotient.
4023              If we seem to have a divmod pattern that does not require widening,
4024              don't try widening here.  We should really have a WIDEN argument
4025              to expand_twoval_binop, since what we'd really like to do here is
4026              1) try a mod insn in compute_mode
4027              2) try a divmod insn in compute_mode
4028              3) try a div insn in compute_mode and multiply-subtract to get
4029                 remainder
4030              4) try the same things with widening allowed.  */
4031           remainder
4032             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4033                                  op0, op1, target,
4034                                  unsignedp,
4035                                  ((optab2->handlers[(int) compute_mode].insn_code
4036                                    != CODE_FOR_nothing)
4037                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4038           if (remainder == 0)
4039             {
4040               /* No luck there.  Can we do remainder and divide at once
4041                  without a library call?  */
4042               remainder = gen_reg_rtx (compute_mode);
4043               if (! expand_twoval_binop ((unsignedp
4044                                           ? udivmod_optab
4045                                           : sdivmod_optab),
4046                                          op0, op1,
4047                                          NULL_RTX, remainder, unsignedp))
4048                 remainder = 0;
4049             }
4050
4051           if (remainder)
4052             return gen_lowpart (mode, remainder);
4053         }
4054
4055       /* Produce the quotient.  Try a quotient insn, but not a library call.
4056          If we have a divmod in this mode, use it in preference to widening
4057          the div (for this test we assume it will not fail). Note that optab2
4058          is set to the one of the two optabs that the call below will use.  */
4059       quotient
4060         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4061                              op0, op1, rem_flag ? NULL_RTX : target,
4062                              unsignedp,
4063                              ((optab2->handlers[(int) compute_mode].insn_code
4064                                != CODE_FOR_nothing)
4065                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4066
4067       if (quotient == 0)
4068         {
4069           /* No luck there.  Try a quotient-and-remainder insn,
4070              keeping the quotient alone.  */
4071           quotient = gen_reg_rtx (compute_mode);
4072           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4073                                      op0, op1,
4074                                      quotient, NULL_RTX, unsignedp))
4075             {
4076               quotient = 0;
4077               if (! rem_flag)
4078                 /* Still no luck.  If we are not computing the remainder,
4079                    use a library call for the quotient.  */
4080                 quotient = sign_expand_binop (compute_mode,
4081                                               udiv_optab, sdiv_optab,
4082                                               op0, op1, target,
4083                                               unsignedp, OPTAB_LIB_WIDEN);
4084             }
4085         }
4086     }
4087
4088   if (rem_flag)
4089     {
4090       if (target && GET_MODE (target) != compute_mode)
4091         target = 0;
4092
4093       if (quotient == 0)
4094         /* No divide instruction either.  Use library for remainder.  */
4095         remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4096                                        op0, op1, target,
4097                                        unsignedp, OPTAB_LIB_WIDEN);
4098       else
4099         {
4100           /* We divided.  Now finish doing X - Y * (X / Y).  */
4101           remainder = expand_mult (compute_mode, quotient, op1,
4102                                    NULL_RTX, unsignedp);
4103           remainder = expand_binop (compute_mode, sub_optab, op0,
4104                                     remainder, target, unsignedp,
4105                                     OPTAB_LIB_WIDEN);
4106         }
4107     }
4108
4109   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4110 }
4111 \f
4112 /* Return a tree node with data type TYPE, describing the value of X.
4113    Usually this is an RTL_EXPR, if there is no obvious better choice.
4114    X may be an expression, however we only support those expressions
4115    generated by loop.c.  */
4116
4117 tree
4118 make_tree (tree type, rtx x)
4119 {
4120   tree t;
4121
4122   switch (GET_CODE (x))
4123     {
4124     case CONST_INT:
4125       t = build_int_2 (INTVAL (x),
4126                        (TREE_UNSIGNED (type)
4127                         && (GET_MODE_BITSIZE (TYPE_MODE (type)) < HOST_BITS_PER_WIDE_INT))
4128                        || INTVAL (x) >= 0 ? 0 : -1);
4129       TREE_TYPE (t) = type;
4130       return t;
4131
4132     case CONST_DOUBLE:
4133       if (GET_MODE (x) == VOIDmode)
4134         {
4135           t = build_int_2 (CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4136           TREE_TYPE (t) = type;
4137         }
4138       else
4139         {
4140           REAL_VALUE_TYPE d;
4141
4142           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4143           t = build_real (type, d);
4144         }
4145
4146       return t;
4147
4148     case CONST_VECTOR:
4149       {
4150         int i, units;
4151         rtx elt;
4152         tree t = NULL_TREE;
4153
4154         units = CONST_VECTOR_NUNITS (x);
4155
4156         /* Build a tree with vector elements.  */
4157         for (i = units - 1; i >= 0; --i)
4158           {
4159             elt = CONST_VECTOR_ELT (x, i);
4160             t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4161           }
4162
4163         return build_vector (type, t);
4164       }
4165
4166     case PLUS:
4167       return fold (build (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4168                           make_tree (type, XEXP (x, 1))));
4169
4170     case MINUS:
4171       return fold (build (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4172                           make_tree (type, XEXP (x, 1))));
4173
4174     case NEG:
4175       return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
4176
4177     case MULT:
4178       return fold (build (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4179                           make_tree (type, XEXP (x, 1))));
4180
4181     case ASHIFT:
4182       return fold (build (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4183                           make_tree (type, XEXP (x, 1))));
4184
4185     case LSHIFTRT:
4186       t = (*lang_hooks.types.unsigned_type) (type);
4187       return fold (convert (type,
4188                             build (RSHIFT_EXPR, t,
4189                                    make_tree (t, XEXP (x, 0)),
4190                                    make_tree (type, XEXP (x, 1)))));
4191
4192     case ASHIFTRT:
4193       t = (*lang_hooks.types.signed_type) (type);
4194       return fold (convert (type,
4195                             build (RSHIFT_EXPR, t,
4196                                    make_tree (t, XEXP (x, 0)),
4197                                    make_tree (type, XEXP (x, 1)))));
4198
4199     case DIV:
4200       if (TREE_CODE (type) != REAL_TYPE)
4201         t = (*lang_hooks.types.signed_type) (type);
4202       else
4203         t = type;
4204
4205       return fold (convert (type,
4206                             build (TRUNC_DIV_EXPR, t,
4207                                    make_tree (t, XEXP (x, 0)),
4208                                    make_tree (t, XEXP (x, 1)))));
4209     case UDIV:
4210       t = (*lang_hooks.types.unsigned_type) (type);
4211       return fold (convert (type,
4212                             build (TRUNC_DIV_EXPR, t,
4213                                    make_tree (t, XEXP (x, 0)),
4214                                    make_tree (t, XEXP (x, 1)))));
4215
4216     case SIGN_EXTEND:
4217     case ZERO_EXTEND:
4218       t = (*lang_hooks.types.type_for_mode) (GET_MODE (XEXP (x, 0)),
4219                                              GET_CODE (x) == ZERO_EXTEND);
4220       return fold (convert (type, make_tree (t, XEXP (x, 0))));
4221
4222    default:
4223       t = make_node (RTL_EXPR);
4224       TREE_TYPE (t) = type;
4225
4226       /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4227          ptr_mode.  So convert.  */
4228       if (POINTER_TYPE_P (type))
4229         x = convert_memory_address (TYPE_MODE (type), x);
4230
4231       RTL_EXPR_RTL (t) = x;
4232       /* There are no insns to be output
4233          when this rtl_expr is used.  */
4234       RTL_EXPR_SEQUENCE (t) = 0;
4235       return t;
4236     }
4237 }
4238
4239 /* Check whether the multiplication X * MULT + ADD overflows.
4240    X, MULT and ADD must be CONST_*.
4241    MODE is the machine mode for the computation.
4242    X and MULT must have mode MODE.  ADD may have a different mode.
4243    So can X (defaults to same as MODE).
4244    UNSIGNEDP is nonzero to do unsigned multiplication.  */
4245
4246 bool
4247 const_mult_add_overflow_p (rtx x, rtx mult, rtx add, enum machine_mode mode, int unsignedp)
4248 {
4249   tree type, mult_type, add_type, result;
4250
4251   type = (*lang_hooks.types.type_for_mode) (mode, unsignedp);
4252
4253   /* In order to get a proper overflow indication from an unsigned
4254      type, we have to pretend that it's a sizetype.  */
4255   mult_type = type;
4256   if (unsignedp)
4257     {
4258       mult_type = copy_node (type);
4259       TYPE_IS_SIZETYPE (mult_type) = 1;
4260     }
4261
4262   add_type = (GET_MODE (add) == VOIDmode ? mult_type
4263               : (*lang_hooks.types.type_for_mode) (GET_MODE (add), unsignedp));
4264
4265   result = fold (build (PLUS_EXPR, mult_type,
4266                         fold (build (MULT_EXPR, mult_type,
4267                                      make_tree (mult_type, x),
4268                                      make_tree (mult_type, mult))),
4269                         make_tree (add_type, add)));
4270
4271   return TREE_CONSTANT_OVERFLOW (result);
4272 }
4273
4274 /* Return an rtx representing the value of X * MULT + ADD.
4275    TARGET is a suggestion for where to store the result (an rtx).
4276    MODE is the machine mode for the computation.
4277    X and MULT must have mode MODE.  ADD may have a different mode.
4278    So can X (defaults to same as MODE).
4279    UNSIGNEDP is nonzero to do unsigned multiplication.
4280    This may emit insns.  */
4281
4282 rtx
4283 expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
4284                  int unsignedp)
4285 {
4286   tree type = (*lang_hooks.types.type_for_mode) (mode, unsignedp);
4287   tree add_type = (GET_MODE (add) == VOIDmode
4288                    ? type: (*lang_hooks.types.type_for_mode) (GET_MODE (add),
4289                                                               unsignedp));
4290   tree result =  fold (build (PLUS_EXPR, type,
4291                               fold (build (MULT_EXPR, type,
4292                                            make_tree (type, x),
4293                                            make_tree (type, mult))),
4294                               make_tree (add_type, add)));
4295
4296   return expand_expr (result, target, VOIDmode, 0);
4297 }
4298 \f
4299 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4300    and returning TARGET.
4301
4302    If TARGET is 0, a pseudo-register or constant is returned.  */
4303
4304 rtx
4305 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
4306 {
4307   rtx tem = 0;
4308
4309   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4310     tem = simplify_binary_operation (AND, mode, op0, op1);
4311   if (tem == 0)
4312     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4313
4314   if (target == 0)
4315     target = tem;
4316   else if (tem != target)
4317     emit_move_insn (target, tem);
4318   return target;
4319 }
4320 \f
4321 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
4322    and storing in TARGET.  Normally return TARGET.
4323    Return 0 if that cannot be done.
4324
4325    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
4326    it is VOIDmode, they cannot both be CONST_INT.
4327
4328    UNSIGNEDP is for the case where we have to widen the operands
4329    to perform the operation.  It says to use zero-extension.
4330
4331    NORMALIZEP is 1 if we should convert the result to be either zero
4332    or one.  Normalize is -1 if we should convert the result to be
4333    either zero or -1.  If NORMALIZEP is zero, the result will be left
4334    "raw" out of the scc insn.  */
4335
4336 rtx
4337 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
4338                  enum machine_mode mode, int unsignedp, int normalizep)
4339 {
4340   rtx subtarget;
4341   enum insn_code icode;
4342   enum machine_mode compare_mode;
4343   enum machine_mode target_mode = GET_MODE (target);
4344   rtx tem;
4345   rtx last = get_last_insn ();
4346   rtx pattern, comparison;
4347
4348   /* ??? Ok to do this and then fail? */
4349   op0 = protect_from_queue (op0, 0);
4350   op1 = protect_from_queue (op1, 0);
4351
4352   if (unsignedp)
4353     code = unsigned_condition (code);
4354
4355   /* If one operand is constant, make it the second one.  Only do this
4356      if the other operand is not constant as well.  */
4357
4358   if (swap_commutative_operands_p (op0, op1))
4359     {
4360       tem = op0;
4361       op0 = op1;
4362       op1 = tem;
4363       code = swap_condition (code);
4364     }
4365
4366   if (mode == VOIDmode)
4367     mode = GET_MODE (op0);
4368
4369   /* For some comparisons with 1 and -1, we can convert this to
4370      comparisons with zero.  This will often produce more opportunities for
4371      store-flag insns.  */
4372
4373   switch (code)
4374     {
4375     case LT:
4376       if (op1 == const1_rtx)
4377         op1 = const0_rtx, code = LE;
4378       break;
4379     case LE:
4380       if (op1 == constm1_rtx)
4381         op1 = const0_rtx, code = LT;
4382       break;
4383     case GE:
4384       if (op1 == const1_rtx)
4385         op1 = const0_rtx, code = GT;
4386       break;
4387     case GT:
4388       if (op1 == constm1_rtx)
4389         op1 = const0_rtx, code = GE;
4390       break;
4391     case GEU:
4392       if (op1 == const1_rtx)
4393         op1 = const0_rtx, code = NE;
4394       break;
4395     case LTU:
4396       if (op1 == const1_rtx)
4397         op1 = const0_rtx, code = EQ;
4398       break;
4399     default:
4400       break;
4401     }
4402
4403   /* If we are comparing a double-word integer with zero, we can convert
4404      the comparison into one involving a single word.  */
4405   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
4406       && GET_MODE_CLASS (mode) == MODE_INT
4407       && op1 == const0_rtx
4408       && (GET_CODE (op0) != MEM || ! MEM_VOLATILE_P (op0)))
4409     {
4410       if (code == EQ || code == NE)
4411         {
4412           rtx op00, op01, op0both;
4413
4414           /* Do a logical OR of the two words and compare the result.  */
4415           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
4416           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
4417           op0both = expand_binop (word_mode, ior_optab, op00, op01,
4418                                   NULL_RTX, unsignedp, OPTAB_DIRECT);
4419           if (op0both != 0)
4420             return emit_store_flag (target, code, op0both, op1, word_mode,
4421                                     unsignedp, normalizep);
4422         }
4423       else if (code == LT || code == GE)
4424         {
4425           rtx op0h;
4426
4427           /* If testing the sign bit, can just test on high word.  */
4428           op0h = simplify_gen_subreg (word_mode, op0, mode,
4429                                       subreg_highpart_offset (word_mode, mode));
4430           return emit_store_flag (target, code, op0h, op1, word_mode,
4431                                   unsignedp, normalizep);
4432         }
4433     }
4434
4435   /* From now on, we won't change CODE, so set ICODE now.  */
4436   icode = setcc_gen_code[(int) code];
4437
4438   /* If this is A < 0 or A >= 0, we can do this by taking the ones
4439      complement of A (for GE) and shifting the sign bit to the low bit.  */
4440   if (op1 == const0_rtx && (code == LT || code == GE)
4441       && GET_MODE_CLASS (mode) == MODE_INT
4442       && (normalizep || STORE_FLAG_VALUE == 1
4443           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4444               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4445                   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
4446     {
4447       subtarget = target;
4448
4449       /* If the result is to be wider than OP0, it is best to convert it
4450          first.  If it is to be narrower, it is *incorrect* to convert it
4451          first.  */
4452       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
4453         {
4454           op0 = protect_from_queue (op0, 0);
4455           op0 = convert_modes (target_mode, mode, op0, 0);
4456           mode = target_mode;
4457         }
4458
4459       if (target_mode != mode)
4460         subtarget = 0;
4461
4462       if (code == GE)
4463         op0 = expand_unop (mode, one_cmpl_optab, op0,
4464                            ((STORE_FLAG_VALUE == 1 || normalizep)
4465                             ? 0 : subtarget), 0);
4466
4467       if (STORE_FLAG_VALUE == 1 || normalizep)
4468         /* If we are supposed to produce a 0/1 value, we want to do
4469            a logical shift from the sign bit to the low-order bit; for
4470            a -1/0 value, we do an arithmetic shift.  */
4471         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
4472                             size_int (GET_MODE_BITSIZE (mode) - 1),
4473                             subtarget, normalizep != -1);
4474
4475       if (mode != target_mode)
4476         op0 = convert_modes (target_mode, mode, op0, 0);
4477
4478       return op0;
4479     }
4480
4481   if (icode != CODE_FOR_nothing)
4482     {
4483       insn_operand_predicate_fn pred;
4484
4485       /* We think we may be able to do this with a scc insn.  Emit the
4486          comparison and then the scc insn.
4487
4488          compare_from_rtx may call emit_queue, which would be deleted below
4489          if the scc insn fails.  So call it ourselves before setting LAST.
4490          Likewise for do_pending_stack_adjust.  */
4491
4492       emit_queue ();
4493       do_pending_stack_adjust ();
4494       last = get_last_insn ();
4495
4496       comparison
4497         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
4498       if (GET_CODE (comparison) == CONST_INT)
4499         return (comparison == const0_rtx ? const0_rtx
4500                 : normalizep == 1 ? const1_rtx
4501                 : normalizep == -1 ? constm1_rtx
4502                 : const_true_rtx);
4503
4504       /* The code of COMPARISON may not match CODE if compare_from_rtx
4505          decided to swap its operands and reverse the original code.
4506
4507          We know that compare_from_rtx returns either a CONST_INT or
4508          a new comparison code, so it is safe to just extract the
4509          code from COMPARISON.  */
4510       code = GET_CODE (comparison);
4511
4512       /* Get a reference to the target in the proper mode for this insn.  */
4513       compare_mode = insn_data[(int) icode].operand[0].mode;
4514       subtarget = target;
4515       pred = insn_data[(int) icode].operand[0].predicate;
4516       if (preserve_subexpressions_p ()
4517           || ! (*pred) (subtarget, compare_mode))
4518         subtarget = gen_reg_rtx (compare_mode);
4519
4520       pattern = GEN_FCN (icode) (subtarget);
4521       if (pattern)
4522         {
4523           emit_insn (pattern);
4524
4525           /* If we are converting to a wider mode, first convert to
4526              TARGET_MODE, then normalize.  This produces better combining
4527              opportunities on machines that have a SIGN_EXTRACT when we are
4528              testing a single bit.  This mostly benefits the 68k.
4529
4530              If STORE_FLAG_VALUE does not have the sign bit set when
4531              interpreted in COMPARE_MODE, we can do this conversion as
4532              unsigned, which is usually more efficient.  */
4533           if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4534             {
4535               convert_move (target, subtarget,
4536                             (GET_MODE_BITSIZE (compare_mode)
4537                              <= HOST_BITS_PER_WIDE_INT)
4538                             && 0 == (STORE_FLAG_VALUE
4539                                      & ((HOST_WIDE_INT) 1
4540                                         << (GET_MODE_BITSIZE (compare_mode) -1))));
4541               op0 = target;
4542               compare_mode = target_mode;
4543             }
4544           else
4545             op0 = subtarget;
4546
4547           /* If we want to keep subexpressions around, don't reuse our
4548              last target.  */
4549
4550           if (preserve_subexpressions_p ())
4551             subtarget = 0;
4552
4553           /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
4554              we don't have to do anything.  */
4555           if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4556             ;
4557           /* STORE_FLAG_VALUE might be the most negative number, so write
4558              the comparison this way to avoid a compiler-time warning.  */
4559           else if (- normalizep == STORE_FLAG_VALUE)
4560             op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
4561
4562           /* We don't want to use STORE_FLAG_VALUE < 0 below since this
4563              makes it hard to use a value of just the sign bit due to
4564              ANSI integer constant typing rules.  */
4565           else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
4566                    && (STORE_FLAG_VALUE
4567                        & ((HOST_WIDE_INT) 1
4568                           << (GET_MODE_BITSIZE (compare_mode) - 1))))
4569             op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
4570                                 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
4571                                 subtarget, normalizep == 1);
4572           else if (STORE_FLAG_VALUE & 1)
4573             {
4574               op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
4575               if (normalizep == -1)
4576                 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
4577             }
4578           else
4579             abort ();
4580
4581           /* If we were converting to a smaller mode, do the
4582              conversion now.  */
4583           if (target_mode != compare_mode)
4584             {
4585               convert_move (target, op0, 0);
4586               return target;
4587             }
4588           else
4589             return op0;
4590         }
4591     }
4592
4593   delete_insns_since (last);
4594
4595   /* If expensive optimizations, use different pseudo registers for each
4596      insn, instead of reusing the same pseudo.  This leads to better CSE,
4597      but slows down the compiler, since there are more pseudos */
4598   subtarget = (!flag_expensive_optimizations
4599                && (target_mode == mode)) ? target : NULL_RTX;
4600
4601   /* If we reached here, we can't do this with a scc insn.  However, there
4602      are some comparisons that can be done directly.  For example, if
4603      this is an equality comparison of integers, we can try to exclusive-or
4604      (or subtract) the two operands and use a recursive call to try the
4605      comparison with zero.  Don't do any of these cases if branches are
4606      very cheap.  */
4607
4608   if (BRANCH_COST > 0
4609       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
4610       && op1 != const0_rtx)
4611     {
4612       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
4613                           OPTAB_WIDEN);
4614
4615       if (tem == 0)
4616         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
4617                             OPTAB_WIDEN);
4618       if (tem != 0)
4619         tem = emit_store_flag (target, code, tem, const0_rtx,
4620                                mode, unsignedp, normalizep);
4621       if (tem == 0)
4622         delete_insns_since (last);
4623       return tem;
4624     }
4625
4626   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
4627      the constant zero.  Reject all other comparisons at this point.  Only
4628      do LE and GT if branches are expensive since they are expensive on
4629      2-operand machines.  */
4630
4631   if (BRANCH_COST == 0
4632       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
4633       || (code != EQ && code != NE
4634           && (BRANCH_COST <= 1 || (code != LE && code != GT))))
4635     return 0;
4636
4637   /* See what we need to return.  We can only return a 1, -1, or the
4638      sign bit.  */
4639
4640   if (normalizep == 0)
4641     {
4642       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
4643         normalizep = STORE_FLAG_VALUE;
4644
4645       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4646                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4647                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
4648         ;
4649       else
4650         return 0;
4651     }
4652
4653   /* Try to put the result of the comparison in the sign bit.  Assume we can't
4654      do the necessary operation below.  */
4655
4656   tem = 0;
4657
4658   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
4659      the sign bit set.  */
4660
4661   if (code == LE)
4662     {
4663       /* This is destructive, so SUBTARGET can't be OP0.  */
4664       if (rtx_equal_p (subtarget, op0))
4665         subtarget = 0;
4666
4667       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
4668                           OPTAB_WIDEN);
4669       if (tem)
4670         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
4671                             OPTAB_WIDEN);
4672     }
4673
4674   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
4675      number of bits in the mode of OP0, minus one.  */
4676
4677   if (code == GT)
4678     {
4679       if (rtx_equal_p (subtarget, op0))
4680         subtarget = 0;
4681
4682       tem = expand_shift (RSHIFT_EXPR, mode, op0,
4683                           size_int (GET_MODE_BITSIZE (mode) - 1),
4684                           subtarget, 0);
4685       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
4686                           OPTAB_WIDEN);
4687     }
4688
4689   if (code == EQ || code == NE)
4690     {
4691       /* For EQ or NE, one way to do the comparison is to apply an operation
4692          that converts the operand into a positive number if it is nonzero
4693          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
4694          for NE we negate.  This puts the result in the sign bit.  Then we
4695          normalize with a shift, if needed.
4696
4697          Two operations that can do the above actions are ABS and FFS, so try
4698          them.  If that doesn't work, and MODE is smaller than a full word,
4699          we can use zero-extension to the wider mode (an unsigned conversion)
4700          as the operation.  */
4701
4702       /* Note that ABS doesn't yield a positive number for INT_MIN, but
4703          that is compensated by the subsequent overflow when subtracting
4704          one / negating.  */
4705
4706       if (abs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4707         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
4708       else if (ffs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4709         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
4710       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
4711         {
4712           op0 = protect_from_queue (op0, 0);
4713           tem = convert_modes (word_mode, mode, op0, 1);
4714           mode = word_mode;
4715         }
4716
4717       if (tem != 0)
4718         {
4719           if (code == EQ)
4720             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
4721                                 0, OPTAB_WIDEN);
4722           else
4723             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
4724         }
4725
4726       /* If we couldn't do it that way, for NE we can "or" the two's complement
4727          of the value with itself.  For EQ, we take the one's complement of
4728          that "or", which is an extra insn, so we only handle EQ if branches
4729          are expensive.  */
4730
4731       if (tem == 0 && (code == NE || BRANCH_COST > 1))
4732         {
4733           if (rtx_equal_p (subtarget, op0))
4734             subtarget = 0;
4735
4736           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
4737           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
4738                               OPTAB_WIDEN);
4739
4740           if (tem && code == EQ)
4741             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
4742         }
4743     }
4744
4745   if (tem && normalizep)
4746     tem = expand_shift (RSHIFT_EXPR, mode, tem,
4747                         size_int (GET_MODE_BITSIZE (mode) - 1),
4748                         subtarget, normalizep == 1);
4749
4750   if (tem)
4751     {
4752       if (GET_MODE (tem) != target_mode)
4753         {
4754           convert_move (target, tem, 0);
4755           tem = target;
4756         }
4757       else if (!subtarget)
4758         {
4759           emit_move_insn (target, tem);
4760           tem = target;
4761         }
4762     }
4763   else
4764     delete_insns_since (last);
4765
4766   return tem;
4767 }
4768
4769 /* Like emit_store_flag, but always succeeds.  */
4770
4771 rtx
4772 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
4773                        enum machine_mode mode, int unsignedp, int normalizep)
4774 {
4775   rtx tem, label;
4776
4777   /* First see if emit_store_flag can do the job.  */
4778   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
4779   if (tem != 0)
4780     return tem;
4781
4782   if (normalizep == 0)
4783     normalizep = 1;
4784
4785   /* If this failed, we have to do this with set/compare/jump/set code.  */
4786
4787   if (GET_CODE (target) != REG
4788       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
4789     target = gen_reg_rtx (GET_MODE (target));
4790
4791   emit_move_insn (target, const1_rtx);
4792   label = gen_label_rtx ();
4793   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
4794                            NULL_RTX, label);
4795
4796   emit_move_insn (target, const0_rtx);
4797   emit_label (label);
4798
4799   return target;
4800 }
4801 \f
4802 /* Perform possibly multi-word comparison and conditional jump to LABEL
4803    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
4804
4805    The algorithm is based on the code in expr.c:do_jump.
4806
4807    Note that this does not perform a general comparison.  Only variants
4808    generated within expmed.c are correctly handled, others abort (but could
4809    be handled if needed).  */
4810
4811 static void
4812 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
4813                  rtx label)
4814 {
4815   /* If this mode is an integer too wide to compare properly,
4816      compare word by word.  Rely on cse to optimize constant cases.  */
4817
4818   if (GET_MODE_CLASS (mode) == MODE_INT
4819       && ! can_compare_p (op, mode, ccp_jump))
4820     {
4821       rtx label2 = gen_label_rtx ();
4822
4823       switch (op)
4824         {
4825         case LTU:
4826           do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
4827           break;
4828
4829         case LEU:
4830           do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
4831           break;
4832
4833         case LT:
4834           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
4835           break;
4836
4837         case GT:
4838           do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
4839           break;
4840
4841         case GE:
4842           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
4843           break;
4844
4845           /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
4846              that's the only equality operations we do */
4847         case EQ:
4848           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
4849             abort ();
4850           do_jump_by_parts_equality_rtx (arg1, label2, label);
4851           break;
4852
4853         case NE:
4854           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
4855             abort ();
4856           do_jump_by_parts_equality_rtx (arg1, label, label2);
4857           break;
4858
4859         default:
4860           abort ();
4861         }
4862
4863       emit_label (label2);
4864     }
4865   else
4866     emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
4867 }