gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000 Free Software Foundation, Inc.
   5
   6 This file is part of GNU CC.
   7
   8 GNU CC is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 2, or (at your option)
  11 any later version.
  12
  13 GNU CC is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU CC; see the file COPYING.  If not, write to
  20 the Free Software Foundation, 59 Temple Place - Suite 330,
  21 Boston, MA 02111-1307, USA.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "toplev.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "tm_p.h"
  30 #include "flags.h"
  31 #include "insn-flags.h"
  32 #include "insn-codes.h"
  33 #include "insn-config.h"
  34 #include "expr.h"
  35 #include "real.h"
  36 #include "recog.h"
  37
  38 static void store_fixed_bit_field       PARAMS ((rtx, unsigned HOST_WIDE_INT,
  39                                                  unsigned HOST_WIDE_INT,
  40                                                  unsigned HOST_WIDE_INT, rtx,
  41                                                  unsigned int));
  42 static void store_split_bit_field       PARAMS ((rtx, unsigned HOST_WIDE_INT,
  43                                                  unsigned HOST_WIDE_INT, rtx,
  44                                                  unsigned int));
  45 static rtx extract_fixed_bit_field      PARAMS ((enum machine_mode, rtx,
  46                                                  unsigned HOST_WIDE_INT,
  47                                                  unsigned HOST_WIDE_INT,
  48                                                  unsigned HOST_WIDE_INT,
  49                                                  rtx, int, unsigned int));
  50 static rtx mask_rtx                     PARAMS ((enum machine_mode, int,
  51                                                  int, int));
  52 static rtx lshift_value                 PARAMS ((enum machine_mode, rtx,
  53                                                  int, int));
  54 static rtx extract_split_bit_field      PARAMS ((rtx, unsigned HOST_WIDE_INT,
  55                                                  unsigned HOST_WIDE_INT, int,
  56                                                  unsigned int));
  57 static void do_cmp_and_jump             PARAMS ((rtx, rtx, enum rtx_code,
  58                                                  enum machine_mode, rtx));
  59
  60 /* Non-zero means divides or modulus operations are relatively cheap for
  61    powers of two, so don't use branches; emit the operation instead.
  62    Usually, this will mean that the MD file will emit non-branch
  63    sequences.  */
  64
  65 static int sdiv_pow2_cheap, smod_pow2_cheap;
  66
  67 #ifndef SLOW_UNALIGNED_ACCESS
  68 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  69 #endif
  70
  71 /* For compilers that support multiple targets with different word sizes,
  72    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  73    is the H8/300(H) compiler.  */
  74
  75 #ifndef MAX_BITS_PER_WORD
  76 #define MAX_BITS_PER_WORD BITS_PER_WORD
  77 #endif
  78
  79 /* Cost of various pieces of RTL.  Note that some of these are indexed by
  80    shift count and some by mode.  */
  81 static int add_cost, negate_cost, zero_cost;
  82 static int shift_cost[MAX_BITS_PER_WORD];
  83 static int shiftadd_cost[MAX_BITS_PER_WORD];
  84 static int shiftsub_cost[MAX_BITS_PER_WORD];
  85 static int mul_cost[NUM_MACHINE_MODES];
  86 static int div_cost[NUM_MACHINE_MODES];
  87 static int mul_widen_cost[NUM_MACHINE_MODES];
  88 static int mul_highpart_cost[NUM_MACHINE_MODES];
  89
  90 void
  91 init_expmed ()
  92 {
  93   /* This is "some random pseudo register" for purposes of calling recog
  94      to see what insns exist.  */
  95   rtx reg = gen_rtx_REG (word_mode, 10000);
  96   rtx shift_insn, shiftadd_insn, shiftsub_insn;
  97   int dummy;
  98   int m;
  99   enum machine_mode mode, wider_mode;
 100
 101   start_sequence ();
 102
 103   reg = gen_rtx_REG (word_mode, 10000);
 104
 105   zero_cost = rtx_cost (const0_rtx, 0);
 106   add_cost = rtx_cost (gen_rtx_PLUS (word_mode, reg, reg), SET);
 107
 108   shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg,
 109                                        gen_rtx_ASHIFT (word_mode, reg,
 110                                                        const0_rtx)));
 111
 112   shiftadd_insn
 113     = emit_insn (gen_rtx_SET (VOIDmode, reg,
 114                               gen_rtx_PLUS (word_mode,
 115                                             gen_rtx_MULT (word_mode,
 116                                                           reg, const0_rtx),
 117                                             reg)));
 118
 119   shiftsub_insn
 120     = emit_insn (gen_rtx_SET (VOIDmode, reg,
 121                               gen_rtx_MINUS (word_mode,
 122                                              gen_rtx_MULT (word_mode,
 123                                                            reg, const0_rtx),
 124                                              reg)));
 125
 126   init_recog ();
 127
 128   shift_cost[0] = 0;
 129   shiftadd_cost[0] = shiftsub_cost[0] = add_cost;
 130
 131   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 132     {
 133       shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;
 134
 135       XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);
 136       if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)
 137         shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);
 138
 139       XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1)
 140         = GEN_INT ((HOST_WIDE_INT) 1 << m);
 141       if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)
 142         shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);
 143
 144       XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1)
 145         = GEN_INT ((HOST_WIDE_INT) 1 << m);
 146       if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)
 147         shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);
 148     }
 149
 150   negate_cost = rtx_cost (gen_rtx_NEG (word_mode, reg), SET);
 151
 152   sdiv_pow2_cheap
 153     = (rtx_cost (gen_rtx_DIV (word_mode, reg, GEN_INT (32)), SET)
 154        <= 2 * add_cost);
 155   smod_pow2_cheap
 156     = (rtx_cost (gen_rtx_MOD (word_mode, reg, GEN_INT (32)), SET)
 157        <= 2 * add_cost);
 158
 159   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 160        mode != VOIDmode;
 161        mode = GET_MODE_WIDER_MODE (mode))
 162     {
 163       reg = gen_rtx_REG (mode, 10000);
 164       div_cost[(int) mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET);
 165       mul_cost[(int) mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET);
 166       wider_mode = GET_MODE_WIDER_MODE (mode);
 167       if (wider_mode != VOIDmode)
 168         {
 169           mul_widen_cost[(int) wider_mode]
 170             = rtx_cost (gen_rtx_MULT (wider_mode,
 171                                       gen_rtx_ZERO_EXTEND (wider_mode, reg),
 172                                       gen_rtx_ZERO_EXTEND (wider_mode, reg)),
 173                         SET);
 174           mul_highpart_cost[(int) mode]
 175             = rtx_cost (gen_rtx_TRUNCATE
 176                         (mode,
 177                          gen_rtx_LSHIFTRT (wider_mode,
 178                                            gen_rtx_MULT (wider_mode,
 179                                                          gen_rtx_ZERO_EXTEND
 180                                                          (wider_mode, reg),
 181                                                          gen_rtx_ZERO_EXTEND
 182                                                          (wider_mode, reg)),
 183                                            GEN_INT (GET_MODE_BITSIZE (mode)))),
 184                         SET);
 185         }
 186     }
 187
 188   end_sequence ();
 189 }
 190
 191 /* Return an rtx representing minus the value of X.
 192    MODE is the intended mode of the result,
 193    useful if X is a CONST_INT.  */
 194
 195 rtx
 196 negate_rtx (mode, x)
 197      enum machine_mode mode;
 198      rtx x;
 199 {
 200   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 201
 202   if (result == 0)
 203     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 204
 205   return result;
 206 }
 207 \f
 208 /* Generate code to store value from rtx VALUE
 209    into a bit-field within structure STR_RTX
 210    containing BITSIZE bits starting at bit BITNUM.
 211    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
 212    ALIGN is the alignment that STR_RTX is known to have.
 213    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
 214
 215 /* ??? Note that there are two different ideas here for how
 216    to determine the size to count bits within, for a register.
 217    One is BITS_PER_WORD, and the other is the size of operand 3
 218    of the insv pattern.
 219
 220    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
 221    else, we use the mode of operand 3.  */
 222
 223 rtx
 224 store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, align, total_size)
 225      rtx str_rtx;
 226      unsigned HOST_WIDE_INT bitsize;
 227      unsigned HOST_WIDE_INT bitnum;
 228      enum machine_mode fieldmode;
 229      rtx value;
 230      unsigned int align;
 231      HOST_WIDE_INT total_size;
 232 {
 233   unsigned int unit
 234     = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
 235   unsigned HOST_WIDE_INT offset = bitnum / unit;
 236   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
 237   register rtx op0 = str_rtx;
 238 #ifdef HAVE_insv
 239   unsigned HOST_WIDE_INT insv_bitsize;
 240   enum machine_mode op_mode;
 241
 242   op_mode = insn_data[(int) CODE_FOR_insv].operand[3].mode;
 243   if (op_mode == VOIDmode)
 244     op_mode = word_mode;
 245   insv_bitsize = GET_MODE_BITSIZE (op_mode);
 246 #endif
 247
 248   if (GET_CODE (str_rtx) == MEM && ! MEM_IN_STRUCT_P (str_rtx))
 249     abort ();
 250
 251   /* Discount the part of the structure before the desired byte.
 252      We need to know how many bytes are safe to reference after it.  */
 253   if (total_size >= 0)
 254     total_size -= (bitpos / BIGGEST_ALIGNMENT
 255                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
 256
 257   while (GET_CODE (op0) == SUBREG)
 258     {
 259       /* The following line once was done only if WORDS_BIG_ENDIAN,
 260          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 261          meaningful at a much higher level; when structures are copied
 262          between memory and regs, the higher-numbered regs
 263          always get higher addresses.  */
 264       offset += SUBREG_WORD (op0);
 265       /* We used to adjust BITPOS here, but now we do the whole adjustment
 266          right after the loop.  */
 267       op0 = SUBREG_REG (op0);
 268     }
 269
 270   /* Make sure we are playing with integral modes.  Pun with subregs
 271      if we aren't.  */
 272   {
 273     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 274     if (imode != GET_MODE (op0))
 275       {
 276         if (GET_CODE (op0) == MEM)
 277           op0 = change_address (op0, imode, NULL_RTX);
 278         else if (imode != BLKmode)
 279           op0 = gen_lowpart (imode, op0);
 280         else
 281           abort ();
 282       }
 283   }
 284
 285   /* If OP0 is a register, BITPOS must count within a word.
 286      But as we have it, it counts within whatever size OP0 now has.
 287      On a bigendian machine, these are not the same, so convert.  */
 288   if (BYTES_BIG_ENDIAN
 289       && GET_CODE (op0) != MEM
 290       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 291     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 292
 293   value = protect_from_queue (value, 0);
 294
 295   if (flag_force_mem)
 296     value = force_not_mem (value);
 297
 298   if ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 299        || (GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode)
 300            && GET_MODE_SIZE (fieldmode) != 0))
 301       && (GET_CODE (op0) != MEM
 302           || ! SLOW_UNALIGNED_ACCESS (fieldmode, align)
 303           || (offset * BITS_PER_UNIT % bitsize == 0
 304               && align % GET_MODE_BITSIZE (fieldmode) == 0))
 305       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 306       && bitsize == GET_MODE_BITSIZE (fieldmode))
 307     {
 308       /* Storing in a full-word or multi-word field in a register
 309          can be done with just SUBREG.  Also, storing in the entire object
 310          can be done with just SUBREG.  */
 311       if (GET_MODE (op0) != fieldmode)
 312         {
 313           if (GET_CODE (op0) == SUBREG)
 314             {
 315               if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 316                   || GET_MODE_CLASS (fieldmode) == MODE_INT
 317                   || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 318                 op0 = SUBREG_REG (op0);
 319               else
 320                 /* Else we've got some float mode source being extracted into
 321                    a different float mode destination -- this combination of
 322                    subregs results in Severe Tire Damage.  */
 323                 abort ();
 324             }
 325           if (GET_CODE (op0) == REG)
 326             op0 = gen_rtx_SUBREG (fieldmode, op0, offset);
 327           else
 328             op0 = change_address (op0, fieldmode,
 329                                   plus_constant (XEXP (op0, 0), offset));
 330         }
 331       emit_move_insn (op0, value);
 332       return value;
 333     }
 334
 335   /* Storing an lsb-aligned field in a register
 336      can be done with a movestrict instruction.  */
 337
 338   if (GET_CODE (op0) != MEM
 339       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 340       && bitsize == GET_MODE_BITSIZE (fieldmode)
 341       && (movstrict_optab->handlers[(int) fieldmode].insn_code
 342           != CODE_FOR_nothing))
 343     {
 344       int icode = movstrict_optab->handlers[(int) fieldmode].insn_code;
 345
 346       /* Get appropriate low part of the value being stored.  */
 347       if (GET_CODE (value) == CONST_INT || GET_CODE (value) == REG)
 348         value = gen_lowpart (fieldmode, value);
 349       else if (!(GET_CODE (value) == SYMBOL_REF
 350                  || GET_CODE (value) == LABEL_REF
 351                  || GET_CODE (value) == CONST))
 352         value = convert_to_mode (fieldmode, value, 0);
 353
 354       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 355         value = copy_to_mode_reg (fieldmode, value);
 356
 357       if (GET_CODE (op0) == SUBREG)
 358         {
 359           if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 360               || GET_MODE_CLASS (fieldmode) == MODE_INT
 361               || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 362             op0 = SUBREG_REG (op0);
 363           else
 364             /* Else we've got some float mode source being extracted into
 365                a different float mode destination -- this combination of
 366                subregs results in Severe Tire Damage.  */
 367             abort ();
 368         }
 369
 370       emit_insn (GEN_FCN (icode)
 371                  (gen_rtx_SUBREG (fieldmode, op0, offset), value));
 372
 373       return value;
 374     }
 375
 376   /* Handle fields bigger than a word.  */
 377
 378   if (bitsize > BITS_PER_WORD)
 379     {
 380       /* Here we transfer the words of the field
 381          in the order least significant first.
 382          This is because the most significant word is the one which may
 383          be less than full.
 384          However, only do that if the value is not BLKmode.  */
 385
 386       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 387       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 388       unsigned int i;
 389
 390       /* This is the mode we must force value to, so that there will be enough
 391          subwords to extract.  Note that fieldmode will often (always?) be
 392          VOIDmode, because that is what store_field uses to indicate that this
 393          is a bit field, but passing VOIDmode to operand_subword_force will
 394          result in an abort.  */
 395       fieldmode = mode_for_size (nwords * BITS_PER_WORD, MODE_INT, 0);
 396
 397       for (i = 0; i < nwords; i++)
 398         {
 399           /* If I is 0, use the low-order word in both field and target;
 400              if I is 1, use the next to lowest word; and so on.  */
 401           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 402           unsigned int bit_offset = (backwards
 403                             ? MAX ((int) bitsize - ((int) i + 1)
 404                                    * BITS_PER_WORD,
 405                                    0)
 406                             : (int) i * BITS_PER_WORD);
 407
 408           store_bit_field (op0, MIN (BITS_PER_WORD,
 409                                      bitsize - i * BITS_PER_WORD),
 410                            bitnum + bit_offset, word_mode,
 411                            operand_subword_force (value, wordnum,
 412                                                   (GET_MODE (value) == VOIDmode
 413                                                    ? fieldmode
 414                                                    : GET_MODE (value))),
 415                            align, total_size);
 416         }
 417       return value;
 418     }
 419
 420   /* From here on we can assume that the field to be stored in is
 421      a full-word (whatever type that is), since it is shorter than a word.  */
 422
 423   /* OFFSET is the number of words or bytes (UNIT says which)
 424      from STR_RTX to the first word or byte containing part of the field.  */
 425
 426   if (GET_CODE (op0) != MEM)
 427     {
 428       if (offset != 0
 429           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 430         {
 431           if (GET_CODE (op0) != REG)
 432             {
 433               /* Since this is a destination (lvalue), we can't copy it to a
 434                  pseudo.  We can trivially remove a SUBREG that does not
 435                  change the size of the operand.  Such a SUBREG may have been
 436                  added above.  Otherwise, abort.  */
 437               if (GET_CODE (op0) == SUBREG
 438                   && (GET_MODE_SIZE (GET_MODE (op0))
 439                       == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))))
 440                 op0 = SUBREG_REG (op0);
 441               else
 442                 abort ();
 443             }
 444           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 445                                 op0, offset);
 446         }
 447       offset = 0;
 448     }
 449   else
 450     {
 451       op0 = protect_from_queue (op0, 1);
 452     }
 453
 454   /* If VALUE is a floating-point mode, access it as an integer of the
 455      corresponding size.  This can occur on a machine with 64 bit registers
 456      that uses SFmode for float.  This can also occur for unaligned float
 457      structure fields.  */
 458   if (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT)
 459     {
 460       if (GET_CODE (value) != REG)
 461         value = copy_to_reg (value);
 462       value = gen_rtx_SUBREG (word_mode, value, 0);
 463     }
 464
 465   /* Now OFFSET is nonzero only if OP0 is memory
 466      and is therefore always measured in bytes.  */
 467
 468 #ifdef HAVE_insv
 469   if (HAVE_insv
 470       && GET_MODE (value) != BLKmode
 471       && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
 472       /* Ensure insv's size is wide enough for this field.  */
 473       && (insv_bitsize >= bitsize)
 474       && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 475             && (bitsize + bitpos > insv_bitsize)))
 476     {
 477       int xbitpos = bitpos;
 478       rtx value1;
 479       rtx xop0 = op0;
 480       rtx last = get_last_insn ();
 481       rtx pat;
 482       enum machine_mode maxmode;
 483       int save_volatile_ok = volatile_ok;
 484
 485       maxmode = insn_data[(int) CODE_FOR_insv].operand[3].mode;
 486       if (maxmode == VOIDmode)
 487         maxmode = word_mode;
 488
 489       volatile_ok = 1;
 490
 491       /* If this machine's insv can only insert into a register, copy OP0
 492          into a register and save it back later.  */
 493       /* This used to check flag_force_mem, but that was a serious
 494          de-optimization now that flag_force_mem is enabled by -O2.  */
 495       if (GET_CODE (op0) == MEM
 496           && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
 497                 (op0, VOIDmode)))
 498         {
 499           rtx tempreg;
 500           enum machine_mode bestmode;
 501
 502           /* Get the mode to use for inserting into this field.  If OP0 is
 503              BLKmode, get the smallest mode consistent with the alignment. If
 504              OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
 505              mode. Otherwise, use the smallest mode containing the field.  */
 506
 507           if (GET_MODE (op0) == BLKmode
 508               || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
 509             bestmode
 510               = get_best_mode (bitsize, bitnum, align, maxmode,
 511                                MEM_VOLATILE_P (op0));
 512           else
 513             bestmode = GET_MODE (op0);
 514
 515           if (bestmode == VOIDmode
 516               || (SLOW_UNALIGNED_ACCESS (bestmode, align)
 517                   && GET_MODE_BITSIZE (bestmode) > align))
 518             goto insv_loses;
 519
 520           /* Adjust address to point to the containing unit of that mode.  */
 521           unit = GET_MODE_BITSIZE (bestmode);
 522           /* Compute offset as multiple of this unit, counting in bytes.  */
 523           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 524           bitpos = bitnum % unit;
 525           op0 = change_address (op0, bestmode,
 526                                 plus_constant (XEXP (op0, 0), offset));
 527
 528           /* Fetch that unit, store the bitfield in it, then store
 529              the unit.  */
 530           tempreg = copy_to_reg (op0);
 531           store_bit_field (tempreg, bitsize, bitpos, fieldmode, value,
 532                            align, total_size);
 533           emit_move_insn (op0, tempreg);
 534           return value;
 535         }
 536       volatile_ok = save_volatile_ok;
 537
 538       /* Add OFFSET into OP0's address.  */
 539       if (GET_CODE (xop0) == MEM)
 540         xop0 = change_address (xop0, byte_mode,
 541                                plus_constant (XEXP (xop0, 0), offset));
 542
 543       /* If xop0 is a register, we need it in MAXMODE
 544          to make it acceptable to the format of insv.  */
 545       if (GET_CODE (xop0) == SUBREG)
 546         /* We can't just change the mode, because this might clobber op0,
 547            and we will need the original value of op0 if insv fails.  */
 548         xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_WORD (xop0));
 549       if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
 550         xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
 551
 552       /* On big-endian machines, we count bits from the most significant.
 553          If the bit field insn does not, we must invert.  */
 554
 555       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 556         xbitpos = unit - bitsize - xbitpos;
 557
 558       /* We have been counting XBITPOS within UNIT.
 559          Count instead within the size of the register.  */
 560       if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
 561         xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
 562
 563       unit = GET_MODE_BITSIZE (maxmode);
 564
 565       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
 566       value1 = value;
 567       if (GET_MODE (value) != maxmode)
 568         {
 569           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 570             {
 571               /* Optimization: Don't bother really extending VALUE
 572                  if it has all the bits we will actually use.  However,
 573                  if we must narrow it, be sure we do it correctly.  */
 574
 575               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
 576                 {
 577                   /* Avoid making subreg of a subreg, or of a mem.  */
 578                   if (GET_CODE (value1) != REG)
 579                     value1 = copy_to_reg (value1);
 580                   value1 = gen_rtx_SUBREG (maxmode, value1, 0);
 581                 }
 582               else
 583                 value1 = gen_lowpart (maxmode, value1);
 584             }
 585           else if (!CONSTANT_P (value))
 586             /* Parse phase is supposed to make VALUE's data type
 587                match that of the component reference, which is a type
 588                at least as wide as the field; so VALUE should have
 589                a mode that corresponds to that type.  */
 590             abort ();
 591         }
 592
 593       /* If this machine's insv insists on a register,
 594          get VALUE1 into a register.  */
 595       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 596              (value1, maxmode)))
 597         value1 = force_reg (maxmode, value1);
 598
 599       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 600       if (pat)
 601         emit_insn (pat);
 602       else
 603         {
 604           delete_insns_since (last);
 605           store_fixed_bit_field (op0, offset, bitsize, bitpos, value, align);
 606         }
 607     }
 608   else
 609     insv_loses:
 610 #endif
 611     /* Insv is not available; store using shifts and boolean ops.  */
 612     store_fixed_bit_field (op0, offset, bitsize, bitpos, value, align);
 613   return value;
 614 }
 615 \f
 616 /* Use shifts and boolean operations to store VALUE
 617    into a bit field of width BITSIZE
 618    in a memory location specified by OP0 except offset by OFFSET bytes.
 619      (OFFSET must be 0 if OP0 is a register.)
 620    The field starts at position BITPOS within the byte.
 621     (If OP0 is a register, it may be a full word or a narrower mode,
 622      but BITPOS still counts within a full word,
 623      which is significant on bigendian machines.)
 624    STRUCT_ALIGN is the alignment the structure is known to have.
 625
 626    Note that protect_from_queue has already been done on OP0 and VALUE.  */
 627
 628 static void
 629 store_fixed_bit_field (op0, offset, bitsize, bitpos, value, struct_align)
 630      register rtx op0;
 631      unsigned HOST_WIDE_INT offset, bitsize, bitpos;
 632      register rtx value;
 633      unsigned int struct_align;
 634 {
 635   register enum machine_mode mode;
 636   unsigned int total_bits = BITS_PER_WORD;
 637   rtx subtarget, temp;
 638   int all_zero = 0;
 639   int all_one = 0;
 640
 641   if (! SLOW_UNALIGNED_ACCESS (word_mode, struct_align))
 642     struct_align = BIGGEST_ALIGNMENT;
 643
 644   /* There is a case not handled here:
 645      a structure with a known alignment of just a halfword
 646      and a field split across two aligned halfwords within the structure.
 647      Or likewise a structure with a known alignment of just a byte
 648      and a field split across two bytes.
 649      Such cases are not supposed to be able to occur.  */
 650
 651   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 652     {
 653       if (offset != 0)
 654         abort ();
 655       /* Special treatment for a bit field split across two registers.  */
 656       if (bitsize + bitpos > BITS_PER_WORD)
 657         {
 658           store_split_bit_field (op0, bitsize, bitpos,
 659                                  value, BITS_PER_WORD);
 660           return;
 661         }
 662     }
 663   else
 664     {
 665       /* Get the proper mode to use for this field.  We want a mode that
 666          includes the entire field.  If such a mode would be larger than
 667          a word, we won't be doing the extraction the normal way.  */
 668
 669       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 670                             struct_align, word_mode,
 671                             GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0));
 672
 673       if (mode == VOIDmode)
 674         {
 675           /* The only way this should occur is if the field spans word
 676              boundaries.  */
 677           store_split_bit_field (op0,
 678                                  bitsize, bitpos + offset * BITS_PER_UNIT,
 679                                  value, struct_align);
 680           return;
 681         }
 682
 683       total_bits = GET_MODE_BITSIZE (mode);
 684
 685       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 686          be in the range 0 to total_bits-1, and put any excess bytes in
 687          OFFSET.  */
 688       if (bitpos >= total_bits)
 689         {
 690           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 691           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 692                      * BITS_PER_UNIT);
 693         }
 694
 695       /* Get ref to an aligned byte, halfword, or word containing the field.
 696          Adjust BITPOS to be position within a word,
 697          and OFFSET to be the offset of that word.
 698          Then alter OP0 to refer to that word.  */
 699       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 700       offset -= (offset % (total_bits / BITS_PER_UNIT));
 701       op0 = change_address (op0, mode,
 702                             plus_constant (XEXP (op0, 0), offset));
 703     }
 704
 705   mode = GET_MODE (op0);
 706
 707   /* Now MODE is either some integral mode for a MEM as OP0,
 708      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 709      The bit field is contained entirely within OP0.
 710      BITPOS is the starting bit number within OP0.
 711      (OP0's mode may actually be narrower than MODE.)  */
 712
 713   if (BYTES_BIG_ENDIAN)
 714       /* BITPOS is the distance between our msb
 715          and that of the containing datum.
 716          Convert it to the distance from the lsb.  */
 717       bitpos = total_bits - bitsize - bitpos;
 718
 719   /* Now BITPOS is always the distance between our lsb
 720      and that of OP0.  */
 721
 722   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 723      we must first convert its mode to MODE.  */
 724
 725   if (GET_CODE (value) == CONST_INT)
 726     {
 727       register HOST_WIDE_INT v = INTVAL (value);
 728
 729       if (bitsize < HOST_BITS_PER_WIDE_INT)
 730         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 731
 732       if (v == 0)
 733         all_zero = 1;
 734       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 735                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 736                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 737         all_one = 1;
 738
 739       value = lshift_value (mode, value, bitpos, bitsize);
 740     }
 741   else
 742     {
 743       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 744                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 745
 746       if (GET_MODE (value) != mode)
 747         {
 748           if ((GET_CODE (value) == REG || GET_CODE (value) == SUBREG)
 749               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 750             value = gen_lowpart (mode, value);
 751           else
 752             value = convert_to_mode (mode, value, 1);
 753         }
 754
 755       if (must_and)
 756         value = expand_binop (mode, and_optab, value,
 757                               mask_rtx (mode, 0, bitsize, 0),
 758                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 759       if (bitpos > 0)
 760         value = expand_shift (LSHIFT_EXPR, mode, value,
 761                               build_int_2 (bitpos, 0), NULL_RTX, 1);
 762     }
 763
 764   /* Now clear the chosen bits in OP0,
 765      except that if VALUE is -1 we need not bother.  */
 766
 767   subtarget = (GET_CODE (op0) == REG || ! flag_force_mem) ? op0 : 0;
 768
 769   if (! all_one)
 770     {
 771       temp = expand_binop (mode, and_optab, op0,
 772                            mask_rtx (mode, bitpos, bitsize, 1),
 773                            subtarget, 1, OPTAB_LIB_WIDEN);
 774       subtarget = temp;
 775     }
 776   else
 777     temp = op0;
 778
 779   /* Now logical-or VALUE into OP0, unless it is zero.  */
 780
 781   if (! all_zero)
 782     temp = expand_binop (mode, ior_optab, temp, value,
 783                          subtarget, 1, OPTAB_LIB_WIDEN);
 784   if (op0 != temp)
 785     emit_move_insn (op0, temp);
 786 }
 787 \f
 788 /* Store a bit field that is split across multiple accessible memory objects.
 789
 790    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 791    BITSIZE is the field width; BITPOS the position of its first bit
 792    (within the word).
 793    VALUE is the value to store.
 794    ALIGN is the known alignment of OP0.
 795    This is also the size of the memory objects to be used.
 796
 797    This does not yet handle fields wider than BITS_PER_WORD.  */
 798
 799 static void
 800 store_split_bit_field (op0, bitsize, bitpos, value, align)
 801      rtx op0;
 802      unsigned HOST_WIDE_INT bitsize, bitpos;
 803      rtx value;
 804      unsigned int align;
 805 {
 806   unsigned int unit;
 807   unsigned int bitsdone = 0;
 808
 809   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 810      much at a time.  */
 811   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 812     unit = BITS_PER_WORD;
 813   else
 814     unit = MIN (align, BITS_PER_WORD);
 815
 816   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 817      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 818      that VALUE might be a floating-point constant.  */
 819   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
 820     {
 821       rtx word = gen_lowpart_common (word_mode, value);
 822
 823       if (word && (value != word))
 824         value = word;
 825       else
 826         value = gen_lowpart_common (word_mode,
 827                                     force_reg (GET_MODE (value) != VOIDmode
 828                                                ? GET_MODE (value)
 829                                                : word_mode, value));
 830     }
 831   else if (GET_CODE (value) == ADDRESSOF)
 832     value = copy_to_reg (value);
 833
 834   while (bitsdone < bitsize)
 835     {
 836       unsigned HOST_WIDE_INT thissize;
 837       rtx part, word;
 838       unsigned HOST_WIDE_INT thispos;
 839       unsigned HOST_WIDE_INT offset;
 840
 841       offset = (bitpos + bitsdone) / unit;
 842       thispos = (bitpos + bitsdone) % unit;
 843
 844       /* THISSIZE must not overrun a word boundary.  Otherwise,
 845          store_fixed_bit_field will call us again, and we will mutually
 846          recurse forever.  */
 847       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
 848       thissize = MIN (thissize, unit - thispos);
 849
 850       if (BYTES_BIG_ENDIAN)
 851         {
 852           int total_bits;
 853
 854           /* We must do an endian conversion exactly the same way as it is
 855              done in extract_bit_field, so that the two calls to
 856              extract_fixed_bit_field will have comparable arguments.  */
 857           if (GET_CODE (value) != MEM || GET_MODE (value) == BLKmode)
 858             total_bits = BITS_PER_WORD;
 859           else
 860             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
 861
 862           /* Fetch successively less significant portions.  */
 863           if (GET_CODE (value) == CONST_INT)
 864             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 865                              >> (bitsize - bitsdone - thissize))
 866                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 867           else
 868             /* The args are chosen so that the last part includes the
 869                lsb.  Give extract_bit_field the value it needs (with
 870                endianness compensation) to fetch the piece we want.
 871
 872                ??? We have no idea what the alignment of VALUE is, so
 873                we have to use a guess.  */
 874             part
 875               = extract_fixed_bit_field
 876                 (word_mode, value, 0, thissize,
 877                  total_bits - bitsize + bitsdone, NULL_RTX, 1,
 878                  GET_MODE (value) == VOIDmode
 879                  ? UNITS_PER_WORD
 880                  : (GET_MODE (value) == BLKmode
 881                     ? 1 : GET_MODE_ALIGNMENT (GET_MODE (value))));
 882         }
 883       else
 884         {
 885           /* Fetch successively more significant portions.  */
 886           if (GET_CODE (value) == CONST_INT)
 887             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 888                              >> bitsdone)
 889                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 890           else
 891             part
 892               = extract_fixed_bit_field
 893                 (word_mode, value, 0, thissize, bitsdone, NULL_RTX, 1,
 894                  GET_MODE (value) == VOIDmode
 895                  ? UNITS_PER_WORD
 896                  : (GET_MODE (value) == BLKmode
 897                     ? 1 : GET_MODE_ALIGNMENT (GET_MODE (value))));
 898         }
 899
 900       /* If OP0 is a register, then handle OFFSET here.
 901
 902          When handling multiword bitfields, extract_bit_field may pass
 903          down a word_mode SUBREG of a larger REG for a bitfield that actually
 904          crosses a word boundary.  Thus, for a SUBREG, we must find
 905          the current word starting from the base register.  */
 906       if (GET_CODE (op0) == SUBREG)
 907         {
 908           word = operand_subword_force (SUBREG_REG (op0),
 909                                         SUBREG_WORD (op0) + offset,
 910                                         GET_MODE (SUBREG_REG (op0)));
 911           offset = 0;
 912         }
 913       else if (GET_CODE (op0) == REG)
 914         {
 915           word = operand_subword_force (op0, offset, GET_MODE (op0));
 916           offset = 0;
 917         }
 918       else
 919         word = op0;
 920
 921       /* OFFSET is in UNITs, and UNIT is in bits.
 922          store_fixed_bit_field wants offset in bytes.  */
 923       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT,
 924                              thissize, thispos, part, align);
 925       bitsdone += thissize;
 926     }
 927 }
 928 \f
 929 /* Generate code to extract a byte-field from STR_RTX
 930    containing BITSIZE bits, starting at BITNUM,
 931    and put it in TARGET if possible (if TARGET is nonzero).
 932    Regardless of TARGET, we return the rtx for where the value is placed.
 933    It may be a QUEUED.
 934
 935    STR_RTX is the structure containing the byte (a REG or MEM).
 936    UNSIGNEDP is nonzero if this is an unsigned bit field.
 937    MODE is the natural mode of the field value once extracted.
 938    TMODE is the mode the caller would like the value to have;
 939    but the value may be returned with type MODE instead.
 940
 941    ALIGN is the alignment that STR_RTX is known to have.
 942    TOTAL_SIZE is the size in bytes of the containing structure,
 943    or -1 if varying.
 944
 945    If a TARGET is specified and we can store in it at no extra cost,
 946    we do so, and return TARGET.
 947    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
 948    if they are equally easy.  */
 949
 950 rtx
 951 extract_bit_field (str_rtx, bitsize, bitnum, unsignedp,
 952                    target, mode, tmode, align, total_size)
 953      rtx str_rtx;
 954      unsigned HOST_WIDE_INT bitsize;
 955      unsigned HOST_WIDE_INT bitnum;
 956      int unsignedp;
 957      rtx target;
 958      enum machine_mode mode, tmode;
 959      unsigned int align;
 960      HOST_WIDE_INT total_size;
 961 {
 962   unsigned int unit
 963     = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
 964   unsigned HOST_WIDE_INT offset = bitnum / unit;
 965   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
 966   register rtx op0 = str_rtx;
 967   rtx spec_target = target;
 968   rtx spec_target_subreg = 0;
 969   enum machine_mode int_mode;
 970 #ifdef HAVE_extv
 971   unsigned HOST_WIDE_INT extv_bitsize;
 972   enum machine_mode extv_mode;
 973 #endif
 974 #ifdef HAVE_extzv
 975   unsigned HOST_WIDE_INT extzv_bitsize;
 976   enum machine_mode extzv_mode;
 977 #endif
 978
 979 #ifdef HAVE_extv
 980   extv_mode = insn_data[(int) CODE_FOR_extv].operand[0].mode;
 981   if (extv_mode == VOIDmode)
 982     extv_mode = word_mode;
 983   extv_bitsize = GET_MODE_BITSIZE (extv_mode);
 984 #endif
 985
 986 #ifdef HAVE_extzv
 987   extzv_mode = insn_data[(int) CODE_FOR_extzv].operand[0].mode;
 988   if (extzv_mode == VOIDmode)
 989     extzv_mode = word_mode;
 990   extzv_bitsize = GET_MODE_BITSIZE (extzv_mode);
 991 #endif
 992
 993   /* Discount the part of the structure before the desired byte.
 994      We need to know how many bytes are safe to reference after it.  */
 995   if (total_size >= 0)
 996     total_size -= (bitpos / BIGGEST_ALIGNMENT
 997                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
 998
 999   if (tmode == VOIDmode)
1000     tmode = mode;
1001   while (GET_CODE (op0) == SUBREG)
1002     {
1003       int outer_size = GET_MODE_BITSIZE (GET_MODE (op0));
1004       int inner_size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op0)));
1005
1006       offset += SUBREG_WORD (op0);
1007
1008       inner_size = MIN (inner_size, BITS_PER_WORD);
1009
1010       if (BYTES_BIG_ENDIAN && (outer_size < inner_size))
1011         {
1012           bitpos += inner_size - outer_size;
1013           if (bitpos > unit)
1014             {
1015               offset += (bitpos / unit);
1016               bitpos %= unit;
1017             }
1018         }
1019
1020       op0 = SUBREG_REG (op0);
1021     }
1022
1023   /* Make sure we are playing with integral modes.  Pun with subregs
1024      if we aren't.  */
1025   {
1026     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1027     if (imode != GET_MODE (op0))
1028       {
1029         if (GET_CODE (op0) == MEM)
1030           op0 = change_address (op0, imode, NULL_RTX);
1031         else if (imode != BLKmode)
1032           op0 = gen_lowpart (imode, op0);
1033         else
1034           abort ();
1035       }
1036   }
1037
1038   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1039      If that's wrong, the solution is to test for it and set TARGET to 0
1040      if needed.  */
1041
1042   /* If OP0 is a register, BITPOS must count within a word.
1043      But as we have it, it counts within whatever size OP0 now has.
1044      On a bigendian machine, these are not the same, so convert.  */
1045   if (BYTES_BIG_ENDIAN
1046       && GET_CODE (op0) != MEM
1047       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1048     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1049
1050   /* Extracting a full-word or multi-word value
1051      from a structure in a register or aligned memory.
1052      This can be done with just SUBREG.
1053      So too extracting a subword value in
1054      the least significant part of the register.  */
1055
1056   if (((GET_CODE (op0) != MEM
1057         && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1058                                   GET_MODE_BITSIZE (GET_MODE (op0))))
1059        || (GET_CODE (op0) == MEM
1060            && (! SLOW_UNALIGNED_ACCESS (mode, align)
1061                || (offset * BITS_PER_UNIT % bitsize == 0
1062                    && align % bitsize == 0))))
1063       && ((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1064            && bitpos % BITS_PER_WORD == 0)
1065           || (mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) != BLKmode
1066               /* ??? The big endian test here is wrong.  This is correct
1067                  if the value is in a register, and if mode_for_size is not
1068                  the same mode as op0.  This causes us to get unnecessarily
1069                  inefficient code from the Thumb port when -mbig-endian.  */
1070               && (BYTES_BIG_ENDIAN
1071                   ? bitpos + bitsize == BITS_PER_WORD
1072                   : bitpos == 0))))
1073     {
1074       enum machine_mode mode1
1075         = mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0);
1076
1077       if (mode1 != GET_MODE (op0))
1078         {
1079           if (GET_CODE (op0) == SUBREG)
1080             {
1081               if (GET_MODE (SUBREG_REG (op0)) == mode1
1082                   || GET_MODE_CLASS (mode1) == MODE_INT
1083                   || GET_MODE_CLASS (mode1) == MODE_PARTIAL_INT)
1084                 op0 = SUBREG_REG (op0);
1085               else
1086                 /* Else we've got some float mode source being extracted into
1087                    a different float mode destination -- this combination of
1088                    subregs results in Severe Tire Damage.  */
1089                 abort ();
1090             }
1091           if (GET_CODE (op0) == REG)
1092             op0 = gen_rtx_SUBREG (mode1, op0, offset);
1093           else
1094             op0 = change_address (op0, mode1,
1095                                   plus_constant (XEXP (op0, 0), offset));
1096         }
1097       if (mode1 != mode)
1098         return convert_to_mode (tmode, op0, unsignedp);
1099       return op0;
1100     }
1101
1102   /* Handle fields bigger than a word.  */
1103
1104   if (bitsize > BITS_PER_WORD)
1105     {
1106       /* Here we transfer the words of the field
1107          in the order least significant first.
1108          This is because the most significant word is the one which may
1109          be less than full.  */
1110
1111       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1112       unsigned int i;
1113
1114       if (target == 0 || GET_CODE (target) != REG)
1115         target = gen_reg_rtx (mode);
1116
1117       /* Indicate for flow that the entire target reg is being set.  */
1118       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1119
1120       for (i = 0; i < nwords; i++)
1121         {
1122           /* If I is 0, use the low-order word in both field and target;
1123              if I is 1, use the next to lowest word; and so on.  */
1124           /* Word number in TARGET to use.  */
1125           unsigned int wordnum
1126             = (WORDS_BIG_ENDIAN
1127                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1128                : i);
1129           /* Offset from start of field in OP0.  */
1130           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1131                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1132                                                 * (int) BITS_PER_WORD))
1133                                      : (int) i * BITS_PER_WORD);
1134           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1135           rtx result_part
1136             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1137                                            bitsize - i * BITS_PER_WORD),
1138                                  bitnum + bit_offset, 1, target_part, mode,
1139                                  word_mode, align, total_size);
1140
1141           if (target_part == 0)
1142             abort ();
1143
1144           if (result_part != target_part)
1145             emit_move_insn (target_part, result_part);
1146         }
1147
1148       if (unsignedp)
1149         {
1150           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1151              need to be zero'd out.  */
1152           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1153             {
1154               unsigned int i, total_words;
1155
1156               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1157               for (i = nwords; i < total_words; i++)
1158                 {
1159                   int wordnum = WORDS_BIG_ENDIAN ? total_words - i - 1 : i;
1160                   rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1161                   emit_move_insn (target_part, const0_rtx);
1162                 }
1163             }
1164           return target;
1165         }
1166
1167       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1168       target = expand_shift (LSHIFT_EXPR, mode, target,
1169                              build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1170                              NULL_RTX, 0);
1171       return expand_shift (RSHIFT_EXPR, mode, target,
1172                            build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1173                            NULL_RTX, 0);
1174     }
1175
1176   /* From here on we know the desired field is smaller than a word.  */
1177
1178   /* Check if there is a correspondingly-sized integer field, so we can
1179      safely extract it as one size of integer, if necessary; then
1180      truncate or extend to the size that is wanted; then use SUBREGs or
1181      convert_to_mode to get one of the modes we really wanted.  */
1182
1183   int_mode = int_mode_for_mode (tmode);
1184   if (int_mode == BLKmode)
1185     int_mode = int_mode_for_mode (mode);
1186   if (int_mode == BLKmode)
1187     abort();    /* Should probably push op0 out to memory and then
1188                    do a load.  */
1189
1190   /* OFFSET is the number of words or bytes (UNIT says which)
1191      from STR_RTX to the first word or byte containing part of the field.  */
1192
1193   if (GET_CODE (op0) != MEM)
1194     {
1195       if (offset != 0
1196           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1197         {
1198           if (GET_CODE (op0) != REG)
1199             op0 = copy_to_reg (op0);
1200           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1201                                 op0, offset);
1202         }
1203       offset = 0;
1204     }
1205   else
1206     {
1207       op0 = protect_from_queue (str_rtx, 1);
1208     }
1209
1210   /* Now OFFSET is nonzero only for memory operands.  */
1211
1212   if (unsignedp)
1213     {
1214 #ifdef HAVE_extzv
1215       if (HAVE_extzv
1216           && (extzv_bitsize >= bitsize)
1217           && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1218                 && (bitsize + bitpos > extzv_bitsize)))
1219         {
1220           unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1221           rtx bitsize_rtx, bitpos_rtx;
1222           rtx last = get_last_insn ();
1223           rtx xop0 = op0;
1224           rtx xtarget = target;
1225           rtx xspec_target = spec_target;
1226           rtx xspec_target_subreg = spec_target_subreg;
1227           rtx pat;
1228           enum machine_mode maxmode;
1229
1230           maxmode = insn_data[(int) CODE_FOR_extzv].operand[0].mode;
1231           if (maxmode == VOIDmode)
1232             maxmode = word_mode;
1233
1234           if (GET_CODE (xop0) == MEM)
1235             {
1236               int save_volatile_ok = volatile_ok;
1237               volatile_ok = 1;
1238
1239               /* Is the memory operand acceptable?  */
1240               if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1241                      (xop0, GET_MODE (xop0))))
1242                 {
1243                   /* No, load into a reg and extract from there.  */
1244                   enum machine_mode bestmode;
1245
1246                   /* Get the mode to use for inserting into this field.  If
1247                      OP0 is BLKmode, get the smallest mode consistent with the
1248                      alignment. If OP0 is a non-BLKmode object that is no
1249                      wider than MAXMODE, use its mode. Otherwise, use the
1250                      smallest mode containing the field.  */
1251
1252                   if (GET_MODE (xop0) == BLKmode
1253                       || (GET_MODE_SIZE (GET_MODE (op0))
1254                           > GET_MODE_SIZE (maxmode)))
1255                     bestmode = get_best_mode (bitsize, bitnum, align, maxmode,
1256                                               MEM_VOLATILE_P (xop0));
1257                   else
1258                     bestmode = GET_MODE (xop0);
1259
1260                   if (bestmode == VOIDmode
1261                       || (SLOW_UNALIGNED_ACCESS (bestmode, align)
1262                           && GET_MODE_BITSIZE (bestmode) > align))
1263                     goto extzv_loses;
1264
1265                   /* Compute offset as multiple of this unit,
1266                      counting in bytes.  */
1267                   unit = GET_MODE_BITSIZE (bestmode);
1268                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1269                   xbitpos = bitnum % unit;
1270                   xop0 = change_address (xop0, bestmode,
1271                                          plus_constant (XEXP (xop0, 0),
1272                                                         xoffset));
1273                   /* Fetch it to a register in that size.  */
1274                   xop0 = force_reg (bestmode, xop0);
1275
1276                   /* XBITPOS counts within UNIT, which is what is expected.  */
1277                 }
1278               else
1279                 /* Get ref to first byte containing part of the field.  */
1280                 xop0 = change_address (xop0, byte_mode,
1281                                        plus_constant (XEXP (xop0, 0), xoffset));
1282
1283               volatile_ok = save_volatile_ok;
1284             }
1285
1286           /* If op0 is a register, we need it in MAXMODE (which is usually
1287              SImode). to make it acceptable to the format of extzv.  */
1288           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1289             goto extzv_loses;
1290           if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1291             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1292
1293           /* On big-endian machines, we count bits from the most significant.
1294              If the bit field insn does not, we must invert.  */
1295           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1296             xbitpos = unit - bitsize - xbitpos;
1297
1298           /* Now convert from counting within UNIT to counting in MAXMODE.  */
1299           if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1300             xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1301
1302           unit = GET_MODE_BITSIZE (maxmode);
1303
1304           if (xtarget == 0
1305               || (flag_force_mem && GET_CODE (xtarget) == MEM))
1306             xtarget = xspec_target = gen_reg_rtx (tmode);
1307
1308           if (GET_MODE (xtarget) != maxmode)
1309             {
1310               if (GET_CODE (xtarget) == REG)
1311                 {
1312                   int wider = (GET_MODE_SIZE (maxmode)
1313                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1314                   xtarget = gen_lowpart (maxmode, xtarget);
1315                   if (wider)
1316                     xspec_target_subreg = xtarget;
1317                 }
1318               else
1319                 xtarget = gen_reg_rtx (maxmode);
1320             }
1321
1322           /* If this machine's extzv insists on a register target,
1323              make sure we have one.  */
1324           if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1325                  (xtarget, maxmode)))
1326             xtarget = gen_reg_rtx (maxmode);
1327
1328           bitsize_rtx = GEN_INT (bitsize);
1329           bitpos_rtx = GEN_INT (xbitpos);
1330
1331           pat = gen_extzv (protect_from_queue (xtarget, 1),
1332                            xop0, bitsize_rtx, bitpos_rtx);
1333           if (pat)
1334             {
1335               emit_insn (pat);
1336               target = xtarget;
1337               spec_target = xspec_target;
1338               spec_target_subreg = xspec_target_subreg;
1339             }
1340           else
1341             {
1342               delete_insns_since (last);
1343               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1344                                                 bitpos, target, 1, align);
1345             }
1346         }
1347       else
1348         extzv_loses:
1349 #endif
1350       target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1351                                         bitpos, target, 1, align);
1352     }
1353   else
1354     {
1355 #ifdef HAVE_extv
1356       if (HAVE_extv
1357           && (extv_bitsize >= bitsize)
1358           && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1359                 && (bitsize + bitpos > extv_bitsize)))
1360         {
1361           int xbitpos = bitpos, xoffset = offset;
1362           rtx bitsize_rtx, bitpos_rtx;
1363           rtx last = get_last_insn ();
1364           rtx xop0 = op0, xtarget = target;
1365           rtx xspec_target = spec_target;
1366           rtx xspec_target_subreg = spec_target_subreg;
1367           rtx pat;
1368           enum machine_mode maxmode;
1369
1370           maxmode = insn_data[(int) CODE_FOR_extv].operand[0].mode;
1371           if (maxmode == VOIDmode)
1372             maxmode = word_mode;
1373
1374           if (GET_CODE (xop0) == MEM)
1375             {
1376               /* Is the memory operand acceptable?  */
1377               if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1378                      (xop0, GET_MODE (xop0))))
1379                 {
1380                   /* No, load into a reg and extract from there.  */
1381                   enum machine_mode bestmode;
1382
1383                   /* Get the mode to use for inserting into this field.  If
1384                      OP0 is BLKmode, get the smallest mode consistent with the
1385                      alignment. If OP0 is a non-BLKmode object that is no
1386                      wider than MAXMODE, use its mode. Otherwise, use the
1387                      smallest mode containing the field.  */
1388
1389                   if (GET_MODE (xop0) == BLKmode
1390                       || (GET_MODE_SIZE (GET_MODE (op0))
1391                           > GET_MODE_SIZE (maxmode)))
1392                     bestmode = get_best_mode (bitsize, bitnum, align, maxmode,
1393                                               MEM_VOLATILE_P (xop0));
1394                   else
1395                     bestmode = GET_MODE (xop0);
1396
1397                   if (bestmode == VOIDmode
1398                       || (SLOW_UNALIGNED_ACCESS (bestmode, align)
1399                           && GET_MODE_BITSIZE (bestmode) > align))
1400                     goto extv_loses;
1401
1402                   /* Compute offset as multiple of this unit,
1403                      counting in bytes.  */
1404                   unit = GET_MODE_BITSIZE (bestmode);
1405                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1406                   xbitpos = bitnum % unit;
1407                   xop0 = change_address (xop0, bestmode,
1408                                          plus_constant (XEXP (xop0, 0),
1409                                                         xoffset));
1410                   /* Fetch it to a register in that size.  */
1411                   xop0 = force_reg (bestmode, xop0);
1412
1413                   /* XBITPOS counts within UNIT, which is what is expected.  */
1414                 }
1415               else
1416                 /* Get ref to first byte containing part of the field.  */
1417                 xop0 = change_address (xop0, byte_mode,
1418                                        plus_constant (XEXP (xop0, 0), xoffset));
1419             }
1420
1421           /* If op0 is a register, we need it in MAXMODE (which is usually
1422              SImode) to make it acceptable to the format of extv.  */
1423           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1424             goto extv_loses;
1425           if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1426             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1427
1428           /* On big-endian machines, we count bits from the most significant.
1429              If the bit field insn does not, we must invert.  */
1430           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1431             xbitpos = unit - bitsize - xbitpos;
1432
1433           /* XBITPOS counts within a size of UNIT.
1434              Adjust to count within a size of MAXMODE.  */
1435           if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1436             xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1437
1438           unit = GET_MODE_BITSIZE (maxmode);
1439
1440           if (xtarget == 0
1441               || (flag_force_mem && GET_CODE (xtarget) == MEM))
1442             xtarget = xspec_target = gen_reg_rtx (tmode);
1443
1444           if (GET_MODE (xtarget) != maxmode)
1445             {
1446               if (GET_CODE (xtarget) == REG)
1447                 {
1448                   int wider = (GET_MODE_SIZE (maxmode)
1449                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1450                   xtarget = gen_lowpart (maxmode, xtarget);
1451                   if (wider)
1452                     xspec_target_subreg = xtarget;
1453                 }
1454               else
1455                 xtarget = gen_reg_rtx (maxmode);
1456             }
1457
1458           /* If this machine's extv insists on a register target,
1459              make sure we have one.  */
1460           if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1461                  (xtarget, maxmode)))
1462             xtarget = gen_reg_rtx (maxmode);
1463
1464           bitsize_rtx = GEN_INT (bitsize);
1465           bitpos_rtx = GEN_INT (xbitpos);
1466
1467           pat = gen_extv (protect_from_queue (xtarget, 1),
1468                           xop0, bitsize_rtx, bitpos_rtx);
1469           if (pat)
1470             {
1471               emit_insn (pat);
1472               target = xtarget;
1473               spec_target = xspec_target;
1474               spec_target_subreg = xspec_target_subreg;
1475             }
1476           else
1477             {
1478               delete_insns_since (last);
1479               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1480                                                 bitpos, target, 0, align);
1481             }
1482         }
1483       else
1484         extv_loses:
1485 #endif
1486       target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1487                                         bitpos, target, 0, align);
1488     }
1489   if (target == spec_target)
1490     return target;
1491   if (target == spec_target_subreg)
1492     return spec_target;
1493   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1494     {
1495       /* If the target mode is floating-point, first convert to the
1496          integer mode of that size and then access it as a floating-point
1497          value via a SUBREG.  */
1498       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1499         {
1500           target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
1501                                                    MODE_INT, 0),
1502                                     target, unsignedp);
1503           if (GET_CODE (target) != REG)
1504             target = copy_to_reg (target);
1505           return gen_rtx_SUBREG (tmode, target, 0);
1506         }
1507       else
1508         return convert_to_mode (tmode, target, unsignedp);
1509     }
1510   return target;
1511 }
1512 \f
1513 /* Extract a bit field using shifts and boolean operations
1514    Returns an rtx to represent the value.
1515    OP0 addresses a register (word) or memory (byte).
1516    BITPOS says which bit within the word or byte the bit field starts in.
1517    OFFSET says how many bytes farther the bit field starts;
1518     it is 0 if OP0 is a register.
1519    BITSIZE says how many bits long the bit field is.
1520     (If OP0 is a register, it may be narrower than a full word,
1521      but BITPOS still counts within a full word,
1522      which is significant on bigendian machines.)
1523
1524    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1525    If TARGET is nonzero, attempts to store the value there
1526    and return TARGET, but this is not guaranteed.
1527    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.
1528
1529    ALIGN is the alignment that STR_RTX is known to have.  */
1530
1531 static rtx
1532 extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
1533                          target, unsignedp, align)
1534      enum machine_mode tmode;
1535      register rtx op0, target;
1536      unsigned HOST_WIDE_INT offset, bitsize, bitpos;
1537      int unsignedp;
1538      unsigned int align;
1539 {
1540   unsigned int total_bits = BITS_PER_WORD;
1541   enum machine_mode mode;
1542
1543   if (GET_CODE (op0) == SUBREG || GET_CODE (op0) == REG)
1544     {
1545       /* Special treatment for a bit field split across two registers.  */
1546       if (bitsize + bitpos > BITS_PER_WORD)
1547         return extract_split_bit_field (op0, bitsize, bitpos,
1548                                         unsignedp, align);
1549     }
1550   else
1551     {
1552       /* Get the proper mode to use for this field.  We want a mode that
1553          includes the entire field.  If such a mode would be larger than
1554          a word, we won't be doing the extraction the normal way.  */
1555
1556       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, align,
1557                             word_mode,
1558                             GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0));
1559
1560       if (mode == VOIDmode)
1561         /* The only way this should occur is if the field spans word
1562            boundaries.  */
1563         return extract_split_bit_field (op0, bitsize,
1564                                         bitpos + offset * BITS_PER_UNIT,
1565                                         unsignedp, align);
1566
1567       total_bits = GET_MODE_BITSIZE (mode);
1568
1569       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1570          be in the range 0 to total_bits-1, and put any excess bytes in
1571          OFFSET.  */
1572       if (bitpos >= total_bits)
1573         {
1574           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1575           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1576                      * BITS_PER_UNIT);
1577         }
1578
1579       /* Get ref to an aligned byte, halfword, or word containing the field.
1580          Adjust BITPOS to be position within a word,
1581          and OFFSET to be the offset of that word.
1582          Then alter OP0 to refer to that word.  */
1583       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1584       offset -= (offset % (total_bits / BITS_PER_UNIT));
1585       op0 = change_address (op0, mode,
1586                             plus_constant (XEXP (op0, 0), offset));
1587     }
1588
1589   mode = GET_MODE (op0);
1590
1591   if (BYTES_BIG_ENDIAN)
1592     {
1593       /* BITPOS is the distance between our msb and that of OP0.
1594          Convert it to the distance from the lsb.  */
1595
1596       bitpos = total_bits - bitsize - bitpos;
1597     }
1598
1599   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1600      We have reduced the big-endian case to the little-endian case.  */
1601
1602   if (unsignedp)
1603     {
1604       if (bitpos)
1605         {
1606           /* If the field does not already start at the lsb,
1607              shift it so it does.  */
1608           tree amount = build_int_2 (bitpos, 0);
1609           /* Maybe propagate the target for the shift.  */
1610           /* But not if we will return it--could confuse integrate.c.  */
1611           rtx subtarget = (target != 0 && GET_CODE (target) == REG
1612                            && !REG_FUNCTION_VALUE_P (target)
1613                            ? target : 0);
1614           if (tmode != mode) subtarget = 0;
1615           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1616         }
1617       /* Convert the value to the desired mode.  */
1618       if (mode != tmode)
1619         op0 = convert_to_mode (tmode, op0, 1);
1620
1621       /* Unless the msb of the field used to be the msb when we shifted,
1622          mask out the upper bits.  */
1623
1624       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize
1625 #if 0
1626 #ifdef SLOW_ZERO_EXTEND
1627           /* Always generate an `and' if
1628              we just zero-extended op0 and SLOW_ZERO_EXTEND, since it
1629              will combine fruitfully with the zero-extend.  */
1630           || tmode != mode
1631 #endif
1632 #endif
1633           )
1634         return expand_binop (GET_MODE (op0), and_optab, op0,
1635                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1636                              target, 1, OPTAB_LIB_WIDEN);
1637       return op0;
1638     }
1639
1640   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1641      then arithmetic-shift its lsb to the lsb of the word.  */
1642   op0 = force_reg (mode, op0);
1643   if (mode != tmode)
1644     target = 0;
1645
1646   /* Find the narrowest integer mode that contains the field.  */
1647
1648   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1649        mode = GET_MODE_WIDER_MODE (mode))
1650     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1651       {
1652         op0 = convert_to_mode (mode, op0, 0);
1653         break;
1654       }
1655
1656   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1657     {
1658       tree amount = build_int_2 (GET_MODE_BITSIZE (mode) - (bitsize + bitpos), 0);
1659       /* Maybe propagate the target for the shift.  */
1660       /* But not if we will return the result--could confuse integrate.c.  */
1661       rtx subtarget = (target != 0 && GET_CODE (target) == REG
1662                        && ! REG_FUNCTION_VALUE_P (target)
1663                        ? target : 0);
1664       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1665     }
1666
1667   return expand_shift (RSHIFT_EXPR, mode, op0,
1668                        build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1669                        target, 0);
1670 }
1671 \f
1672 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1673    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1674    complement of that if COMPLEMENT.  The mask is truncated if
1675    necessary to the width of mode MODE.  The mask is zero-extended if
1676    BITSIZE+BITPOS is too small for MODE.  */
1677
1678 static rtx
1679 mask_rtx (mode, bitpos, bitsize, complement)
1680      enum machine_mode mode;
1681      int bitpos, bitsize, complement;
1682 {
1683   HOST_WIDE_INT masklow, maskhigh;
1684
1685   if (bitpos < HOST_BITS_PER_WIDE_INT)
1686     masklow = (HOST_WIDE_INT) -1 << bitpos;
1687   else
1688     masklow = 0;
1689
1690   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1691     masklow &= ((unsigned HOST_WIDE_INT) -1
1692                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1693
1694   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1695     maskhigh = -1;
1696   else
1697     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1698
1699   if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1700     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1701                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1702   else
1703     maskhigh = 0;
1704
1705   if (complement)
1706     {
1707       maskhigh = ~maskhigh;
1708       masklow = ~masklow;
1709     }
1710
1711   return immed_double_const (masklow, maskhigh, mode);
1712 }
1713
1714 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1715    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1716
1717 static rtx
1718 lshift_value (mode, value, bitpos, bitsize)
1719      enum machine_mode mode;
1720      rtx value;
1721      int bitpos, bitsize;
1722 {
1723   unsigned HOST_WIDE_INT v = INTVAL (value);
1724   HOST_WIDE_INT low, high;
1725
1726   if (bitsize < HOST_BITS_PER_WIDE_INT)
1727     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1728
1729   if (bitpos < HOST_BITS_PER_WIDE_INT)
1730     {
1731       low = v << bitpos;
1732       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1733     }
1734   else
1735     {
1736       low = 0;
1737       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1738     }
1739
1740   return immed_double_const (low, high, mode);
1741 }
1742 \f
1743 /* Extract a bit field that is split across two words
1744    and return an RTX for the result.
1745
1746    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1747    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1748    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
1749
1750    ALIGN is the known alignment of OP0.  This is also the size of the
1751    memory objects to be used.  */
1752
1753 static rtx
1754 extract_split_bit_field (op0, bitsize, bitpos, unsignedp, align)
1755      rtx op0;
1756      unsigned HOST_WIDE_INT bitsize, bitpos;
1757      int unsignedp;
1758      unsigned int align;
1759 {
1760   unsigned int unit;
1761   unsigned int bitsdone = 0;
1762   rtx result = NULL_RTX;
1763   int first = 1;
1764
1765   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1766      much at a time.  */
1767   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1768     unit = BITS_PER_WORD;
1769   else
1770     unit = MIN (align, BITS_PER_WORD);
1771
1772   while (bitsdone < bitsize)
1773     {
1774       unsigned HOST_WIDE_INT thissize;
1775       rtx part, word;
1776       unsigned HOST_WIDE_INT thispos;
1777       unsigned HOST_WIDE_INT offset;
1778
1779       offset = (bitpos + bitsdone) / unit;
1780       thispos = (bitpos + bitsdone) % unit;
1781
1782       /* THISSIZE must not overrun a word boundary.  Otherwise,
1783          extract_fixed_bit_field will call us again, and we will mutually
1784          recurse forever.  */
1785       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1786       thissize = MIN (thissize, unit - thispos);
1787
1788       /* If OP0 is a register, then handle OFFSET here.
1789
1790          When handling multiword bitfields, extract_bit_field may pass
1791          down a word_mode SUBREG of a larger REG for a bitfield that actually
1792          crosses a word boundary.  Thus, for a SUBREG, we must find
1793          the current word starting from the base register.  */
1794       if (GET_CODE (op0) == SUBREG)
1795         {
1796           word = operand_subword_force (SUBREG_REG (op0),
1797                                         SUBREG_WORD (op0) + offset,
1798                                         GET_MODE (SUBREG_REG (op0)));
1799           offset = 0;
1800         }
1801       else if (GET_CODE (op0) == REG)
1802         {
1803           word = operand_subword_force (op0, offset, GET_MODE (op0));
1804           offset = 0;
1805         }
1806       else
1807         word = op0;
1808
1809       /* Extract the parts in bit-counting order,
1810          whose meaning is determined by BYTES_PER_UNIT.
1811          OFFSET is in UNITs, and UNIT is in bits.
1812          extract_fixed_bit_field wants offset in bytes.  */
1813       part = extract_fixed_bit_field (word_mode, word,
1814                                       offset * unit / BITS_PER_UNIT,
1815                                       thissize, thispos, 0, 1, align);
1816       bitsdone += thissize;
1817
1818       /* Shift this part into place for the result.  */
1819       if (BYTES_BIG_ENDIAN)
1820         {
1821           if (bitsize != bitsdone)
1822             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1823                                  build_int_2 (bitsize - bitsdone, 0), 0, 1);
1824         }
1825       else
1826         {
1827           if (bitsdone != thissize)
1828             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1829                                  build_int_2 (bitsdone - thissize, 0), 0, 1);
1830         }
1831
1832       if (first)
1833         result = part;
1834       else
1835         /* Combine the parts with bitwise or.  This works
1836            because we extracted each part as an unsigned bit field.  */
1837         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1838                                OPTAB_LIB_WIDEN);
1839
1840       first = 0;
1841     }
1842
1843   /* Unsigned bit field: we are done.  */
1844   if (unsignedp)
1845     return result;
1846   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1847   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1848                          build_int_2 (BITS_PER_WORD - bitsize, 0),
1849                          NULL_RTX, 0);
1850   return expand_shift (RSHIFT_EXPR, word_mode, result,
1851                        build_int_2 (BITS_PER_WORD - bitsize, 0), NULL_RTX, 0);
1852 }
1853 \f
1854 /* Add INC into TARGET.  */
1855
1856 void
1857 expand_inc (target, inc)
1858      rtx target, inc;
1859 {
1860   rtx value = expand_binop (GET_MODE (target), add_optab,
1861                             target, inc,
1862                             target, 0, OPTAB_LIB_WIDEN);
1863   if (value != target)
1864     emit_move_insn (target, value);
1865 }
1866
1867 /* Subtract DEC from TARGET.  */
1868
1869 void
1870 expand_dec (target, dec)
1871      rtx target, dec;
1872 {
1873   rtx value = expand_binop (GET_MODE (target), sub_optab,
1874                             target, dec,
1875                             target, 0, OPTAB_LIB_WIDEN);
1876   if (value != target)
1877     emit_move_insn (target, value);
1878 }
1879 \f
1880 /* Output a shift instruction for expression code CODE,
1881    with SHIFTED being the rtx for the value to shift,
1882    and AMOUNT the tree for the amount to shift by.
1883    Store the result in the rtx TARGET, if that is convenient.
1884    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
1885    Return the rtx for where the value is.  */
1886
1887 rtx
1888 expand_shift (code, mode, shifted, amount, target, unsignedp)
1889      enum tree_code code;
1890      register enum machine_mode mode;
1891      rtx shifted;
1892      tree amount;
1893      register rtx target;
1894      int unsignedp;
1895 {
1896   register rtx op1, temp = 0;
1897   register int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
1898   register int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
1899   int try;
1900
1901   /* Previously detected shift-counts computed by NEGATE_EXPR
1902      and shifted in the other direction; but that does not work
1903      on all machines.  */
1904
1905   op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
1906
1907 #ifdef SHIFT_COUNT_TRUNCATED
1908   if (SHIFT_COUNT_TRUNCATED)
1909     {
1910       if (GET_CODE (op1) == CONST_INT
1911           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
1912               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
1913         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
1914                        % GET_MODE_BITSIZE (mode));
1915       else if (GET_CODE (op1) == SUBREG
1916                && SUBREG_WORD (op1) == 0)
1917         op1 = SUBREG_REG (op1);
1918     }
1919 #endif
1920
1921   if (op1 == const0_rtx)
1922     return shifted;
1923
1924   for (try = 0; temp == 0 && try < 3; try++)
1925     {
1926       enum optab_methods methods;
1927
1928       if (try == 0)
1929         methods = OPTAB_DIRECT;
1930       else if (try == 1)
1931         methods = OPTAB_WIDEN;
1932       else
1933         methods = OPTAB_LIB_WIDEN;
1934
1935       if (rotate)
1936         {
1937           /* Widening does not work for rotation.  */
1938           if (methods == OPTAB_WIDEN)
1939             continue;
1940           else if (methods == OPTAB_LIB_WIDEN)
1941             {
1942               /* If we have been unable to open-code this by a rotation,
1943                  do it as the IOR of two shifts.  I.e., to rotate A
1944                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
1945                  where C is the bitsize of A.
1946
1947                  It is theoretically possible that the target machine might
1948                  not be able to perform either shift and hence we would
1949                  be making two libcalls rather than just the one for the
1950                  shift (similarly if IOR could not be done).  We will allow
1951                  this extremely unlikely lossage to avoid complicating the
1952                  code below.  */
1953
1954               rtx subtarget = target == shifted ? 0 : target;
1955               rtx temp1;
1956               tree type = TREE_TYPE (amount);
1957               tree new_amount = make_tree (type, op1);
1958               tree other_amount
1959                 = fold (build (MINUS_EXPR, type,
1960                                convert (type,
1961                                         build_int_2 (GET_MODE_BITSIZE (mode),
1962                                                      0)),
1963                                amount));
1964
1965               shifted = force_reg (mode, shifted);
1966
1967               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
1968                                    mode, shifted, new_amount, subtarget, 1);
1969               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
1970                                     mode, shifted, other_amount, 0, 1);
1971               return expand_binop (mode, ior_optab, temp, temp1, target,
1972                                    unsignedp, methods);
1973             }
1974
1975           temp = expand_binop (mode,
1976                                left ? rotl_optab : rotr_optab,
1977                                shifted, op1, target, unsignedp, methods);
1978
1979           /* If we don't have the rotate, but we are rotating by a constant
1980              that is in range, try a rotate in the opposite direction.  */
1981
1982           if (temp == 0 && GET_CODE (op1) == CONST_INT
1983               && INTVAL (op1) > 0 && INTVAL (op1) < GET_MODE_BITSIZE (mode))
1984             temp = expand_binop (mode,
1985                                  left ? rotr_optab : rotl_optab,
1986                                  shifted,
1987                                  GEN_INT (GET_MODE_BITSIZE (mode)
1988                                           - INTVAL (op1)),
1989                                  target, unsignedp, methods);
1990         }
1991       else if (unsignedp)
1992         temp = expand_binop (mode,
1993                              left ? ashl_optab : lshr_optab,
1994                              shifted, op1, target, unsignedp, methods);
1995
1996       /* Do arithmetic shifts.
1997          Also, if we are going to widen the operand, we can just as well
1998          use an arithmetic right-shift instead of a logical one.  */
1999       if (temp == 0 && ! rotate
2000           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2001         {
2002           enum optab_methods methods1 = methods;
2003
2004           /* If trying to widen a log shift to an arithmetic shift,
2005              don't accept an arithmetic shift of the same size.  */
2006           if (unsignedp)
2007             methods1 = OPTAB_MUST_WIDEN;
2008
2009           /* Arithmetic shift */
2010
2011           temp = expand_binop (mode,
2012                                left ? ashl_optab : ashr_optab,
2013                                shifted, op1, target, unsignedp, methods1);
2014         }
2015
2016       /* We used to try extzv here for logical right shifts, but that was
2017          only useful for one machine, the VAX, and caused poor code
2018          generation there for lshrdi3, so the code was deleted and a
2019          define_expand for lshrsi3 was added to vax.md.  */
2020     }
2021
2022   if (temp == 0)
2023     abort ();
2024   return temp;
2025 }
2026 \f
2027 enum alg_code { alg_zero, alg_m, alg_shift,
2028                   alg_add_t_m2, alg_sub_t_m2,
2029                   alg_add_factor, alg_sub_factor,
2030                   alg_add_t2_m, alg_sub_t2_m,
2031                   alg_add, alg_subtract, alg_factor, alg_shiftop };
2032
2033 /* This structure records a sequence of operations.
2034    `ops' is the number of operations recorded.
2035    `cost' is their total cost.
2036    The operations are stored in `op' and the corresponding
2037    logarithms of the integer coefficients in `log'.
2038
2039    These are the operations:
2040    alg_zero             total := 0;
2041    alg_m                total := multiplicand;
2042    alg_shift            total := total * coeff
2043    alg_add_t_m2         total := total + multiplicand * coeff;
2044    alg_sub_t_m2         total := total - multiplicand * coeff;
2045    alg_add_factor       total := total * coeff + total;
2046    alg_sub_factor       total := total * coeff - total;
2047    alg_add_t2_m         total := total * coeff + multiplicand;
2048    alg_sub_t2_m         total := total * coeff - multiplicand;
2049
2050    The first operand must be either alg_zero or alg_m.  */
2051
2052 struct algorithm
2053 {
2054   short cost;
2055   short ops;
2056   /* The size of the OP and LOG fields are not directly related to the
2057      word size, but the worst-case algorithms will be if we have few
2058      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2059      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2060      in total wordsize operations.  */
2061   enum alg_code op[MAX_BITS_PER_WORD];
2062   char log[MAX_BITS_PER_WORD];
2063 };
2064
2065 static void synth_mult                  PARAMS ((struct algorithm *,
2066                                                  unsigned HOST_WIDE_INT,
2067                                                  int));
2068 static unsigned HOST_WIDE_INT choose_multiplier PARAMS ((unsigned HOST_WIDE_INT,
2069                                                          int, int,
2070                                                          unsigned HOST_WIDE_INT *,
2071                                                          int *, int *));
2072 static unsigned HOST_WIDE_INT invert_mod2n      PARAMS ((unsigned HOST_WIDE_INT,
2073                                                          int));
2074 /* Compute and return the best algorithm for multiplying by T.
2075    The algorithm must cost less than cost_limit
2076    If retval.cost >= COST_LIMIT, no algorithm was found and all
2077    other field of the returned struct are undefined.  */
2078
2079 static void
2080 synth_mult (alg_out, t, cost_limit)
2081      struct algorithm *alg_out;
2082      unsigned HOST_WIDE_INT t;
2083      int cost_limit;
2084 {
2085   int m;
2086   struct algorithm *alg_in, *best_alg;
2087   int cost;
2088   unsigned HOST_WIDE_INT q;
2089
2090   /* Indicate that no algorithm is yet found.  If no algorithm
2091      is found, this value will be returned and indicate failure.  */
2092   alg_out->cost = cost_limit;
2093
2094   if (cost_limit <= 0)
2095     return;
2096
2097   /* t == 1 can be done in zero cost.  */
2098   if (t == 1)
2099     {
2100       alg_out->ops = 1;
2101       alg_out->cost = 0;
2102       alg_out->op[0] = alg_m;
2103       return;
2104     }
2105
2106   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2107      fail now.  */
2108   if (t == 0)
2109     {
2110       if (zero_cost >= cost_limit)
2111         return;
2112       else
2113         {
2114           alg_out->ops = 1;
2115           alg_out->cost = zero_cost;
2116           alg_out->op[0] = alg_zero;
2117           return;
2118         }
2119     }
2120
2121   /* We'll be needing a couple extra algorithm structures now.  */
2122
2123   alg_in = (struct algorithm *)alloca (sizeof (struct algorithm));
2124   best_alg = (struct algorithm *)alloca (sizeof (struct algorithm));
2125
2126   /* If we have a group of zero bits at the low-order part of T, try
2127      multiplying by the remaining bits and then doing a shift.  */
2128
2129   if ((t & 1) == 0)
2130     {
2131       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2132       q = t >> m;
2133       cost = shift_cost[m];
2134       synth_mult (alg_in, q, cost_limit - cost);
2135
2136       cost += alg_in->cost;
2137       if (cost < cost_limit)
2138         {
2139           struct algorithm *x;
2140           x = alg_in, alg_in = best_alg, best_alg = x;
2141           best_alg->log[best_alg->ops] = m;
2142           best_alg->op[best_alg->ops] = alg_shift;
2143           cost_limit = cost;
2144         }
2145     }
2146
2147   /* If we have an odd number, add or subtract one.  */
2148   if ((t & 1) != 0)
2149     {
2150       unsigned HOST_WIDE_INT w;
2151
2152       for (w = 1; (w & t) != 0; w <<= 1)
2153         ;
2154       /* If T was -1, then W will be zero after the loop.  This is another
2155          case where T ends with ...111.  Handling this with (T + 1) and
2156          subtract 1 produces slightly better code and results in algorithm
2157          selection much faster than treating it like the ...0111 case
2158          below.  */
2159       if (w == 0
2160           || (w > 2
2161               /* Reject the case where t is 3.
2162                  Thus we prefer addition in that case.  */
2163               && t != 3))
2164         {
2165           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2166
2167           cost = add_cost;
2168           synth_mult (alg_in, t + 1, cost_limit - cost);
2169
2170           cost += alg_in->cost;
2171           if (cost < cost_limit)
2172             {
2173               struct algorithm *x;
2174               x = alg_in, alg_in = best_alg, best_alg = x;
2175               best_alg->log[best_alg->ops] = 0;
2176               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2177               cost_limit = cost;
2178             }
2179         }
2180       else
2181         {
2182           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2183
2184           cost = add_cost;
2185           synth_mult (alg_in, t - 1, cost_limit - cost);
2186
2187           cost += alg_in->cost;
2188           if (cost < cost_limit)
2189             {
2190               struct algorithm *x;
2191               x = alg_in, alg_in = best_alg, best_alg = x;
2192               best_alg->log[best_alg->ops] = 0;
2193               best_alg->op[best_alg->ops] = alg_add_t_m2;
2194               cost_limit = cost;
2195             }
2196         }
2197     }
2198
2199   /* Look for factors of t of the form
2200      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2201      If we find such a factor, we can multiply by t using an algorithm that
2202      multiplies by q, shift the result by m and add/subtract it to itself.
2203
2204      We search for large factors first and loop down, even if large factors
2205      are less probable than small; if we find a large factor we will find a
2206      good sequence quickly, and therefore be able to prune (by decreasing
2207      COST_LIMIT) the search.  */
2208
2209   for (m = floor_log2 (t - 1); m >= 2; m--)
2210     {
2211       unsigned HOST_WIDE_INT d;
2212
2213       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2214       if (t % d == 0 && t > d)
2215         {
2216           cost = MIN (shiftadd_cost[m], add_cost + shift_cost[m]);
2217           synth_mult (alg_in, t / d, cost_limit - cost);
2218
2219           cost += alg_in->cost;
2220           if (cost < cost_limit)
2221             {
2222               struct algorithm *x;
2223               x = alg_in, alg_in = best_alg, best_alg = x;
2224               best_alg->log[best_alg->ops] = m;
2225               best_alg->op[best_alg->ops] = alg_add_factor;
2226               cost_limit = cost;
2227             }
2228           /* Other factors will have been taken care of in the recursion.  */
2229           break;
2230         }
2231
2232       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2233       if (t % d == 0 && t > d)
2234         {
2235           cost = MIN (shiftsub_cost[m], add_cost + shift_cost[m]);
2236           synth_mult (alg_in, t / d, cost_limit - cost);
2237
2238           cost += alg_in->cost;
2239           if (cost < cost_limit)
2240             {
2241               struct algorithm *x;
2242               x = alg_in, alg_in = best_alg, best_alg = x;
2243               best_alg->log[best_alg->ops] = m;
2244               best_alg->op[best_alg->ops] = alg_sub_factor;
2245               cost_limit = cost;
2246             }
2247           break;
2248         }
2249     }
2250
2251   /* Try shift-and-add (load effective address) instructions,
2252      i.e. do a*3, a*5, a*9.  */
2253   if ((t & 1) != 0)
2254     {
2255       q = t - 1;
2256       q = q & -q;
2257       m = exact_log2 (q);
2258       if (m >= 0)
2259         {
2260           cost = shiftadd_cost[m];
2261           synth_mult (alg_in, (t - 1) >> m, cost_limit - cost);
2262
2263           cost += alg_in->cost;
2264           if (cost < cost_limit)
2265             {
2266               struct algorithm *x;
2267               x = alg_in, alg_in = best_alg, best_alg = x;
2268               best_alg->log[best_alg->ops] = m;
2269               best_alg->op[best_alg->ops] = alg_add_t2_m;
2270               cost_limit = cost;
2271             }
2272         }
2273
2274       q = t + 1;
2275       q = q & -q;
2276       m = exact_log2 (q);
2277       if (m >= 0)
2278         {
2279           cost = shiftsub_cost[m];
2280           synth_mult (alg_in, (t + 1) >> m, cost_limit - cost);
2281
2282           cost += alg_in->cost;
2283           if (cost < cost_limit)
2284             {
2285               struct algorithm *x;
2286               x = alg_in, alg_in = best_alg, best_alg = x;
2287               best_alg->log[best_alg->ops] = m;
2288               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2289               cost_limit = cost;
2290             }
2291         }
2292     }
2293
2294   /* If cost_limit has not decreased since we stored it in alg_out->cost,
2295      we have not found any algorithm.  */
2296   if (cost_limit == alg_out->cost)
2297     return;
2298
2299   /* If we are getting a too long sequence for `struct algorithm'
2300      to record, make this search fail.  */
2301   if (best_alg->ops == MAX_BITS_PER_WORD)
2302     return;
2303
2304   /* Copy the algorithm from temporary space to the space at alg_out.
2305      We avoid using structure assignment because the majority of
2306      best_alg is normally undefined, and this is a critical function.  */
2307   alg_out->ops = best_alg->ops + 1;
2308   alg_out->cost = cost_limit;
2309   bcopy ((char *) best_alg->op, (char *) alg_out->op,
2310          alg_out->ops * sizeof *alg_out->op);
2311   bcopy ((char *) best_alg->log, (char *) alg_out->log,
2312          alg_out->ops * sizeof *alg_out->log);
2313 }
2314 \f
2315 /* Perform a multiplication and return an rtx for the result.
2316    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2317    TARGET is a suggestion for where to store the result (an rtx).
2318
2319    We check specially for a constant integer as OP1.
2320    If you want this check for OP0 as well, then before calling
2321    you should swap the two operands if OP0 would be constant.  */
2322
2323 rtx
2324 expand_mult (mode, op0, op1, target, unsignedp)
2325      enum machine_mode mode;
2326      register rtx op0, op1, target;
2327      int unsignedp;
2328 {
2329   rtx const_op1 = op1;
2330
2331   /* synth_mult does an `unsigned int' multiply.  As long as the mode is
2332      less than or equal in size to `unsigned int' this doesn't matter.
2333      If the mode is larger than `unsigned int', then synth_mult works only
2334      if the constant value exactly fits in an `unsigned int' without any
2335      truncation.  This means that multiplying by negative values does
2336      not work; results are off by 2^32 on a 32 bit machine.  */
2337
2338   /* If we are multiplying in DImode, it may still be a win
2339      to try to work with shifts and adds.  */
2340   if (GET_CODE (op1) == CONST_DOUBLE
2341       && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2342       && HOST_BITS_PER_INT >= BITS_PER_WORD
2343       && CONST_DOUBLE_HIGH (op1) == 0)
2344     const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2345   else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2346            && GET_CODE (op1) == CONST_INT
2347            && INTVAL (op1) < 0)
2348     const_op1 = 0;
2349
2350   /* We used to test optimize here, on the grounds that it's better to
2351      produce a smaller program when -O is not used.
2352      But this causes such a terrible slowdown sometimes
2353      that it seems better to use synth_mult always.  */
2354
2355   if (const_op1 && GET_CODE (const_op1) == CONST_INT)
2356     {
2357       struct algorithm alg;
2358       struct algorithm alg2;
2359       HOST_WIDE_INT val = INTVAL (op1);
2360       HOST_WIDE_INT val_so_far;
2361       rtx insn;
2362       int mult_cost;
2363       enum {basic_variant, negate_variant, add_variant} variant = basic_variant;
2364
2365       /* Try to do the computation three ways: multiply by the negative of OP1
2366          and then negate, do the multiplication directly, or do multiplication
2367          by OP1 - 1.  */
2368
2369       mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
2370       mult_cost = MIN (12 * add_cost, mult_cost);
2371
2372       synth_mult (&alg, val, mult_cost);
2373
2374       /* This works only if the inverted value actually fits in an
2375          `unsigned int' */
2376       if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2377         {
2378           synth_mult (&alg2, - val,
2379                       (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
2380           if (alg2.cost + negate_cost < alg.cost)
2381             alg = alg2, variant = negate_variant;
2382         }
2383
2384       /* This proves very useful for division-by-constant.  */
2385       synth_mult (&alg2, val - 1,
2386                   (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost);
2387       if (alg2.cost + add_cost < alg.cost)
2388         alg = alg2, variant = add_variant;
2389
2390       if (alg.cost < mult_cost)
2391         {
2392           /* We found something cheaper than a multiply insn.  */
2393           int opno;
2394           rtx accum, tem;
2395
2396           op0 = protect_from_queue (op0, 0);
2397
2398           /* Avoid referencing memory over and over.
2399              For speed, but also for correctness when mem is volatile.  */
2400           if (GET_CODE (op0) == MEM)
2401             op0 = force_reg (mode, op0);
2402
2403           /* ACCUM starts out either as OP0 or as a zero, depending on
2404              the first operation.  */
2405
2406           if (alg.op[0] == alg_zero)
2407             {
2408               accum = copy_to_mode_reg (mode, const0_rtx);
2409               val_so_far = 0;
2410             }
2411           else if (alg.op[0] == alg_m)
2412             {
2413               accum = copy_to_mode_reg (mode, op0);
2414               val_so_far = 1;
2415             }
2416           else
2417             abort ();
2418
2419           for (opno = 1; opno < alg.ops; opno++)
2420             {
2421               int log = alg.log[opno];
2422               int preserve = preserve_subexpressions_p ();
2423               rtx shift_subtarget = preserve ? 0 : accum;
2424               rtx add_target
2425                 = (opno == alg.ops - 1 && target != 0 && variant != add_variant
2426                    && ! preserve)
2427                   ? target : 0;
2428               rtx accum_target = preserve ? 0 : accum;
2429
2430               switch (alg.op[opno])
2431                 {
2432                 case alg_shift:
2433                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2434                                         build_int_2 (log, 0), NULL_RTX, 0);
2435                   val_so_far <<= log;
2436                   break;
2437
2438                 case alg_add_t_m2:
2439                   tem = expand_shift (LSHIFT_EXPR, mode, op0,
2440                                       build_int_2 (log, 0), NULL_RTX, 0);
2441                   accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2442                                          add_target
2443                                          ? add_target : accum_target);
2444                   val_so_far += (HOST_WIDE_INT) 1 << log;
2445                   break;
2446
2447                 case alg_sub_t_m2:
2448                   tem = expand_shift (LSHIFT_EXPR, mode, op0,
2449                                       build_int_2 (log, 0), NULL_RTX, 0);
2450                   accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2451                                          add_target
2452                                          ? add_target : accum_target);
2453                   val_so_far -= (HOST_WIDE_INT) 1 << log;
2454                   break;
2455
2456                 case alg_add_t2_m:
2457                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2458                                         build_int_2 (log, 0), shift_subtarget,
2459                                         0);
2460                   accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2461                                          add_target
2462                                          ? add_target : accum_target);
2463                   val_so_far = (val_so_far << log) + 1;
2464                   break;
2465
2466                 case alg_sub_t2_m:
2467                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2468                                         build_int_2 (log, 0), shift_subtarget,
2469                                         0);
2470                   accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2471                                          add_target
2472                                          ? add_target : accum_target);
2473                   val_so_far = (val_so_far << log) - 1;
2474                   break;
2475
2476                 case alg_add_factor:
2477                   tem = expand_shift (LSHIFT_EXPR, mode, accum,
2478                                       build_int_2 (log, 0), NULL_RTX, 0);
2479                   accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2480                                          add_target
2481                                          ? add_target : accum_target);
2482                   val_so_far += val_so_far << log;
2483                   break;
2484
2485                 case alg_sub_factor:
2486                   tem = expand_shift (LSHIFT_EXPR, mode, accum,
2487                                       build_int_2 (log, 0), NULL_RTX, 0);
2488                   accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2489                                          (add_target ? add_target
2490                                           : preserve ? 0 : tem));
2491                   val_so_far = (val_so_far << log) - val_so_far;
2492                   break;
2493
2494                 default:
2495                   abort ();
2496                 }
2497
2498               /* Write a REG_EQUAL note on the last insn so that we can cse
2499                  multiplication sequences.  */
2500
2501               insn = get_last_insn ();
2502               set_unique_reg_note (insn,
2503                                    REG_EQUAL,
2504                                    gen_rtx_MULT (mode, op0,
2505                                                  GEN_INT (val_so_far)));
2506             }
2507
2508           if (variant == negate_variant)
2509             {
2510               val_so_far = - val_so_far;
2511               accum = expand_unop (mode, neg_optab, accum, target, 0);
2512             }
2513           else if (variant == add_variant)
2514             {
2515               val_so_far = val_so_far + 1;
2516               accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2517             }
2518
2519           if (val != val_so_far)
2520             abort ();
2521
2522           return accum;
2523         }
2524     }
2525
2526   /* This used to use umul_optab if unsigned, but for non-widening multiply
2527      there is no difference between signed and unsigned.  */
2528   op0 = expand_binop (mode, smul_optab,
2529                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2530   if (op0 == 0)
2531     abort ();
2532   return op0;
2533 }
2534 \f
2535 /* Return the smallest n such that 2**n >= X.  */
2536
2537 int
2538 ceil_log2 (x)
2539      unsigned HOST_WIDE_INT x;
2540 {
2541   return floor_log2 (x - 1) + 1;
2542 }
2543
2544 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2545    replace division by D, and put the least significant N bits of the result
2546    in *MULTIPLIER_PTR and return the most significant bit.
2547
2548    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
2549    needed precision is in PRECISION (should be <= N).
2550
2551    PRECISION should be as small as possible so this function can choose
2552    multiplier more freely.
2553
2554    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
2555    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2556
2557    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2558    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
2559
2560 static
2561 unsigned HOST_WIDE_INT
2562 choose_multiplier (d, n, precision, multiplier_ptr, post_shift_ptr, lgup_ptr)
2563      unsigned HOST_WIDE_INT d;
2564      int n;
2565      int precision;
2566      unsigned HOST_WIDE_INT *multiplier_ptr;
2567      int *post_shift_ptr;
2568      int *lgup_ptr;
2569 {
2570   HOST_WIDE_INT mhigh_hi, mlow_hi;
2571   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
2572   int lgup, post_shift;
2573   int pow, pow2;
2574   unsigned HOST_WIDE_INT nl, dummy1;
2575   HOST_WIDE_INT nh, dummy2;
2576
2577   /* lgup = ceil(log2(divisor)); */
2578   lgup = ceil_log2 (d);
2579
2580   if (lgup > n)
2581     abort ();
2582
2583   pow = n + lgup;
2584   pow2 = n + lgup - precision;
2585
2586   if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2587     {
2588       /* We could handle this with some effort, but this case is much better
2589          handled directly with a scc insn, so rely on caller using that.  */
2590       abort ();
2591     }
2592
2593   /* mlow = 2^(N + lgup)/d */
2594  if (pow >= HOST_BITS_PER_WIDE_INT)
2595     {
2596       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2597       nl = 0;
2598     }
2599   else
2600     {
2601       nh = 0;
2602       nl = (unsigned HOST_WIDE_INT) 1 << pow;
2603     }
2604   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2605                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2606
2607   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
2608   if (pow2 >= HOST_BITS_PER_WIDE_INT)
2609     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2610   else
2611     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2612   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2613                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2614
2615   if (mhigh_hi && nh - d >= d)
2616     abort ();
2617   if (mhigh_hi > 1 || mlow_hi > 1)
2618     abort ();
2619   /* assert that mlow < mhigh.  */
2620   if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2621     abort();
2622
2623   /* If precision == N, then mlow, mhigh exceed 2^N
2624      (but they do not exceed 2^(N+1)).  */
2625
2626   /* Reduce to lowest terms */
2627   for (post_shift = lgup; post_shift > 0; post_shift--)
2628     {
2629       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2630       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2631       if (ml_lo >= mh_lo)
2632         break;
2633
2634       mlow_hi = 0;
2635       mlow_lo = ml_lo;
2636       mhigh_hi = 0;
2637       mhigh_lo = mh_lo;
2638     }
2639
2640   *post_shift_ptr = post_shift;
2641   *lgup_ptr = lgup;
2642   if (n < HOST_BITS_PER_WIDE_INT)
2643     {
2644       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2645       *multiplier_ptr = mhigh_lo & mask;
2646       return mhigh_lo >= mask;
2647     }
2648   else
2649     {
2650       *multiplier_ptr = mhigh_lo;
2651       return mhigh_hi;
2652     }
2653 }
2654
2655 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
2656    congruent to 1 (mod 2**N).  */
2657
2658 static unsigned HOST_WIDE_INT
2659 invert_mod2n (x, n)
2660      unsigned HOST_WIDE_INT x;
2661      int n;
2662 {
2663   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
2664
2665   /* The algorithm notes that the choice y = x satisfies
2666      x*y == 1 mod 2^3, since x is assumed odd.
2667      Each iteration doubles the number of bits of significance in y.  */
2668
2669   unsigned HOST_WIDE_INT mask;
2670   unsigned HOST_WIDE_INT y = x;
2671   int nbit = 3;
2672
2673   mask = (n == HOST_BITS_PER_WIDE_INT
2674           ? ~(unsigned HOST_WIDE_INT) 0
2675           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2676
2677   while (nbit < n)
2678     {
2679       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
2680       nbit *= 2;
2681     }
2682   return y;
2683 }
2684
2685 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
2686    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
2687    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
2688    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
2689    become signed.
2690
2691    The result is put in TARGET if that is convenient.
2692
2693    MODE is the mode of operation.  */
2694
2695 rtx
2696 expand_mult_highpart_adjust (mode, adj_operand, op0, op1, target, unsignedp)
2697      enum machine_mode mode;
2698      register rtx adj_operand, op0, op1, target;
2699      int unsignedp;
2700 {
2701   rtx tem;
2702   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
2703
2704   tem = expand_shift (RSHIFT_EXPR, mode, op0,
2705                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2706                       NULL_RTX, 0);
2707   tem = expand_and (tem, op1, NULL_RTX);
2708   adj_operand
2709     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2710                      adj_operand);
2711
2712   tem = expand_shift (RSHIFT_EXPR, mode, op1,
2713                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2714                       NULL_RTX, 0);
2715   tem = expand_and (tem, op0, NULL_RTX);
2716   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2717                           target);
2718
2719   return target;
2720 }
2721
2722 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
2723    in TARGET if that is convenient, and return where the result is.  If the
2724    operation can not be performed, 0 is returned.
2725
2726    MODE is the mode of operation and result.
2727
2728    UNSIGNEDP nonzero means unsigned multiply.
2729
2730    MAX_COST is the total allowed cost for the expanded RTL.  */
2731
2732 rtx
2733 expand_mult_highpart (mode, op0, cnst1, target, unsignedp, max_cost)
2734      enum machine_mode mode;
2735      register rtx op0, target;
2736      unsigned HOST_WIDE_INT cnst1;
2737      int unsignedp;
2738      int max_cost;
2739 {
2740   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
2741   optab mul_highpart_optab;
2742   optab moptab;
2743   rtx tem;
2744   int size = GET_MODE_BITSIZE (mode);
2745   rtx op1, wide_op1;
2746
2747   /* We can't support modes wider than HOST_BITS_PER_INT.  */
2748   if (size > HOST_BITS_PER_WIDE_INT)
2749     abort ();
2750
2751   op1 = GEN_INT (cnst1);
2752
2753   if (GET_MODE_BITSIZE (wider_mode) <= HOST_BITS_PER_INT)
2754     wide_op1 = op1;
2755   else
2756     wide_op1
2757       = immed_double_const (cnst1,
2758                             (unsignedp
2759                              ? (HOST_WIDE_INT) 0
2760                              : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))),
2761                             wider_mode);
2762
2763   /* expand_mult handles constant multiplication of word_mode
2764      or narrower.  It does a poor job for large modes.  */
2765   if (size < BITS_PER_WORD
2766       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2767     {
2768       /* We have to do this, since expand_binop doesn't do conversion for
2769          multiply.  Maybe change expand_binop to handle widening multiply?  */
2770       op0 = convert_to_mode (wider_mode, op0, unsignedp);
2771
2772       tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, unsignedp);
2773       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2774                           build_int_2 (size, 0), NULL_RTX, 1);
2775       return convert_modes (mode, wider_mode, tem, unsignedp);
2776     }
2777
2778   if (target == 0)
2779     target = gen_reg_rtx (mode);
2780
2781   /* Firstly, try using a multiplication insn that only generates the needed
2782      high part of the product, and in the sign flavor of unsignedp.  */
2783   if (mul_highpart_cost[(int) mode] < max_cost)
2784     {
2785       mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
2786       target = expand_binop (mode, mul_highpart_optab,
2787                              op0, wide_op1, target, unsignedp, OPTAB_DIRECT);
2788       if (target)
2789         return target;
2790     }
2791
2792   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
2793      Need to adjust the result after the multiplication.  */
2794   if (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost < max_cost)
2795     {
2796       mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
2797       target = expand_binop (mode, mul_highpart_optab,
2798                              op0, wide_op1, target, unsignedp, OPTAB_DIRECT);
2799       if (target)
2800         /* We used the wrong signedness.  Adjust the result.  */
2801         return expand_mult_highpart_adjust (mode, target, op0,
2802                                             op1, target, unsignedp);
2803     }
2804
2805   /* Try widening multiplication.  */
2806   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
2807   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2808       && mul_widen_cost[(int) wider_mode] < max_cost)
2809     {
2810       op1 = force_reg (mode, op1);
2811       goto try;
2812     }
2813
2814   /* Try widening the mode and perform a non-widening multiplication.  */
2815   moptab = smul_optab;
2816   if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2817       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2818     {
2819       op1 = wide_op1;
2820       goto try;
2821     }
2822
2823   /* Try widening multiplication of opposite signedness, and adjust.  */
2824   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
2825   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2826       && (mul_widen_cost[(int) wider_mode]
2827           + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
2828     {
2829       rtx regop1 = force_reg (mode, op1);
2830       tem = expand_binop (wider_mode, moptab, op0, regop1,
2831                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
2832       if (tem != 0)
2833         {
2834           /* Extract the high half of the just generated product.  */
2835           tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2836                               build_int_2 (size, 0), NULL_RTX, 1);
2837           tem = convert_modes (mode, wider_mode, tem, unsignedp);
2838           /* We used the wrong signedness.  Adjust the result.  */
2839           return expand_mult_highpart_adjust (mode, tem, op0, op1,
2840                                               target, unsignedp);
2841         }
2842     }
2843
2844   return 0;
2845
2846  try:
2847   /* Pass NULL_RTX as target since TARGET has wrong mode.  */
2848   tem = expand_binop (wider_mode, moptab, op0, op1,
2849                       NULL_RTX, unsignedp, OPTAB_WIDEN);
2850   if (tem == 0)
2851     return 0;
2852
2853   /* Extract the high half of the just generated product.  */
2854   if (mode == word_mode)
2855     {
2856       return gen_highpart (mode, tem);
2857     }
2858   else
2859     {
2860       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2861                           build_int_2 (size, 0), NULL_RTX, 1);
2862       return convert_modes (mode, wider_mode, tem, unsignedp);
2863     }
2864 }
2865 \f
2866 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
2867    if that is convenient, and returning where the result is.
2868    You may request either the quotient or the remainder as the result;
2869    specify REM_FLAG nonzero to get the remainder.
2870
2871    CODE is the expression code for which kind of division this is;
2872    it controls how rounding is done.  MODE is the machine mode to use.
2873    UNSIGNEDP nonzero means do unsigned division.  */
2874
2875 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
2876    and then correct it by or'ing in missing high bits
2877    if result of ANDI is nonzero.
2878    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
2879    This could optimize to a bfexts instruction.
2880    But C doesn't use these operations, so their optimizations are
2881    left for later.  */
2882 /* ??? For modulo, we don't actually need the highpart of the first product,
2883    the low part will do nicely.  And for small divisors, the second multiply
2884    can also be a low-part only multiply or even be completely left out.
2885    E.g. to calculate the remainder of a division by 3 with a 32 bit
2886    multiply, multiply with 0x55555556 and extract the upper two bits;
2887    the result is exact for inputs up to 0x1fffffff.
2888    The input range can be reduced by using cross-sum rules.
2889    For odd divisors >= 3, the following table gives right shift counts
2890    so that if an number is shifted by an integer multiple of the given
2891    amount, the remainder stays the same:
2892    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
2893    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
2894    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
2895    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
2896    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
2897
2898    Cross-sum rules for even numbers can be derived by leaving as many bits
2899    to the right alone as the divisor has zeros to the right.
2900    E.g. if x is an unsigned 32 bit number:
2901    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
2902    */
2903
2904 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
2905
2906 rtx
2907 expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
2908      int rem_flag;
2909      enum tree_code code;
2910      enum machine_mode mode;
2911      register rtx op0, op1, target;
2912      int unsignedp;
2913 {
2914   enum machine_mode compute_mode;
2915   register rtx tquotient;
2916   rtx quotient = 0, remainder = 0;
2917   rtx last;
2918   int size;
2919   rtx insn, set;
2920   optab optab1, optab2;
2921   int op1_is_constant, op1_is_pow2;
2922   int max_cost, extra_cost;
2923   static HOST_WIDE_INT last_div_const = 0;
2924
2925   op1_is_constant = GET_CODE (op1) == CONST_INT;
2926   op1_is_pow2 = (op1_is_constant
2927                  && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
2928                       || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
2929
2930   /*
2931      This is the structure of expand_divmod:
2932
2933      First comes code to fix up the operands so we can perform the operations
2934      correctly and efficiently.
2935
2936      Second comes a switch statement with code specific for each rounding mode.
2937      For some special operands this code emits all RTL for the desired
2938      operation, for other cases, it generates only a quotient and stores it in
2939      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
2940      to indicate that it has not done anything.
2941
2942      Last comes code that finishes the operation.  If QUOTIENT is set and
2943      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
2944      QUOTIENT is not set, it is computed using trunc rounding.
2945
2946      We try to generate special code for division and remainder when OP1 is a
2947      constant.  If |OP1| = 2**n we can use shifts and some other fast
2948      operations.  For other values of OP1, we compute a carefully selected
2949      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
2950      by m.
2951
2952      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
2953      half of the product.  Different strategies for generating the product are
2954      implemented in expand_mult_highpart.
2955
2956      If what we actually want is the remainder, we generate that by another
2957      by-constant multiplication and a subtraction.  */
2958
2959   /* We shouldn't be called with OP1 == const1_rtx, but some of the
2960      code below will malfunction if we are, so check here and handle
2961      the special case if so.  */
2962   if (op1 == const1_rtx)
2963     return rem_flag ? const0_rtx : op0;
2964
2965   if (target
2966       /* Don't use the function value register as a target
2967          since we have to read it as well as write it,
2968          and function-inlining gets confused by this.  */
2969       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
2970           /* Don't clobber an operand while doing a multi-step calculation.  */
2971           || ((rem_flag || op1_is_constant)
2972               && (reg_mentioned_p (target, op0)
2973                   || (GET_CODE (op0) == MEM && GET_CODE (target) == MEM)))
2974           || reg_mentioned_p (target, op1)
2975           || (GET_CODE (op1) == MEM && GET_CODE (target) == MEM)))
2976     target = 0;
2977
2978   /* Get the mode in which to perform this computation.  Normally it will
2979      be MODE, but sometimes we can't do the desired operation in MODE.
2980      If so, pick a wider mode in which we can do the operation.  Convert
2981      to that mode at the start to avoid repeated conversions.
2982
2983      First see what operations we need.  These depend on the expression
2984      we are evaluating.  (We assume that divxx3 insns exist under the
2985      same conditions that modxx3 insns and that these insns don't normally
2986      fail.  If these assumptions are not correct, we may generate less
2987      efficient code in some cases.)
2988
2989      Then see if we find a mode in which we can open-code that operation
2990      (either a division, modulus, or shift).  Finally, check for the smallest
2991      mode for which we can do the operation with a library call.  */
2992
2993   /* We might want to refine this now that we have division-by-constant
2994      optimization.  Since expand_mult_highpart tries so many variants, it is
2995      not straightforward to generalize this.  Maybe we should make an array
2996      of possible modes in init_expmed?  Save this for GCC 2.7.  */
2997
2998   optab1 = (op1_is_pow2 ? (unsignedp ? lshr_optab : ashr_optab)
2999             : (unsignedp ? udiv_optab : sdiv_optab));
3000   optab2 = (op1_is_pow2 ? optab1 : (unsignedp ? udivmod_optab : sdivmod_optab));
3001
3002   for (compute_mode = mode; compute_mode != VOIDmode;
3003        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3004     if (optab1->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing
3005         || optab2->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing)
3006       break;
3007
3008   if (compute_mode == VOIDmode)
3009     for (compute_mode = mode; compute_mode != VOIDmode;
3010          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3011       if (optab1->handlers[(int) compute_mode].libfunc
3012           || optab2->handlers[(int) compute_mode].libfunc)
3013         break;
3014
3015   /* If we still couldn't find a mode, use MODE, but we'll probably abort
3016      in expand_binop.  */
3017   if (compute_mode == VOIDmode)
3018     compute_mode = mode;
3019
3020   if (target && GET_MODE (target) == compute_mode)
3021     tquotient = target;
3022   else
3023     tquotient = gen_reg_rtx (compute_mode);
3024
3025   size = GET_MODE_BITSIZE (compute_mode);
3026 #if 0
3027   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3028      (mode), and thereby get better code when OP1 is a constant.  Do that
3029      later.  It will require going over all usages of SIZE below.  */
3030   size = GET_MODE_BITSIZE (mode);
3031 #endif
3032
3033   /* Only deduct something for a REM if the last divide done was
3034      for a different constant.   Then set the constant of the last
3035      divide.  */
3036   max_cost = div_cost[(int) compute_mode]
3037     - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3038                       && INTVAL (op1) == last_div_const)
3039        ? mul_cost[(int) compute_mode] + add_cost : 0);
3040
3041   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3042
3043   /* Now convert to the best mode to use.  */
3044   if (compute_mode != mode)
3045     {
3046       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3047       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3048
3049       /* convert_modes may have placed op1 into a register, so we
3050          must recompute the following.  */
3051       op1_is_constant = GET_CODE (op1) == CONST_INT;
3052       op1_is_pow2 = (op1_is_constant
3053                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3054                           || (! unsignedp
3055                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3056     }
3057
3058   /* If one of the operands is a volatile MEM, copy it into a register.  */
3059
3060   if (GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0))
3061     op0 = force_reg (compute_mode, op0);
3062   if (GET_CODE (op1) == MEM && MEM_VOLATILE_P (op1))
3063     op1 = force_reg (compute_mode, op1);
3064
3065   /* If we need the remainder or if OP1 is constant, we need to
3066      put OP0 in a register in case it has any queued subexpressions.  */
3067   if (rem_flag || op1_is_constant)
3068     op0 = force_reg (compute_mode, op0);
3069
3070   last = get_last_insn ();
3071
3072   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3073   if (unsignedp)
3074     {
3075       if (code == FLOOR_DIV_EXPR)
3076         code = TRUNC_DIV_EXPR;
3077       if (code == FLOOR_MOD_EXPR)
3078         code = TRUNC_MOD_EXPR;
3079       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3080         code = TRUNC_DIV_EXPR;
3081     }
3082
3083   if (op1 != const0_rtx)
3084     switch (code)
3085       {
3086       case TRUNC_MOD_EXPR:
3087       case TRUNC_DIV_EXPR:
3088         if (op1_is_constant)
3089           {
3090             if (unsignedp)
3091               {
3092                 unsigned HOST_WIDE_INT mh, ml;
3093                 int pre_shift, post_shift;
3094                 int dummy;
3095                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3096
3097                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3098                   {
3099                     pre_shift = floor_log2 (d);
3100                     if (rem_flag)
3101                       {
3102                         remainder
3103                           = expand_binop (compute_mode, and_optab, op0,
3104                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3105                                           remainder, 1,
3106                                           OPTAB_LIB_WIDEN);
3107                         if (remainder)
3108                           return gen_lowpart (mode, remainder);
3109                       }
3110                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3111                                              build_int_2 (pre_shift, 0),
3112                                              tquotient, 1);
3113                   }
3114                 else if (size <= HOST_BITS_PER_WIDE_INT)
3115                   {
3116                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3117                       {
3118                         /* Most significant bit of divisor is set; emit an scc
3119                            insn.  */
3120                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3121                                                     compute_mode, 1, 1);
3122                         if (quotient == 0)
3123                           goto fail1;
3124                       }
3125                     else
3126                       {
3127                         /* Find a suitable multiplier and right shift count
3128                            instead of multiplying with D.  */
3129
3130                         mh = choose_multiplier (d, size, size,
3131                                                 &ml, &post_shift, &dummy);
3132
3133                         /* If the suggested multiplier is more than SIZE bits,
3134                            we can do better for even divisors, using an
3135                            initial right shift.  */
3136                         if (mh != 0 && (d & 1) == 0)
3137                           {
3138                             pre_shift = floor_log2 (d & -d);
3139                             mh = choose_multiplier (d >> pre_shift, size,
3140                                                     size - pre_shift,
3141                                                     &ml, &post_shift, &dummy);
3142                             if (mh)
3143                               abort ();
3144                           }
3145                         else
3146                           pre_shift = 0;
3147
3148                         if (mh != 0)
3149                           {
3150                             rtx t1, t2, t3, t4;
3151
3152                             extra_cost = (shift_cost[post_shift - 1]
3153                                           + shift_cost[1] + 2 * add_cost);
3154                             t1 = expand_mult_highpart (compute_mode, op0, ml,
3155                                                        NULL_RTX, 1,
3156                                                        max_cost - extra_cost);
3157                             if (t1 == 0)
3158                               goto fail1;
3159                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
3160                                                                op0, t1),
3161                                                 NULL_RTX);
3162                             t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3163                                                build_int_2 (1, 0), NULL_RTX,1);
3164                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
3165                                                               t1, t3),
3166                                                 NULL_RTX);
3167                             quotient
3168                               = expand_shift (RSHIFT_EXPR, compute_mode, t4,
3169                                               build_int_2 (post_shift - 1, 0),
3170                                               tquotient, 1);
3171                           }
3172                         else
3173                           {
3174                             rtx t1, t2;
3175
3176                             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3177                                                build_int_2 (pre_shift, 0),
3178                                                NULL_RTX, 1);
3179                             extra_cost = (shift_cost[pre_shift]
3180                                           + shift_cost[post_shift]);
3181                             t2 = expand_mult_highpart (compute_mode, t1, ml,
3182                                                        NULL_RTX, 1,
3183                                                        max_cost - extra_cost);
3184                             if (t2 == 0)
3185                               goto fail1;
3186                             quotient
3187                               = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3188                                               build_int_2 (post_shift, 0),
3189                                               tquotient, 1);
3190                           }
3191                       }
3192                   }
3193                 else            /* Too wide mode to use tricky code */
3194                   break;
3195
3196                 insn = get_last_insn ();
3197                 if (insn != last
3198                     && (set = single_set (insn)) != 0
3199                     && SET_DEST (set) == quotient)
3200                   set_unique_reg_note (insn,
3201                                        REG_EQUAL,
3202                                        gen_rtx_UDIV (compute_mode, op0, op1));
3203               }
3204             else                /* TRUNC_DIV, signed */
3205               {
3206                 unsigned HOST_WIDE_INT ml;
3207                 int lgup, post_shift;
3208                 HOST_WIDE_INT d = INTVAL (op1);
3209                 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3210
3211                 /* n rem d = n rem -d */
3212                 if (rem_flag && d < 0)
3213                   {
3214                     d = abs_d;
3215                     op1 = GEN_INT (abs_d);
3216                   }
3217
3218                 if (d == 1)
3219                   quotient = op0;
3220                 else if (d == -1)
3221                   quotient = expand_unop (compute_mode, neg_optab, op0,
3222                                           tquotient, 0);
3223                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3224                   {
3225                     /* This case is not handled correctly below.  */
3226                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
3227                                                 compute_mode, 1, 1);
3228                     if (quotient == 0)
3229                       goto fail1;
3230                   }
3231                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3232                          && (rem_flag ? smod_pow2_cheap : sdiv_pow2_cheap))
3233                   ;
3234                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3235                   {
3236                     lgup = floor_log2 (abs_d);
3237                     if (abs_d != 2 && BRANCH_COST < 3)
3238                       {
3239                         rtx label = gen_label_rtx ();
3240                         rtx t1;
3241
3242                         t1 = copy_to_mode_reg (compute_mode, op0);
3243                         do_cmp_and_jump (t1, const0_rtx, GE,
3244                                          compute_mode, label);
3245                         expand_inc (t1, GEN_INT (abs_d - 1));
3246                         emit_label (label);
3247                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3248                                                  build_int_2 (lgup, 0),
3249                                                  tquotient, 0);
3250                       }
3251                     else
3252                       {
3253                         rtx t1, t2, t3;
3254                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3255                                            build_int_2 (size - 1, 0),
3256                                            NULL_RTX, 0);
3257                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3258                                            build_int_2 (size - lgup, 0),
3259                                            NULL_RTX, 1);
3260                         t3 = force_operand (gen_rtx_PLUS (compute_mode,
3261                                                           op0, t2),
3262                                             NULL_RTX);
3263                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3264                                                  build_int_2 (lgup, 0),
3265                                                  tquotient, 0);
3266                       }
3267
3268                     /* We have computed OP0 / abs(OP1).  If OP1 is negative, negate
3269                        the quotient.  */
3270                     if (d < 0)
3271                       {
3272                         insn = get_last_insn ();
3273                         if (insn != last
3274                             && (set = single_set (insn)) != 0
3275                             && SET_DEST (set) == quotient
3276                             && abs_d < ((unsigned HOST_WIDE_INT) 1
3277                                         << (HOST_BITS_PER_WIDE_INT - 1)))
3278                           set_unique_reg_note (insn,
3279                                                REG_EQUAL,
3280                                                gen_rtx_DIV (compute_mode,
3281                                                             op0,
3282                                                             GEN_INT (abs_d)));
3283
3284                         quotient = expand_unop (compute_mode, neg_optab,
3285                                                 quotient, quotient, 0);
3286                       }
3287                   }
3288                 else if (size <= HOST_BITS_PER_WIDE_INT)
3289                   {
3290                     choose_multiplier (abs_d, size, size - 1,
3291                                        &ml, &post_shift, &lgup);
3292                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3293                       {
3294                         rtx t1, t2, t3;
3295
3296                         extra_cost = (shift_cost[post_shift]
3297                                       + shift_cost[size - 1] + add_cost);
3298                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3299                                                    NULL_RTX, 0,
3300                                                    max_cost - extra_cost);
3301                         if (t1 == 0)
3302                           goto fail1;
3303                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3304                                            build_int_2 (post_shift, 0), NULL_RTX, 0);
3305                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3306                                            build_int_2 (size - 1, 0), NULL_RTX, 0);
3307                         if (d < 0)
3308                           quotient
3309                             = force_operand (gen_rtx_MINUS (compute_mode,
3310                                                             t3, t2),
3311                                              tquotient);
3312                         else
3313                           quotient
3314                             = force_operand (gen_rtx_MINUS (compute_mode,
3315                                                             t2, t3),
3316                                              tquotient);
3317                       }
3318                     else
3319                       {
3320                         rtx t1, t2, t3, t4;
3321
3322                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3323                         extra_cost = (shift_cost[post_shift]
3324                                       + shift_cost[size - 1] + 2 * add_cost);
3325                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3326                                                    NULL_RTX, 0,
3327                                                    max_cost - extra_cost);
3328                         if (t1 == 0)
3329                           goto fail1;
3330                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
3331                                                           t1, op0),
3332                                             NULL_RTX);
3333                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3334                                            build_int_2 (post_shift, 0),
3335                                            NULL_RTX, 0);
3336                         t4 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3337                                            build_int_2 (size - 1, 0),
3338                                            NULL_RTX, 0);
3339                         if (d < 0)
3340                           quotient
3341                             = force_operand (gen_rtx_MINUS (compute_mode,
3342                                                             t4, t3),
3343                                              tquotient);
3344                         else
3345                           quotient
3346                             = force_operand (gen_rtx_MINUS (compute_mode,
3347                                                             t3, t4),
3348                                              tquotient);
3349                       }
3350                   }
3351                 else            /* Too wide mode to use tricky code */
3352                   break;
3353
3354                 insn = get_last_insn ();
3355                 if (insn != last
3356                     && (set = single_set (insn)) != 0
3357                     && SET_DEST (set) == quotient)
3358                   set_unique_reg_note (insn,
3359                                        REG_EQUAL,
3360                                        gen_rtx_DIV (compute_mode, op0, op1));
3361               }
3362             break;
3363           }
3364       fail1:
3365         delete_insns_since (last);
3366         break;
3367
3368       case FLOOR_DIV_EXPR:
3369       case FLOOR_MOD_EXPR:
3370       /* We will come here only for signed operations.  */
3371         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3372           {
3373             unsigned HOST_WIDE_INT mh, ml;
3374             int pre_shift, lgup, post_shift;
3375             HOST_WIDE_INT d = INTVAL (op1);
3376
3377             if (d > 0)
3378               {
3379                 /* We could just as easily deal with negative constants here,
3380                    but it does not seem worth the trouble for GCC 2.6.  */
3381                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3382                   {
3383                     pre_shift = floor_log2 (d);
3384                     if (rem_flag)
3385                       {
3386                         remainder = expand_binop (compute_mode, and_optab, op0,
3387                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3388                                                   remainder, 0, OPTAB_LIB_WIDEN);
3389                         if (remainder)
3390                           return gen_lowpart (mode, remainder);
3391                       }
3392                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3393                                              build_int_2 (pre_shift, 0),
3394                                              tquotient, 0);
3395                   }
3396                 else
3397                   {
3398                     rtx t1, t2, t3, t4;
3399
3400                     mh = choose_multiplier (d, size, size - 1,
3401                                             &ml, &post_shift, &lgup);
3402                     if (mh)
3403                       abort ();
3404
3405                     t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3406                                        build_int_2 (size - 1, 0), NULL_RTX, 0);
3407                     t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3408                                        NULL_RTX, 0, OPTAB_WIDEN);
3409                     extra_cost = (shift_cost[post_shift]
3410                                   + shift_cost[size - 1] + 2 * add_cost);
3411                     t3 = expand_mult_highpart (compute_mode, t2, ml,
3412                                                NULL_RTX, 1,
3413                                                max_cost - extra_cost);
3414                     if (t3 != 0)
3415                       {
3416                         t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3417                                            build_int_2 (post_shift, 0),
3418                                            NULL_RTX, 1);
3419                         quotient = expand_binop (compute_mode, xor_optab,
3420                                                  t4, t1, tquotient, 0,
3421                                                  OPTAB_WIDEN);
3422                       }
3423                   }
3424               }
3425             else
3426               {
3427                 rtx nsign, t1, t2, t3, t4;
3428                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
3429                                                   op0, constm1_rtx), NULL_RTX);
3430                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3431                                    0, OPTAB_WIDEN);
3432                 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3433                                       build_int_2 (size - 1, 0), NULL_RTX, 0);
3434                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
3435                                     NULL_RTX);
3436                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3437                                     NULL_RTX, 0);
3438                 if (t4)
3439                   {
3440                     rtx t5;
3441                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3442                                       NULL_RTX, 0);
3443                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
3444                                                             t4, t5),
3445                                               tquotient);
3446                   }
3447               }
3448           }
3449
3450         if (quotient != 0)
3451           break;
3452         delete_insns_since (last);
3453
3454         /* Try using an instruction that produces both the quotient and
3455            remainder, using truncation.  We can easily compensate the quotient
3456            or remainder to get floor rounding, once we have the remainder.
3457            Notice that we compute also the final remainder value here,
3458            and return the result right away.  */
3459         if (target == 0 || GET_MODE (target) != compute_mode)
3460           target = gen_reg_rtx (compute_mode);
3461
3462         if (rem_flag)
3463           {
3464             remainder
3465               = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3466             quotient = gen_reg_rtx (compute_mode);
3467           }
3468         else
3469           {
3470             quotient
3471               = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3472             remainder = gen_reg_rtx (compute_mode);
3473           }
3474
3475         if (expand_twoval_binop (sdivmod_optab, op0, op1,
3476                                  quotient, remainder, 0))
3477           {
3478             /* This could be computed with a branch-less sequence.
3479                Save that for later.  */
3480             rtx tem;
3481             rtx label = gen_label_rtx ();
3482             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
3483             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3484                                 NULL_RTX, 0, OPTAB_WIDEN);
3485             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
3486             expand_dec (quotient, const1_rtx);
3487             expand_inc (remainder, op1);
3488             emit_label (label);
3489             return gen_lowpart (mode, rem_flag ? remainder : quotient);
3490           }
3491
3492         /* No luck with division elimination or divmod.  Have to do it
3493            by conditionally adjusting op0 *and* the result.  */
3494         {
3495           rtx label1, label2, label3, label4, label5;
3496           rtx adjusted_op0;
3497           rtx tem;
3498
3499           quotient = gen_reg_rtx (compute_mode);
3500           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3501           label1 = gen_label_rtx ();
3502           label2 = gen_label_rtx ();
3503           label3 = gen_label_rtx ();
3504           label4 = gen_label_rtx ();
3505           label5 = gen_label_rtx ();
3506           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3507           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
3508           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3509                               quotient, 0, OPTAB_LIB_WIDEN);
3510           if (tem != quotient)
3511             emit_move_insn (quotient, tem);
3512           emit_jump_insn (gen_jump (label5));
3513           emit_barrier ();
3514           emit_label (label1);
3515           expand_inc (adjusted_op0, const1_rtx);
3516           emit_jump_insn (gen_jump (label4));
3517           emit_barrier ();
3518           emit_label (label2);
3519           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
3520           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3521                               quotient, 0, OPTAB_LIB_WIDEN);
3522           if (tem != quotient)
3523             emit_move_insn (quotient, tem);
3524           emit_jump_insn (gen_jump (label5));
3525           emit_barrier ();
3526           emit_label (label3);
3527           expand_dec (adjusted_op0, const1_rtx);
3528           emit_label (label4);
3529           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3530                               quotient, 0, OPTAB_LIB_WIDEN);
3531           if (tem != quotient)
3532             emit_move_insn (quotient, tem);
3533           expand_dec (quotient, const1_rtx);
3534           emit_label (label5);
3535         }
3536         break;
3537
3538       case CEIL_DIV_EXPR:
3539       case CEIL_MOD_EXPR:
3540         if (unsignedp)
3541           {
3542             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
3543               {
3544                 rtx t1, t2, t3;
3545                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3546                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3547                                    build_int_2 (floor_log2 (d), 0),
3548                                    tquotient, 1);
3549                 t2 = expand_binop (compute_mode, and_optab, op0,
3550                                    GEN_INT (d - 1),
3551                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3552                 t3 = gen_reg_rtx (compute_mode);
3553                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3554                                       compute_mode, 1, 1);
3555                 if (t3 == 0)
3556                   {
3557                     rtx lab;
3558                     lab = gen_label_rtx ();
3559                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3560                     expand_inc (t1, const1_rtx);
3561                     emit_label (lab);
3562                     quotient = t1;
3563                   }
3564                 else
3565                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3566                                                           t1, t3),
3567                                             tquotient);
3568                 break;
3569               }
3570
3571             /* Try using an instruction that produces both the quotient and
3572                remainder, using truncation.  We can easily compensate the
3573                quotient or remainder to get ceiling rounding, once we have the
3574                remainder.  Notice that we compute also the final remainder
3575                value here, and return the result right away.  */
3576             if (target == 0 || GET_MODE (target) != compute_mode)
3577               target = gen_reg_rtx (compute_mode);
3578
3579             if (rem_flag)
3580               {
3581                 remainder = (GET_CODE (target) == REG
3582                              ? target : gen_reg_rtx (compute_mode));
3583                 quotient = gen_reg_rtx (compute_mode);
3584               }
3585             else
3586               {
3587                 quotient = (GET_CODE (target) == REG
3588                             ? target : gen_reg_rtx (compute_mode));
3589                 remainder = gen_reg_rtx (compute_mode);
3590               }
3591
3592             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
3593                                      remainder, 1))
3594               {
3595                 /* This could be computed with a branch-less sequence.
3596                    Save that for later.  */
3597                 rtx label = gen_label_rtx ();
3598                 do_cmp_and_jump (remainder, const0_rtx, EQ,
3599                                  compute_mode, label);
3600                 expand_inc (quotient, const1_rtx);
3601                 expand_dec (remainder, op1);
3602                 emit_label (label);
3603                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3604               }
3605
3606             /* No luck with division elimination or divmod.  Have to do it
3607                by conditionally adjusting op0 *and* the result.  */
3608             {
3609               rtx label1, label2;
3610               rtx adjusted_op0, tem;
3611
3612               quotient = gen_reg_rtx (compute_mode);
3613               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3614               label1 = gen_label_rtx ();
3615               label2 = gen_label_rtx ();
3616               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
3617                                compute_mode, label1);
3618               emit_move_insn  (quotient, const0_rtx);
3619               emit_jump_insn (gen_jump (label2));
3620               emit_barrier ();
3621               emit_label (label1);
3622               expand_dec (adjusted_op0, const1_rtx);
3623               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
3624                                   quotient, 1, OPTAB_LIB_WIDEN);
3625               if (tem != quotient)
3626                 emit_move_insn (quotient, tem);
3627               expand_inc (quotient, const1_rtx);
3628               emit_label (label2);
3629             }
3630           }
3631         else /* signed */
3632           {
3633             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3634                 && INTVAL (op1) >= 0)
3635               {
3636                 /* This is extremely similar to the code for the unsigned case
3637                    above.  For 2.7 we should merge these variants, but for
3638                    2.6.1 I don't want to touch the code for unsigned since that
3639                    get used in C.  The signed case will only be used by other
3640                    languages (Ada).  */
3641
3642                 rtx t1, t2, t3;
3643                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3644                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3645                                    build_int_2 (floor_log2 (d), 0),
3646                                    tquotient, 0);
3647                 t2 = expand_binop (compute_mode, and_optab, op0,
3648                                    GEN_INT (d - 1),
3649                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3650                 t3 = gen_reg_rtx (compute_mode);
3651                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3652                                       compute_mode, 1, 1);
3653                 if (t3 == 0)
3654                   {
3655                     rtx lab;
3656                     lab = gen_label_rtx ();
3657                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3658                     expand_inc (t1, const1_rtx);
3659                     emit_label (lab);
3660                     quotient = t1;
3661                   }
3662                 else
3663                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3664                                                           t1, t3),
3665                                             tquotient);
3666                 break;
3667               }
3668
3669             /* Try using an instruction that produces both the quotient and
3670                remainder, using truncation.  We can easily compensate the
3671                quotient or remainder to get ceiling rounding, once we have the
3672                remainder.  Notice that we compute also the final remainder
3673                value here, and return the result right away.  */
3674             if (target == 0 || GET_MODE (target) != compute_mode)
3675               target = gen_reg_rtx (compute_mode);
3676             if (rem_flag)
3677               {
3678                 remainder= (GET_CODE (target) == REG
3679                             ? target : gen_reg_rtx (compute_mode));
3680                 quotient = gen_reg_rtx (compute_mode);
3681               }
3682             else
3683               {
3684                 quotient = (GET_CODE (target) == REG
3685                             ? target : gen_reg_rtx (compute_mode));
3686                 remainder = gen_reg_rtx (compute_mode);
3687               }
3688
3689             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
3690                                      remainder, 0))
3691               {
3692                 /* This could be computed with a branch-less sequence.
3693                    Save that for later.  */
3694                 rtx tem;
3695                 rtx label = gen_label_rtx ();
3696                 do_cmp_and_jump (remainder, const0_rtx, EQ,
3697                                  compute_mode, label);
3698                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3699                                     NULL_RTX, 0, OPTAB_WIDEN);
3700                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
3701                 expand_inc (quotient, const1_rtx);
3702                 expand_dec (remainder, op1);
3703                 emit_label (label);
3704                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3705               }
3706
3707             /* No luck with division elimination or divmod.  Have to do it
3708                by conditionally adjusting op0 *and* the result.  */
3709             {
3710               rtx label1, label2, label3, label4, label5;
3711               rtx adjusted_op0;
3712               rtx tem;
3713
3714               quotient = gen_reg_rtx (compute_mode);
3715               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3716               label1 = gen_label_rtx ();
3717               label2 = gen_label_rtx ();
3718               label3 = gen_label_rtx ();
3719               label4 = gen_label_rtx ();
3720               label5 = gen_label_rtx ();
3721               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3722               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
3723                                compute_mode, label1);
3724               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3725                                   quotient, 0, OPTAB_LIB_WIDEN);
3726               if (tem != quotient)
3727                 emit_move_insn (quotient, tem);
3728               emit_jump_insn (gen_jump (label5));
3729               emit_barrier ();
3730               emit_label (label1);
3731               expand_dec (adjusted_op0, const1_rtx);
3732               emit_jump_insn (gen_jump (label4));
3733               emit_barrier ();
3734               emit_label (label2);
3735               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
3736                                compute_mode, label3);
3737               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3738                                   quotient, 0, OPTAB_LIB_WIDEN);
3739               if (tem != quotient)
3740                 emit_move_insn (quotient, tem);
3741               emit_jump_insn (gen_jump (label5));
3742               emit_barrier ();
3743               emit_label (label3);
3744               expand_inc (adjusted_op0, const1_rtx);
3745               emit_label (label4);
3746               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3747                                   quotient, 0, OPTAB_LIB_WIDEN);
3748               if (tem != quotient)
3749                 emit_move_insn (quotient, tem);
3750               expand_inc (quotient, const1_rtx);
3751               emit_label (label5);
3752             }
3753           }
3754         break;
3755
3756       case EXACT_DIV_EXPR:
3757         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3758           {
3759             HOST_WIDE_INT d = INTVAL (op1);
3760             unsigned HOST_WIDE_INT ml;
3761             int post_shift;
3762             rtx t1;
3763
3764             post_shift = floor_log2 (d & -d);
3765             ml = invert_mod2n (d >> post_shift, size);
3766             t1 = expand_mult (compute_mode, op0, GEN_INT (ml), NULL_RTX,
3767                               unsignedp);
3768             quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3769                                      build_int_2 (post_shift, 0),
3770                                      NULL_RTX, unsignedp);
3771
3772             insn = get_last_insn ();
3773             set_unique_reg_note (insn,
3774                                  REG_EQUAL,
3775                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
3776                                                  compute_mode,
3777                                                  op0, op1));
3778           }
3779         break;
3780
3781       case ROUND_DIV_EXPR:
3782       case ROUND_MOD_EXPR:
3783         if (unsignedp)
3784           {
3785             rtx tem;
3786             rtx label;
3787             label = gen_label_rtx ();
3788             quotient = gen_reg_rtx (compute_mode);
3789             remainder = gen_reg_rtx (compute_mode);
3790             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
3791               {
3792                 rtx tem;
3793                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
3794                                          quotient, 1, OPTAB_LIB_WIDEN);
3795                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
3796                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3797                                           remainder, 1, OPTAB_LIB_WIDEN);
3798               }
3799             tem = plus_constant (op1, -1);
3800             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3801                                 build_int_2 (1, 0), NULL_RTX, 1);
3802             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
3803             expand_inc (quotient, const1_rtx);
3804             expand_dec (remainder, op1);
3805             emit_label (label);
3806           }
3807         else
3808           {
3809             rtx abs_rem, abs_op1, tem, mask;
3810             rtx label;
3811             label = gen_label_rtx ();
3812             quotient = gen_reg_rtx (compute_mode);
3813             remainder = gen_reg_rtx (compute_mode);
3814             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
3815               {
3816                 rtx tem;
3817                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
3818                                          quotient, 0, OPTAB_LIB_WIDEN);
3819                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
3820                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3821                                           remainder, 0, OPTAB_LIB_WIDEN);
3822               }
3823             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 0);
3824             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 0);
3825             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
3826                                 build_int_2 (1, 0), NULL_RTX, 1);
3827             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
3828             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3829                                 NULL_RTX, 0, OPTAB_WIDEN);
3830             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3831                                 build_int_2 (size - 1, 0), NULL_RTX, 0);
3832             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
3833                                 NULL_RTX, 0, OPTAB_WIDEN);
3834             tem = expand_binop (compute_mode, sub_optab, tem, mask,
3835                                 NULL_RTX, 0, OPTAB_WIDEN);
3836             expand_inc (quotient, tem);
3837             tem = expand_binop (compute_mode, xor_optab, mask, op1,
3838                                 NULL_RTX, 0, OPTAB_WIDEN);
3839             tem = expand_binop (compute_mode, sub_optab, tem, mask,
3840                                 NULL_RTX, 0, OPTAB_WIDEN);
3841             expand_dec (remainder, tem);
3842             emit_label (label);
3843           }
3844         return gen_lowpart (mode, rem_flag ? remainder : quotient);
3845
3846       default:
3847         abort ();
3848       }
3849
3850   if (quotient == 0)
3851     {
3852       if (target && GET_MODE (target) != compute_mode)
3853         target = 0;
3854
3855       if (rem_flag)
3856         {
3857           /* Try to produce the remainder without producing the quotient.
3858              If we seem to have a divmod patten that does not require widening,
3859              don't try windening here.  We should really have an WIDEN argument
3860              to expand_twoval_binop, since what we'd really like to do here is
3861              1) try a mod insn in compute_mode
3862              2) try a divmod insn in compute_mode
3863              3) try a div insn in compute_mode and multiply-subtract to get
3864                 remainder
3865              4) try the same things with widening allowed.  */
3866           remainder
3867             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3868                                  op0, op1, target,
3869                                  unsignedp,
3870                                  ((optab2->handlers[(int) compute_mode].insn_code
3871                                    != CODE_FOR_nothing)
3872                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
3873           if (remainder == 0)
3874             {
3875               /* No luck there.  Can we do remainder and divide at once
3876                  without a library call?  */
3877               remainder = gen_reg_rtx (compute_mode);
3878               if (! expand_twoval_binop ((unsignedp
3879                                           ? udivmod_optab
3880                                           : sdivmod_optab),
3881                                          op0, op1,
3882                                          NULL_RTX, remainder, unsignedp))
3883                 remainder = 0;
3884             }
3885
3886           if (remainder)
3887             return gen_lowpart (mode, remainder);
3888         }
3889
3890       /* Produce the quotient.  Try a quotient insn, but not a library call.
3891          If we have a divmod in this mode, use it in preference to widening
3892          the div (for this test we assume it will not fail). Note that optab2
3893          is set to the one of the two optabs that the call below will use.  */
3894       quotient
3895         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
3896                              op0, op1, rem_flag ? NULL_RTX : target,
3897                              unsignedp,
3898                              ((optab2->handlers[(int) compute_mode].insn_code
3899                                != CODE_FOR_nothing)
3900                               ? OPTAB_DIRECT : OPTAB_WIDEN));
3901
3902       if (quotient == 0)
3903         {
3904           /* No luck there.  Try a quotient-and-remainder insn,
3905              keeping the quotient alone.  */
3906           quotient = gen_reg_rtx (compute_mode);
3907           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
3908                                      op0, op1,
3909                                      quotient, NULL_RTX, unsignedp))
3910             {
3911               quotient = 0;
3912               if (! rem_flag)
3913                 /* Still no luck.  If we are not computing the remainder,
3914                    use a library call for the quotient.  */
3915                 quotient = sign_expand_binop (compute_mode,
3916                                               udiv_optab, sdiv_optab,
3917                                               op0, op1, target,
3918                                               unsignedp, OPTAB_LIB_WIDEN);
3919             }
3920         }
3921     }
3922
3923   if (rem_flag)
3924     {
3925       if (target && GET_MODE (target) != compute_mode)
3926         target = 0;
3927
3928       if (quotient == 0)
3929         /* No divide instruction either.  Use library for remainder.  */
3930         remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3931                                        op0, op1, target,
3932                                        unsignedp, OPTAB_LIB_WIDEN);
3933       else
3934         {
3935           /* We divided.  Now finish doing X - Y * (X / Y).  */
3936           remainder = expand_mult (compute_mode, quotient, op1,
3937                                    NULL_RTX, unsignedp);
3938           remainder = expand_binop (compute_mode, sub_optab, op0,
3939                                     remainder, target, unsignedp,
3940                                     OPTAB_LIB_WIDEN);
3941         }
3942     }
3943
3944   return gen_lowpart (mode, rem_flag ? remainder : quotient);
3945 }
3946 \f
3947 /* Return a tree node with data type TYPE, describing the value of X.
3948    Usually this is an RTL_EXPR, if there is no obvious better choice.
3949    X may be an expression, however we only support those expressions
3950    generated by loop.c.   */
3951
3952 tree
3953 make_tree (type, x)
3954      tree type;
3955      rtx x;
3956 {
3957   tree t;
3958
3959   switch (GET_CODE (x))
3960     {
3961     case CONST_INT:
3962       t = build_int_2 (INTVAL (x),
3963                        (TREE_UNSIGNED (type)
3964                         && (GET_MODE_BITSIZE (TYPE_MODE (type)) < HOST_BITS_PER_WIDE_INT))
3965                        || INTVAL (x) >= 0 ? 0 : -1);
3966       TREE_TYPE (t) = type;
3967       return t;
3968
3969     case CONST_DOUBLE:
3970       if (GET_MODE (x) == VOIDmode)
3971         {
3972           t = build_int_2 (CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
3973           TREE_TYPE (t) = type;
3974         }
3975       else
3976         {
3977           REAL_VALUE_TYPE d;
3978
3979           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
3980           t = build_real (type, d);
3981         }
3982
3983       return t;
3984
3985     case PLUS:
3986       return fold (build (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
3987                           make_tree (type, XEXP (x, 1))));
3988
3989     case MINUS:
3990       return fold (build (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
3991                           make_tree (type, XEXP (x, 1))));
3992
3993     case NEG:
3994       return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
3995
3996     case MULT:
3997       return fold (build (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
3998                           make_tree (type, XEXP (x, 1))));
3999
4000     case ASHIFT:
4001       return fold (build (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4002                           make_tree (type, XEXP (x, 1))));
4003
4004     case LSHIFTRT:
4005       return fold (convert (type,
4006                             build (RSHIFT_EXPR, unsigned_type (type),
4007                                    make_tree (unsigned_type (type),
4008                                               XEXP (x, 0)),
4009                                    make_tree (type, XEXP (x, 1)))));
4010
4011     case ASHIFTRT:
4012       return fold (convert (type,
4013                             build (RSHIFT_EXPR, signed_type (type),
4014                                    make_tree (signed_type (type), XEXP (x, 0)),
4015                                    make_tree (type, XEXP (x, 1)))));
4016
4017     case DIV:
4018       if (TREE_CODE (type) != REAL_TYPE)
4019         t = signed_type (type);
4020       else
4021         t = type;
4022
4023       return fold (convert (type,
4024                             build (TRUNC_DIV_EXPR, t,
4025                                    make_tree (t, XEXP (x, 0)),
4026                                    make_tree (t, XEXP (x, 1)))));
4027     case UDIV:
4028       t = unsigned_type (type);
4029       return fold (convert (type,
4030                             build (TRUNC_DIV_EXPR, t,
4031                                    make_tree (t, XEXP (x, 0)),
4032                                    make_tree (t, XEXP (x, 1)))));
4033    default:
4034       t = make_node (RTL_EXPR);
4035       TREE_TYPE (t) = type;
4036       RTL_EXPR_RTL (t) = x;
4037       /* There are no insns to be output
4038          when this rtl_expr is used.  */
4039       RTL_EXPR_SEQUENCE (t) = 0;
4040       return t;
4041     }
4042 }
4043
4044 /* Return an rtx representing the value of X * MULT + ADD.
4045    TARGET is a suggestion for where to store the result (an rtx).
4046    MODE is the machine mode for the computation.
4047    X and MULT must have mode MODE.  ADD may have a different mode.
4048    So can X (defaults to same as MODE).
4049    UNSIGNEDP is non-zero to do unsigned multiplication.
4050    This may emit insns.  */
4051
4052 rtx
4053 expand_mult_add (x, target, mult, add, mode, unsignedp)
4054      rtx x, target, mult, add;
4055      enum machine_mode mode;
4056      int unsignedp;
4057 {
4058   tree type = type_for_mode (mode, unsignedp);
4059   tree add_type = (GET_MODE (add) == VOIDmode
4060                    ? type : type_for_mode (GET_MODE (add), unsignedp));
4061   tree result =  fold (build (PLUS_EXPR, type,
4062                               fold (build (MULT_EXPR, type,
4063                                            make_tree (type, x),
4064                                            make_tree (type, mult))),
4065                               make_tree (add_type, add)));
4066
4067   return expand_expr (result, target, VOIDmode, 0);
4068 }
4069 \f
4070 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4071    and returning TARGET.
4072
4073    If TARGET is 0, a pseudo-register or constant is returned.  */
4074
4075 rtx
4076 expand_and (op0, op1, target)
4077      rtx op0, op1, target;
4078 {
4079   enum machine_mode mode = VOIDmode;
4080   rtx tem;
4081
4082   if (GET_MODE (op0) != VOIDmode)
4083     mode = GET_MODE (op0);
4084   else if (GET_MODE (op1) != VOIDmode)
4085     mode = GET_MODE (op1);
4086
4087   if (mode != VOIDmode)
4088     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4089   else if (GET_CODE (op0) == CONST_INT && GET_CODE (op1) == CONST_INT)
4090     tem = GEN_INT (INTVAL (op0) & INTVAL (op1));
4091   else
4092     abort ();
4093
4094   if (target == 0)
4095     target = tem;
4096   else if (tem != target)
4097     emit_move_insn (target, tem);
4098   return target;
4099 }
4100 \f
4101 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
4102    and storing in TARGET.  Normally return TARGET.
4103    Return 0 if that cannot be done.
4104
4105    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
4106    it is VOIDmode, they cannot both be CONST_INT.
4107
4108    UNSIGNEDP is for the case where we have to widen the operands
4109    to perform the operation.  It says to use zero-extension.
4110
4111    NORMALIZEP is 1 if we should convert the result to be either zero
4112    or one.  Normalize is -1 if we should convert the result to be
4113    either zero or -1.  If NORMALIZEP is zero, the result will be left
4114    "raw" out of the scc insn.  */
4115
4116 rtx
4117 emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep)
4118      rtx target;
4119      enum rtx_code code;
4120      rtx op0, op1;
4121      enum machine_mode mode;
4122      int unsignedp;
4123      int normalizep;
4124 {
4125   rtx subtarget;
4126   enum insn_code icode;
4127   enum machine_mode compare_mode;
4128   enum machine_mode target_mode = GET_MODE (target);
4129   rtx tem;
4130   rtx last = get_last_insn ();
4131   rtx pattern, comparison;
4132
4133   if (unsignedp)
4134     code = unsigned_condition (code);
4135
4136   /* If one operand is constant, make it the second one.  Only do this
4137      if the other operand is not constant as well.  */
4138
4139   if ((CONSTANT_P (op0) && ! CONSTANT_P (op1))
4140       || (GET_CODE (op0) == CONST_INT && GET_CODE (op1) != CONST_INT))
4141     {
4142       tem = op0;
4143       op0 = op1;
4144       op1 = tem;
4145       code = swap_condition (code);
4146     }
4147
4148   if (mode == VOIDmode)
4149     mode = GET_MODE (op0);
4150
4151   /* For some comparisons with 1 and -1, we can convert this to
4152      comparisons with zero.  This will often produce more opportunities for
4153      store-flag insns.  */
4154
4155   switch (code)
4156     {
4157     case LT:
4158       if (op1 == const1_rtx)
4159         op1 = const0_rtx, code = LE;
4160       break;
4161     case LE:
4162       if (op1 == constm1_rtx)
4163         op1 = const0_rtx, code = LT;
4164       break;
4165     case GE:
4166       if (op1 == const1_rtx)
4167         op1 = const0_rtx, code = GT;
4168       break;
4169     case GT:
4170       if (op1 == constm1_rtx)
4171         op1 = const0_rtx, code = GE;
4172       break;
4173     case GEU:
4174       if (op1 == const1_rtx)
4175         op1 = const0_rtx, code = NE;
4176       break;
4177     case LTU:
4178       if (op1 == const1_rtx)
4179         op1 = const0_rtx, code = EQ;
4180       break;
4181     default:
4182       break;
4183     }
4184
4185   /* If we are comparing a double-word integer with zero, we can convert
4186      the comparison into one involving a single word.  */
4187   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
4188       && GET_MODE_CLASS (mode) == MODE_INT
4189       && op1 == const0_rtx)
4190     {
4191       if (code == EQ || code == NE)
4192         {
4193           /* Do a logical OR of the two words and compare the result.  */
4194           rtx op0h = gen_highpart (word_mode, op0);
4195           rtx op0l = gen_lowpart (word_mode, op0);
4196           rtx op0both = expand_binop (word_mode, ior_optab, op0h, op0l,
4197                                       NULL_RTX, unsignedp, OPTAB_DIRECT);
4198           if (op0both != 0)
4199             return emit_store_flag (target, code, op0both, op1, word_mode,
4200                                     unsignedp, normalizep);
4201         }
4202       else if (code == LT || code == GE)
4203         /* If testing the sign bit, can just test on high word.  */
4204         return emit_store_flag (target, code, gen_highpart (word_mode, op0),
4205                                 op1, word_mode, unsignedp, normalizep);
4206     }
4207
4208   /* From now on, we won't change CODE, so set ICODE now.  */
4209   icode = setcc_gen_code[(int) code];
4210
4211   /* If this is A < 0 or A >= 0, we can do this by taking the ones
4212      complement of A (for GE) and shifting the sign bit to the low bit.  */
4213   if (op1 == const0_rtx && (code == LT || code == GE)
4214       && GET_MODE_CLASS (mode) == MODE_INT
4215       && (normalizep || STORE_FLAG_VALUE == 1
4216           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4217               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4218                   == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
4219     {
4220       subtarget = target;
4221
4222       /* If the result is to be wider than OP0, it is best to convert it
4223          first.  If it is to be narrower, it is *incorrect* to convert it
4224          first.  */
4225       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
4226         {
4227           op0 = protect_from_queue (op0, 0);
4228           op0 = convert_modes (target_mode, mode, op0, 0);
4229           mode = target_mode;
4230         }
4231
4232       if (target_mode != mode)
4233         subtarget = 0;
4234
4235       if (code == GE)
4236         op0 = expand_unop (mode, one_cmpl_optab, op0,
4237                            ((STORE_FLAG_VALUE == 1 || normalizep)
4238                             ? 0 : subtarget), 0);
4239
4240       if (STORE_FLAG_VALUE == 1 || normalizep)
4241         /* If we are supposed to produce a 0/1 value, we want to do
4242            a logical shift from the sign bit to the low-order bit; for
4243            a -1/0 value, we do an arithmetic shift.  */
4244         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
4245                             size_int (GET_MODE_BITSIZE (mode) - 1),
4246                             subtarget, normalizep != -1);
4247
4248       if (mode != target_mode)
4249         op0 = convert_modes (target_mode, mode, op0, 0);
4250
4251       return op0;
4252     }
4253
4254   if (icode != CODE_FOR_nothing)
4255     {
4256       insn_operand_predicate_fn pred;
4257
4258       /* We think we may be able to do this with a scc insn.  Emit the
4259          comparison and then the scc insn.
4260
4261          compare_from_rtx may call emit_queue, which would be deleted below
4262          if the scc insn fails.  So call it ourselves before setting LAST.
4263          Likewise for do_pending_stack_adjust.  */
4264
4265       emit_queue ();
4266       do_pending_stack_adjust ();
4267       last = get_last_insn ();
4268
4269       comparison
4270         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX, 0);
4271       if (GET_CODE (comparison) == CONST_INT)
4272         return (comparison == const0_rtx ? const0_rtx
4273                 : normalizep == 1 ? const1_rtx
4274                 : normalizep == -1 ? constm1_rtx
4275                 : const_true_rtx);
4276
4277       /* If the code of COMPARISON doesn't match CODE, something is
4278          wrong; we can no longer be sure that we have the operation.
4279          We could handle this case, but it should not happen.  */
4280
4281       if (GET_CODE (comparison) != code)
4282         abort ();
4283
4284       /* Get a reference to the target in the proper mode for this insn.  */
4285       compare_mode = insn_data[(int) icode].operand[0].mode;
4286       subtarget = target;
4287       pred = insn_data[(int) icode].operand[0].predicate;
4288       if (preserve_subexpressions_p ()
4289           || ! (*pred) (subtarget, compare_mode))
4290         subtarget = gen_reg_rtx (compare_mode);
4291
4292       pattern = GEN_FCN (icode) (subtarget);
4293       if (pattern)
4294         {
4295           emit_insn (pattern);
4296
4297           /* If we are converting to a wider mode, first convert to
4298              TARGET_MODE, then normalize.  This produces better combining
4299              opportunities on machines that have a SIGN_EXTRACT when we are
4300              testing a single bit.  This mostly benefits the 68k.
4301
4302              If STORE_FLAG_VALUE does not have the sign bit set when
4303              interpreted in COMPARE_MODE, we can do this conversion as
4304              unsigned, which is usually more efficient.  */
4305           if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4306             {
4307               convert_move (target, subtarget,
4308                             (GET_MODE_BITSIZE (compare_mode)
4309                              <= HOST_BITS_PER_WIDE_INT)
4310                             && 0 == (STORE_FLAG_VALUE
4311                                      & ((HOST_WIDE_INT) 1
4312                                         << (GET_MODE_BITSIZE (compare_mode) -1))));
4313               op0 = target;
4314               compare_mode = target_mode;
4315             }
4316           else
4317             op0 = subtarget;
4318
4319           /* If we want to keep subexpressions around, don't reuse our
4320              last target.  */
4321
4322           if (preserve_subexpressions_p ())
4323             subtarget = 0;
4324
4325           /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
4326              we don't have to do anything.  */
4327           if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4328             ;
4329           /* STORE_FLAG_VALUE might be the most negative number, so write
4330              the comparison this way to avoid a compiler-time warning.  */
4331           else if (- normalizep == STORE_FLAG_VALUE)
4332             op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
4333
4334           /* We don't want to use STORE_FLAG_VALUE < 0 below since this
4335              makes it hard to use a value of just the sign bit due to
4336              ANSI integer constant typing rules.  */
4337           else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
4338                    && (STORE_FLAG_VALUE
4339                        & ((HOST_WIDE_INT) 1
4340                           << (GET_MODE_BITSIZE (compare_mode) - 1))))
4341             op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
4342                                 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
4343                                 subtarget, normalizep == 1);
4344           else if (STORE_FLAG_VALUE & 1)
4345             {
4346               op0 = expand_and (op0, const1_rtx, subtarget);
4347               if (normalizep == -1)
4348                 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
4349             }
4350           else
4351             abort ();
4352
4353           /* If we were converting to a smaller mode, do the
4354              conversion now.  */
4355           if (target_mode != compare_mode)
4356             {
4357               convert_move (target, op0, 0);
4358               return target;
4359             }
4360           else
4361             return op0;
4362         }
4363     }
4364
4365   delete_insns_since (last);
4366
4367   /* If expensive optimizations, use different pseudo registers for each
4368      insn, instead of reusing the same pseudo.  This leads to better CSE,
4369      but slows down the compiler, since there are more pseudos */
4370   subtarget = (!flag_expensive_optimizations
4371                && (target_mode == mode)) ? target : NULL_RTX;
4372
4373   /* If we reached here, we can't do this with a scc insn.  However, there
4374      are some comparisons that can be done directly.  For example, if
4375      this is an equality comparison of integers, we can try to exclusive-or
4376      (or subtract) the two operands and use a recursive call to try the
4377      comparison with zero.  Don't do any of these cases if branches are
4378      very cheap.  */
4379
4380   if (BRANCH_COST > 0
4381       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
4382       && op1 != const0_rtx)
4383     {
4384       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
4385                           OPTAB_WIDEN);
4386
4387       if (tem == 0)
4388         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
4389                             OPTAB_WIDEN);
4390       if (tem != 0)
4391         tem = emit_store_flag (target, code, tem, const0_rtx,
4392                                mode, unsignedp, normalizep);
4393       if (tem == 0)
4394         delete_insns_since (last);
4395       return tem;
4396     }
4397
4398   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
4399      the constant zero.  Reject all other comparisons at this point.  Only
4400      do LE and GT if branches are expensive since they are expensive on
4401      2-operand machines.  */
4402
4403   if (BRANCH_COST == 0
4404       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
4405       || (code != EQ && code != NE
4406           && (BRANCH_COST <= 1 || (code != LE && code != GT))))
4407     return 0;
4408
4409   /* See what we need to return.  We can only return a 1, -1, or the
4410      sign bit.  */
4411
4412   if (normalizep == 0)
4413     {
4414       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
4415         normalizep = STORE_FLAG_VALUE;
4416
4417       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4418                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4419                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
4420         ;
4421       else
4422         return 0;
4423     }
4424
4425   /* Try to put the result of the comparison in the sign bit.  Assume we can't
4426      do the necessary operation below.  */
4427
4428   tem = 0;
4429
4430   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
4431      the sign bit set.  */
4432
4433   if (code == LE)
4434     {
4435       /* This is destructive, so SUBTARGET can't be OP0.  */
4436       if (rtx_equal_p (subtarget, op0))
4437         subtarget = 0;
4438
4439       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
4440                           OPTAB_WIDEN);
4441       if (tem)
4442         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
4443                             OPTAB_WIDEN);
4444     }
4445
4446   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
4447      number of bits in the mode of OP0, minus one.  */
4448
4449   if (code == GT)
4450     {
4451       if (rtx_equal_p (subtarget, op0))
4452         subtarget = 0;
4453
4454       tem = expand_shift (RSHIFT_EXPR, mode, op0,
4455                           size_int (GET_MODE_BITSIZE (mode) - 1),
4456                           subtarget, 0);
4457       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
4458                           OPTAB_WIDEN);
4459     }
4460
4461   if (code == EQ || code == NE)
4462     {
4463       /* For EQ or NE, one way to do the comparison is to apply an operation
4464          that converts the operand into a positive number if it is non-zero
4465          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
4466          for NE we negate.  This puts the result in the sign bit.  Then we
4467          normalize with a shift, if needed.
4468
4469          Two operations that can do the above actions are ABS and FFS, so try
4470          them.  If that doesn't work, and MODE is smaller than a full word,
4471          we can use zero-extension to the wider mode (an unsigned conversion)
4472          as the operation.  */
4473
4474       if (abs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4475         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
4476       else if (ffs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4477         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
4478       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
4479         {
4480           op0 = protect_from_queue (op0, 0);
4481           tem = convert_modes (word_mode, mode, op0, 1);
4482           mode = word_mode;
4483         }
4484
4485       if (tem != 0)
4486         {
4487           if (code == EQ)
4488             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
4489                                 0, OPTAB_WIDEN);
4490           else
4491             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
4492         }
4493
4494       /* If we couldn't do it that way, for NE we can "or" the two's complement
4495          of the value with itself.  For EQ, we take the one's complement of
4496          that "or", which is an extra insn, so we only handle EQ if branches
4497          are expensive.  */
4498
4499       if (tem == 0 && (code == NE || BRANCH_COST > 1))
4500         {
4501           if (rtx_equal_p (subtarget, op0))
4502             subtarget = 0;
4503
4504           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
4505           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
4506                               OPTAB_WIDEN);
4507
4508           if (tem && code == EQ)
4509             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
4510         }
4511     }
4512
4513   if (tem && normalizep)
4514     tem = expand_shift (RSHIFT_EXPR, mode, tem,
4515                         size_int (GET_MODE_BITSIZE (mode) - 1),
4516                         subtarget, normalizep == 1);
4517
4518   if (tem)
4519     {
4520       if (GET_MODE (tem) != target_mode)
4521         {
4522           convert_move (target, tem, 0);
4523           tem = target;
4524         }
4525       else if (!subtarget)
4526         {
4527           emit_move_insn (target, tem);
4528           tem = target;
4529         }
4530     }
4531   else
4532     delete_insns_since (last);
4533
4534   return tem;
4535 }
4536
4537 /* Like emit_store_flag, but always succeeds.  */
4538
4539 rtx
4540 emit_store_flag_force (target, code, op0, op1, mode, unsignedp, normalizep)
4541      rtx target;
4542      enum rtx_code code;
4543      rtx op0, op1;
4544      enum machine_mode mode;
4545      int unsignedp;
4546      int normalizep;
4547 {
4548   rtx tem, label;
4549
4550   /* First see if emit_store_flag can do the job.  */
4551   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
4552   if (tem != 0)
4553     return tem;
4554
4555   if (normalizep == 0)
4556     normalizep = 1;
4557
4558   /* If this failed, we have to do this with set/compare/jump/set code.  */
4559
4560   if (GET_CODE (target) != REG
4561       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
4562     target = gen_reg_rtx (GET_MODE (target));
4563
4564   emit_move_insn (target, const1_rtx);
4565   label = gen_label_rtx ();
4566   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, 0,
4567                            NULL_RTX, label);
4568
4569   emit_move_insn (target, const0_rtx);
4570   emit_label (label);
4571
4572   return target;
4573 }
4574 \f
4575 /* Perform possibly multi-word comparison and conditional jump to LABEL
4576    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
4577
4578    The algorithm is based on the code in expr.c:do_jump.
4579
4580    Note that this does not perform a general comparison.  Only variants
4581    generated within expmed.c are correctly handled, others abort (but could
4582    be handled if needed).  */
4583
4584 static void
4585 do_cmp_and_jump (arg1, arg2, op, mode, label)
4586      rtx arg1, arg2, label;
4587      enum rtx_code op;
4588      enum machine_mode mode;
4589 {
4590   /* If this mode is an integer too wide to compare properly,
4591      compare word by word.  Rely on cse to optimize constant cases.  */
4592
4593   if (GET_MODE_CLASS (mode) == MODE_INT
4594       && ! can_compare_p (op, mode, ccp_jump))
4595     {
4596       rtx label2 = gen_label_rtx ();
4597
4598       switch (op)
4599         {
4600         case LTU:
4601           do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
4602           break;
4603
4604         case LEU:
4605           do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
4606           break;
4607
4608         case LT:
4609           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
4610           break;
4611
4612         case GT:
4613           do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
4614           break;
4615
4616         case GE:
4617           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
4618           break;
4619
4620           /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
4621              that's the only equality operations we do */
4622         case EQ:
4623           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
4624             abort();
4625           do_jump_by_parts_equality_rtx (arg1, label2, label);
4626           break;
4627
4628         case NE:
4629           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
4630             abort();
4631           do_jump_by_parts_equality_rtx (arg1, label, label2);
4632           break;
4633
4634         default:
4635           abort();
4636         }
4637
4638       emit_label (label2);
4639     }
4640   else
4641     {
4642       emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, 0, label);
4643     }
4644 }