gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002 Free Software Foundation, Inc.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 2, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING.  If not, write to the Free
  20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  21 02111-1307, USA.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "toplev.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "tm_p.h"
  30 #include "flags.h"
  31 #include "insn-config.h"
  32 #include "expr.h"
  33 #include "optabs.h"
  34 #include "real.h"
  35 #include "recog.h"
  36
  37 static void store_fixed_bit_field       PARAMS ((rtx, unsigned HOST_WIDE_INT,
  38                                                  unsigned HOST_WIDE_INT,
  39                                                  unsigned HOST_WIDE_INT, rtx));
  40 static void store_split_bit_field       PARAMS ((rtx, unsigned HOST_WIDE_INT,
  41                                                  unsigned HOST_WIDE_INT, rtx));
  42 static rtx extract_fixed_bit_field      PARAMS ((enum machine_mode, rtx,
  43                                                  unsigned HOST_WIDE_INT,
  44                                                  unsigned HOST_WIDE_INT,
  45                                                  unsigned HOST_WIDE_INT,
  46                                                  rtx, int));
  47 static rtx mask_rtx                     PARAMS ((enum machine_mode, int,
  48                                                  int, int));
  49 static rtx lshift_value                 PARAMS ((enum machine_mode, rtx,
  50                                                  int, int));
  51 static rtx extract_split_bit_field      PARAMS ((rtx, unsigned HOST_WIDE_INT,
  52                                                  unsigned HOST_WIDE_INT, int));
  53 static void do_cmp_and_jump             PARAMS ((rtx, rtx, enum rtx_code,
  54                                                  enum machine_mode, rtx));
  55
  56 /* Non-zero means divides or modulus operations are relatively cheap for
  57    powers of two, so don't use branches; emit the operation instead.
  58    Usually, this will mean that the MD file will emit non-branch
  59    sequences.  */
  60
  61 static int sdiv_pow2_cheap, smod_pow2_cheap;
  62
  63 #ifndef SLOW_UNALIGNED_ACCESS
  64 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  65 #endif
  66
  67 /* For compilers that support multiple targets with different word sizes,
  68    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  69    is the H8/300(H) compiler.  */
  70
  71 #ifndef MAX_BITS_PER_WORD
  72 #define MAX_BITS_PER_WORD BITS_PER_WORD
  73 #endif
  74
  75 /* Reduce conditional compilation elsewhere.  */
  76 #ifndef HAVE_insv
  77 #define HAVE_insv       0
  78 #define CODE_FOR_insv   CODE_FOR_nothing
  79 #define gen_insv(a,b,c,d) NULL_RTX
  80 #endif
  81 #ifndef HAVE_extv
  82 #define HAVE_extv       0
  83 #define CODE_FOR_extv   CODE_FOR_nothing
  84 #define gen_extv(a,b,c,d) NULL_RTX
  85 #endif
  86 #ifndef HAVE_extzv
  87 #define HAVE_extzv      0
  88 #define CODE_FOR_extzv  CODE_FOR_nothing
  89 #define gen_extzv(a,b,c,d) NULL_RTX
  90 #endif
  91
  92 /* Cost of various pieces of RTL.  Note that some of these are indexed by
  93    shift count and some by mode.  */
  94 static int add_cost, negate_cost, zero_cost;
  95 static int shift_cost[MAX_BITS_PER_WORD];
  96 static int shiftadd_cost[MAX_BITS_PER_WORD];
  97 static int shiftsub_cost[MAX_BITS_PER_WORD];
  98 static int mul_cost[NUM_MACHINE_MODES];
  99 static int div_cost[NUM_MACHINE_MODES];
 100 static int mul_widen_cost[NUM_MACHINE_MODES];
 101 static int mul_highpart_cost[NUM_MACHINE_MODES];
 102
 103 void
 104 init_expmed ()
 105 {
 106   /* This is "some random pseudo register" for purposes of calling recog
 107      to see what insns exist.  */
 108   rtx reg = gen_rtx_REG (word_mode, 10000);
 109   rtx shift_insn, shiftadd_insn, shiftsub_insn;
 110   int dummy;
 111   int m;
 112   enum machine_mode mode, wider_mode;
 113
 114   start_sequence ();
 115
 116   reg = gen_rtx_REG (word_mode, 10000);
 117
 118   zero_cost = rtx_cost (const0_rtx, 0);
 119   add_cost = rtx_cost (gen_rtx_PLUS (word_mode, reg, reg), SET);
 120
 121   shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg,
 122                                        gen_rtx_ASHIFT (word_mode, reg,
 123                                                        const0_rtx)));
 124
 125   shiftadd_insn
 126     = emit_insn (gen_rtx_SET (VOIDmode, reg,
 127                               gen_rtx_PLUS (word_mode,
 128                                             gen_rtx_MULT (word_mode,
 129                                                           reg, const0_rtx),
 130                                             reg)));
 131
 132   shiftsub_insn
 133     = emit_insn (gen_rtx_SET (VOIDmode, reg,
 134                               gen_rtx_MINUS (word_mode,
 135                                              gen_rtx_MULT (word_mode,
 136                                                            reg, const0_rtx),
 137                                              reg)));
 138
 139   init_recog ();
 140
 141   shift_cost[0] = 0;
 142   shiftadd_cost[0] = shiftsub_cost[0] = add_cost;
 143
 144   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 145     {
 146       shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;
 147
 148       XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);
 149       if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)
 150         shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);
 151
 152       XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1)
 153         = GEN_INT ((HOST_WIDE_INT) 1 << m);
 154       if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)
 155         shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);
 156
 157       XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1)
 158         = GEN_INT ((HOST_WIDE_INT) 1 << m);
 159       if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)
 160         shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);
 161     }
 162
 163   negate_cost = rtx_cost (gen_rtx_NEG (word_mode, reg), SET);
 164
 165   sdiv_pow2_cheap
 166     = (rtx_cost (gen_rtx_DIV (word_mode, reg, GEN_INT (32)), SET)
 167        <= 2 * add_cost);
 168   smod_pow2_cheap
 169     = (rtx_cost (gen_rtx_MOD (word_mode, reg, GEN_INT (32)), SET)
 170        <= 2 * add_cost);
 171
 172   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 173        mode != VOIDmode;
 174        mode = GET_MODE_WIDER_MODE (mode))
 175     {
 176       reg = gen_rtx_REG (mode, 10000);
 177       div_cost[(int) mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET);
 178       mul_cost[(int) mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET);
 179       wider_mode = GET_MODE_WIDER_MODE (mode);
 180       if (wider_mode != VOIDmode)
 181         {
 182           mul_widen_cost[(int) wider_mode]
 183             = rtx_cost (gen_rtx_MULT (wider_mode,
 184                                       gen_rtx_ZERO_EXTEND (wider_mode, reg),
 185                                       gen_rtx_ZERO_EXTEND (wider_mode, reg)),
 186                         SET);
 187           mul_highpart_cost[(int) mode]
 188             = rtx_cost (gen_rtx_TRUNCATE
 189                         (mode,
 190                          gen_rtx_LSHIFTRT (wider_mode,
 191                                            gen_rtx_MULT (wider_mode,
 192                                                          gen_rtx_ZERO_EXTEND
 193                                                          (wider_mode, reg),
 194                                                          gen_rtx_ZERO_EXTEND
 195                                                          (wider_mode, reg)),
 196                                            GEN_INT (GET_MODE_BITSIZE (mode)))),
 197                         SET);
 198         }
 199     }
 200
 201   end_sequence ();
 202 }
 203
 204 /* Return an rtx representing minus the value of X.
 205    MODE is the intended mode of the result,
 206    useful if X is a CONST_INT.  */
 207
 208 rtx
 209 negate_rtx (mode, x)
 210      enum machine_mode mode;
 211      rtx x;
 212 {
 213   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 214
 215   if (result == 0)
 216     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 217
 218   return result;
 219 }
 220
 221 /* Report on the availability of insv/extv/extzv and the desired mode
 222    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 223    is false; else the mode of the specified operand.  If OPNO is -1,
 224    all the caller cares about is whether the insn is available.  */
 225 enum machine_mode
 226 mode_for_extraction (pattern, opno)
 227      enum extraction_pattern pattern;
 228      int opno;
 229 {
 230   const struct insn_data *data;
 231
 232   switch (pattern)
 233     {
 234     case EP_insv:
 235       if (HAVE_insv)
 236         {
 237           data = &insn_data[CODE_FOR_insv];
 238           break;
 239         }
 240       return MAX_MACHINE_MODE;
 241
 242     case EP_extv:
 243       if (HAVE_extv)
 244         {
 245           data = &insn_data[CODE_FOR_extv];
 246           break;
 247         }
 248       return MAX_MACHINE_MODE;
 249
 250     case EP_extzv:
 251       if (HAVE_extzv)
 252         {
 253           data = &insn_data[CODE_FOR_extzv];
 254           break;
 255         }
 256       return MAX_MACHINE_MODE;
 257
 258     default:
 259       abort ();
 260     }
 261
 262   if (opno == -1)
 263     return VOIDmode;
 264
 265   /* Everyone who uses this function used to follow it with
 266      if (result == VOIDmode) result = word_mode; */
 267   if (data->operand[opno].mode == VOIDmode)
 268     return word_mode;
 269   return data->operand[opno].mode;
 270 }
 271
 272 \f
 273 /* Generate code to store value from rtx VALUE
 274    into a bit-field within structure STR_RTX
 275    containing BITSIZE bits starting at bit BITNUM.
 276    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
 277    ALIGN is the alignment that STR_RTX is known to have.
 278    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
 279
 280 /* ??? Note that there are two different ideas here for how
 281    to determine the size to count bits within, for a register.
 282    One is BITS_PER_WORD, and the other is the size of operand 3
 283    of the insv pattern.
 284
 285    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
 286    else, we use the mode of operand 3.  */
 287
 288 rtx
 289 store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, total_size)
 290      rtx str_rtx;
 291      unsigned HOST_WIDE_INT bitsize;
 292      unsigned HOST_WIDE_INT bitnum;
 293      enum machine_mode fieldmode;
 294      rtx value;
 295      HOST_WIDE_INT total_size;
 296 {
 297   unsigned int unit
 298     = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
 299   unsigned HOST_WIDE_INT offset = bitnum / unit;
 300   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
 301   rtx op0 = str_rtx;
 302   int byte_offset;
 303
 304   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 305
 306   /* Discount the part of the structure before the desired byte.
 307      We need to know how many bytes are safe to reference after it.  */
 308   if (total_size >= 0)
 309     total_size -= (bitpos / BIGGEST_ALIGNMENT
 310                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
 311
 312   while (GET_CODE (op0) == SUBREG)
 313     {
 314       /* The following line once was done only if WORDS_BIG_ENDIAN,
 315          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 316          meaningful at a much higher level; when structures are copied
 317          between memory and regs, the higher-numbered regs
 318          always get higher addresses.  */
 319       offset += (SUBREG_BYTE (op0) / UNITS_PER_WORD);
 320       /* We used to adjust BITPOS here, but now we do the whole adjustment
 321          right after the loop.  */
 322       op0 = SUBREG_REG (op0);
 323     }
 324
 325   value = protect_from_queue (value, 0);
 326
 327   if (flag_force_mem)
 328     value = force_not_mem (value);
 329
 330   /* If the target is a register, overwriting the entire object, or storing
 331      a full-word or multi-word field can be done with just a SUBREG.
 332
 333      If the target is memory, storing any naturally aligned field can be
 334      done with a simple store.  For targets that support fast unaligned
 335      memory, any naturally sized, unit aligned field can be done directly.  */
 336
 337   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 338                 + (offset * UNITS_PER_WORD);
 339
 340   if (bitpos == 0
 341       && bitsize == GET_MODE_BITSIZE (fieldmode)
 342       && (GET_CODE (op0) != MEM
 343           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 344              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 345             && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 346           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 347              || (offset * BITS_PER_UNIT % bitsize == 0
 348                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 349     {
 350       if (GET_MODE (op0) != fieldmode)
 351         {
 352           if (GET_CODE (op0) == SUBREG)
 353             {
 354               if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 355                   || GET_MODE_CLASS (fieldmode) == MODE_INT
 356                   || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 357                 op0 = SUBREG_REG (op0);
 358               else
 359                 /* Else we've got some float mode source being extracted into
 360                    a different float mode destination -- this combination of
 361                    subregs results in Severe Tire Damage.  */
 362                 abort ();
 363             }
 364           if (GET_CODE (op0) == REG)
 365             op0 = gen_rtx_SUBREG (fieldmode, op0, byte_offset);
 366           else
 367             op0 = adjust_address (op0, fieldmode, offset);
 368         }
 369       emit_move_insn (op0, value);
 370       return value;
 371     }
 372
 373   /* Make sure we are playing with integral modes.  Pun with subregs
 374      if we aren't.  This must come after the entire register case above,
 375      since that case is valid for any mode.  The following cases are only
 376      valid for integral modes.  */
 377   {
 378     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 379     if (imode != GET_MODE (op0))
 380       {
 381         if (GET_CODE (op0) == MEM)
 382           op0 = adjust_address (op0, imode, 0);
 383         else if (imode != BLKmode)
 384           op0 = gen_lowpart (imode, op0);
 385         else
 386           abort ();
 387       }
 388   }
 389
 390   /* If OP0 is a register, BITPOS must count within a word.
 391      But as we have it, it counts within whatever size OP0 now has.
 392      On a bigendian machine, these are not the same, so convert.  */
 393   if (BYTES_BIG_ENDIAN
 394       && GET_CODE (op0) != MEM
 395       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 396     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 397
 398   /* Storing an lsb-aligned field in a register
 399      can be done with a movestrict instruction.  */
 400
 401   if (GET_CODE (op0) != MEM
 402       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 403       && bitsize == GET_MODE_BITSIZE (fieldmode)
 404       && (movstrict_optab->handlers[(int) fieldmode].insn_code
 405           != CODE_FOR_nothing))
 406     {
 407       int icode = movstrict_optab->handlers[(int) fieldmode].insn_code;
 408
 409       /* Get appropriate low part of the value being stored.  */
 410       if (GET_CODE (value) == CONST_INT || GET_CODE (value) == REG)
 411         value = gen_lowpart (fieldmode, value);
 412       else if (!(GET_CODE (value) == SYMBOL_REF
 413                  || GET_CODE (value) == LABEL_REF
 414                  || GET_CODE (value) == CONST))
 415         value = convert_to_mode (fieldmode, value, 0);
 416
 417       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 418         value = copy_to_mode_reg (fieldmode, value);
 419
 420       if (GET_CODE (op0) == SUBREG)
 421         {
 422           if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 423               || GET_MODE_CLASS (fieldmode) == MODE_INT
 424               || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 425             op0 = SUBREG_REG (op0);
 426           else
 427             /* Else we've got some float mode source being extracted into
 428                a different float mode destination -- this combination of
 429                subregs results in Severe Tire Damage.  */
 430             abort ();
 431         }
 432
 433       emit_insn (GEN_FCN (icode)
 434                  (gen_rtx_SUBREG (fieldmode, op0,
 435                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 436                                   + (offset * UNITS_PER_WORD)),
 437                                   value));
 438
 439       return value;
 440     }
 441
 442   /* Handle fields bigger than a word.  */
 443
 444   if (bitsize > BITS_PER_WORD)
 445     {
 446       /* Here we transfer the words of the field
 447          in the order least significant first.
 448          This is because the most significant word is the one which may
 449          be less than full.
 450          However, only do that if the value is not BLKmode.  */
 451
 452       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 453       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 454       unsigned int i;
 455
 456       /* This is the mode we must force value to, so that there will be enough
 457          subwords to extract.  Note that fieldmode will often (always?) be
 458          VOIDmode, because that is what store_field uses to indicate that this
 459          is a bit field, but passing VOIDmode to operand_subword_force will
 460          result in an abort.  */
 461       fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 462
 463       for (i = 0; i < nwords; i++)
 464         {
 465           /* If I is 0, use the low-order word in both field and target;
 466              if I is 1, use the next to lowest word; and so on.  */
 467           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 468           unsigned int bit_offset = (backwards
 469                                      ? MAX ((int) bitsize - ((int) i + 1)
 470                                             * BITS_PER_WORD,
 471                                             0)
 472                                      : (int) i * BITS_PER_WORD);
 473
 474           store_bit_field (op0, MIN (BITS_PER_WORD,
 475                                      bitsize - i * BITS_PER_WORD),
 476                            bitnum + bit_offset, word_mode,
 477                            operand_subword_force (value, wordnum,
 478                                                   (GET_MODE (value) == VOIDmode
 479                                                    ? fieldmode
 480                                                    : GET_MODE (value))),
 481                            total_size);
 482         }
 483       return value;
 484     }
 485
 486   /* From here on we can assume that the field to be stored in is
 487      a full-word (whatever type that is), since it is shorter than a word.  */
 488
 489   /* OFFSET is the number of words or bytes (UNIT says which)
 490      from STR_RTX to the first word or byte containing part of the field.  */
 491
 492   if (GET_CODE (op0) != MEM)
 493     {
 494       if (offset != 0
 495           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 496         {
 497           if (GET_CODE (op0) != REG)
 498             {
 499               /* Since this is a destination (lvalue), we can't copy it to a
 500                  pseudo.  We can trivially remove a SUBREG that does not
 501                  change the size of the operand.  Such a SUBREG may have been
 502                  added above.  Otherwise, abort.  */
 503               if (GET_CODE (op0) == SUBREG
 504                   && (GET_MODE_SIZE (GET_MODE (op0))
 505                       == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))))
 506                 op0 = SUBREG_REG (op0);
 507               else
 508                 abort ();
 509             }
 510           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 511                                 op0, (offset * UNITS_PER_WORD));
 512         }
 513       offset = 0;
 514     }
 515   else
 516     op0 = protect_from_queue (op0, 1);
 517
 518   /* If VALUE is a floating-point mode, access it as an integer of the
 519      corresponding size.  This can occur on a machine with 64 bit registers
 520      that uses SFmode for float.  This can also occur for unaligned float
 521      structure fields.  */
 522   if (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT)
 523     {
 524       if (GET_CODE (value) != REG)
 525         value = copy_to_reg (value);
 526       value = gen_rtx_SUBREG (word_mode, value, 0);
 527     }
 528
 529   /* Now OFFSET is nonzero only if OP0 is memory
 530      and is therefore always measured in bytes.  */
 531
 532   if (HAVE_insv
 533       && GET_MODE (value) != BLKmode
 534       && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
 535       /* Ensure insv's size is wide enough for this field.  */
 536       && (GET_MODE_BITSIZE (op_mode) >= bitsize)
 537       && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 538             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
 539     {
 540       int xbitpos = bitpos;
 541       rtx value1;
 542       rtx xop0 = op0;
 543       rtx last = get_last_insn ();
 544       rtx pat;
 545       enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
 546       int save_volatile_ok = volatile_ok;
 547
 548       volatile_ok = 1;
 549
 550       /* If this machine's insv can only insert into a register, copy OP0
 551          into a register and save it back later.  */
 552       /* This used to check flag_force_mem, but that was a serious
 553          de-optimization now that flag_force_mem is enabled by -O2.  */
 554       if (GET_CODE (op0) == MEM
 555           && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
 556                 (op0, VOIDmode)))
 557         {
 558           rtx tempreg;
 559           enum machine_mode bestmode;
 560
 561           /* Get the mode to use for inserting into this field.  If OP0 is
 562              BLKmode, get the smallest mode consistent with the alignment. If
 563              OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
 564              mode. Otherwise, use the smallest mode containing the field.  */
 565
 566           if (GET_MODE (op0) == BLKmode
 567               || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
 568             bestmode
 569               = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
 570                                MEM_VOLATILE_P (op0));
 571           else
 572             bestmode = GET_MODE (op0);
 573
 574           if (bestmode == VOIDmode
 575               || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 576                   && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 577             goto insv_loses;
 578
 579           /* Adjust address to point to the containing unit of that mode.
 580              Compute offset as multiple of this unit, counting in bytes.  */
 581           unit = GET_MODE_BITSIZE (bestmode);
 582           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 583           bitpos = bitnum % unit;
 584           op0 = adjust_address (op0, bestmode,  offset);
 585
 586           /* Fetch that unit, store the bitfield in it, then store
 587              the unit.  */
 588           tempreg = copy_to_reg (op0);
 589           store_bit_field (tempreg, bitsize, bitpos, fieldmode, value,
 590                            total_size);
 591           emit_move_insn (op0, tempreg);
 592           return value;
 593         }
 594       volatile_ok = save_volatile_ok;
 595
 596       /* Add OFFSET into OP0's address.  */
 597       if (GET_CODE (xop0) == MEM)
 598         xop0 = adjust_address (xop0, byte_mode, offset);
 599
 600       /* If xop0 is a register, we need it in MAXMODE
 601          to make it acceptable to the format of insv.  */
 602       if (GET_CODE (xop0) == SUBREG)
 603         /* We can't just change the mode, because this might clobber op0,
 604            and we will need the original value of op0 if insv fails.  */
 605         xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 606       if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
 607         xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
 608
 609       /* On big-endian machines, we count bits from the most significant.
 610          If the bit field insn does not, we must invert.  */
 611
 612       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 613         xbitpos = unit - bitsize - xbitpos;
 614
 615       /* We have been counting XBITPOS within UNIT.
 616          Count instead within the size of the register.  */
 617       if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
 618         xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
 619
 620       unit = GET_MODE_BITSIZE (maxmode);
 621
 622       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
 623       value1 = value;
 624       if (GET_MODE (value) != maxmode)
 625         {
 626           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 627             {
 628               /* Optimization: Don't bother really extending VALUE
 629                  if it has all the bits we will actually use.  However,
 630                  if we must narrow it, be sure we do it correctly.  */
 631
 632               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
 633                 {
 634                   rtx tmp;
 635
 636                   tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
 637                   if (! tmp)
 638                     tmp = simplify_gen_subreg (maxmode,
 639                                                force_reg (GET_MODE (value),
 640                                                           value1),
 641                                                GET_MODE (value), 0);
 642                   value1 = tmp;
 643                 }
 644               else
 645                 value1 = gen_lowpart (maxmode, value1);
 646             }
 647           else if (GET_CODE (value) == CONST_INT)
 648             value1 = GEN_INT (trunc_int_for_mode (INTVAL (value), maxmode));
 649           else if (!CONSTANT_P (value))
 650             /* Parse phase is supposed to make VALUE's data type
 651                match that of the component reference, which is a type
 652                at least as wide as the field; so VALUE should have
 653                a mode that corresponds to that type.  */
 654             abort ();
 655         }
 656
 657       /* If this machine's insv insists on a register,
 658          get VALUE1 into a register.  */
 659       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 660              (value1, maxmode)))
 661         value1 = force_reg (maxmode, value1);
 662
 663       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 664       if (pat)
 665         emit_insn (pat);
 666       else
 667         {
 668           delete_insns_since (last);
 669           store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 670         }
 671     }
 672   else
 673     insv_loses:
 674     /* Insv is not available; store using shifts and boolean ops.  */
 675     store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 676   return value;
 677 }
 678 \f
 679 /* Use shifts and boolean operations to store VALUE
 680    into a bit field of width BITSIZE
 681    in a memory location specified by OP0 except offset by OFFSET bytes.
 682      (OFFSET must be 0 if OP0 is a register.)
 683    The field starts at position BITPOS within the byte.
 684     (If OP0 is a register, it may be a full word or a narrower mode,
 685      but BITPOS still counts within a full word,
 686      which is significant on bigendian machines.)
 687
 688    Note that protect_from_queue has already been done on OP0 and VALUE.  */
 689
 690 static void
 691 store_fixed_bit_field (op0, offset, bitsize, bitpos, value)
 692      rtx op0;
 693      unsigned HOST_WIDE_INT offset, bitsize, bitpos;
 694      rtx value;
 695 {
 696   enum machine_mode mode;
 697   unsigned int total_bits = BITS_PER_WORD;
 698   rtx subtarget, temp;
 699   int all_zero = 0;
 700   int all_one = 0;
 701
 702   /* There is a case not handled here:
 703      a structure with a known alignment of just a halfword
 704      and a field split across two aligned halfwords within the structure.
 705      Or likewise a structure with a known alignment of just a byte
 706      and a field split across two bytes.
 707      Such cases are not supposed to be able to occur.  */
 708
 709   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 710     {
 711       if (offset != 0)
 712         abort ();
 713       /* Special treatment for a bit field split across two registers.  */
 714       if (bitsize + bitpos > BITS_PER_WORD)
 715         {
 716           store_split_bit_field (op0, bitsize, bitpos, value);
 717           return;
 718         }
 719     }
 720   else
 721     {
 722       /* Get the proper mode to use for this field.  We want a mode that
 723          includes the entire field.  If such a mode would be larger than
 724          a word, we won't be doing the extraction the normal way.
 725          We don't want a mode bigger than the destination.  */
 726
 727       mode = GET_MODE (op0);
 728       if (GET_MODE_BITSIZE (mode) == 0
 729           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 730         mode = word_mode;
 731       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 732                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 733
 734       if (mode == VOIDmode)
 735         {
 736           /* The only way this should occur is if the field spans word
 737              boundaries.  */
 738           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 739                                  value);
 740           return;
 741         }
 742
 743       total_bits = GET_MODE_BITSIZE (mode);
 744
 745       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 746          be in the range 0 to total_bits-1, and put any excess bytes in
 747          OFFSET.  */
 748       if (bitpos >= total_bits)
 749         {
 750           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 751           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 752                      * BITS_PER_UNIT);
 753         }
 754
 755       /* Get ref to an aligned byte, halfword, or word containing the field.
 756          Adjust BITPOS to be position within a word,
 757          and OFFSET to be the offset of that word.
 758          Then alter OP0 to refer to that word.  */
 759       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 760       offset -= (offset % (total_bits / BITS_PER_UNIT));
 761       op0 = adjust_address (op0, mode, offset);
 762     }
 763
 764   mode = GET_MODE (op0);
 765
 766   /* Now MODE is either some integral mode for a MEM as OP0,
 767      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 768      The bit field is contained entirely within OP0.
 769      BITPOS is the starting bit number within OP0.
 770      (OP0's mode may actually be narrower than MODE.)  */
 771
 772   if (BYTES_BIG_ENDIAN)
 773       /* BITPOS is the distance between our msb
 774          and that of the containing datum.
 775          Convert it to the distance from the lsb.  */
 776       bitpos = total_bits - bitsize - bitpos;
 777
 778   /* Now BITPOS is always the distance between our lsb
 779      and that of OP0.  */
 780
 781   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 782      we must first convert its mode to MODE.  */
 783
 784   if (GET_CODE (value) == CONST_INT)
 785     {
 786       HOST_WIDE_INT v = INTVAL (value);
 787
 788       if (bitsize < HOST_BITS_PER_WIDE_INT)
 789         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 790
 791       if (v == 0)
 792         all_zero = 1;
 793       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 794                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 795                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 796         all_one = 1;
 797
 798       value = lshift_value (mode, value, bitpos, bitsize);
 799     }
 800   else
 801     {
 802       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 803                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 804
 805       if (GET_MODE (value) != mode)
 806         {
 807           if ((GET_CODE (value) == REG || GET_CODE (value) == SUBREG)
 808               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 809             value = gen_lowpart (mode, value);
 810           else
 811             value = convert_to_mode (mode, value, 1);
 812         }
 813
 814       if (must_and)
 815         value = expand_binop (mode, and_optab, value,
 816                               mask_rtx (mode, 0, bitsize, 0),
 817                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 818       if (bitpos > 0)
 819         value = expand_shift (LSHIFT_EXPR, mode, value,
 820                               build_int_2 (bitpos, 0), NULL_RTX, 1);
 821     }
 822
 823   /* Now clear the chosen bits in OP0,
 824      except that if VALUE is -1 we need not bother.  */
 825
 826   subtarget = (GET_CODE (op0) == REG || ! flag_force_mem) ? op0 : 0;
 827
 828   if (! all_one)
 829     {
 830       temp = expand_binop (mode, and_optab, op0,
 831                            mask_rtx (mode, bitpos, bitsize, 1),
 832                            subtarget, 1, OPTAB_LIB_WIDEN);
 833       subtarget = temp;
 834     }
 835   else
 836     temp = op0;
 837
 838   /* Now logical-or VALUE into OP0, unless it is zero.  */
 839
 840   if (! all_zero)
 841     temp = expand_binop (mode, ior_optab, temp, value,
 842                          subtarget, 1, OPTAB_LIB_WIDEN);
 843   if (op0 != temp)
 844     emit_move_insn (op0, temp);
 845 }
 846 \f
 847 /* Store a bit field that is split across multiple accessible memory objects.
 848
 849    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 850    BITSIZE is the field width; BITPOS the position of its first bit
 851    (within the word).
 852    VALUE is the value to store.
 853
 854    This does not yet handle fields wider than BITS_PER_WORD.  */
 855
 856 static void
 857 store_split_bit_field (op0, bitsize, bitpos, value)
 858      rtx op0;
 859      unsigned HOST_WIDE_INT bitsize, bitpos;
 860      rtx value;
 861 {
 862   unsigned int unit;
 863   unsigned int bitsdone = 0;
 864
 865   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 866      much at a time.  */
 867   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 868     unit = BITS_PER_WORD;
 869   else
 870     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
 871
 872   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 873      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 874      that VALUE might be a floating-point constant.  */
 875   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
 876     {
 877       rtx word = gen_lowpart_common (word_mode, value);
 878
 879       if (word && (value != word))
 880         value = word;
 881       else
 882         value = gen_lowpart_common (word_mode,
 883                                     force_reg (GET_MODE (value) != VOIDmode
 884                                                ? GET_MODE (value)
 885                                                : word_mode, value));
 886     }
 887   else if (GET_CODE (value) == ADDRESSOF)
 888     value = copy_to_reg (value);
 889
 890   while (bitsdone < bitsize)
 891     {
 892       unsigned HOST_WIDE_INT thissize;
 893       rtx part, word;
 894       unsigned HOST_WIDE_INT thispos;
 895       unsigned HOST_WIDE_INT offset;
 896
 897       offset = (bitpos + bitsdone) / unit;
 898       thispos = (bitpos + bitsdone) % unit;
 899
 900       /* THISSIZE must not overrun a word boundary.  Otherwise,
 901          store_fixed_bit_field will call us again, and we will mutually
 902          recurse forever.  */
 903       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
 904       thissize = MIN (thissize, unit - thispos);
 905
 906       if (BYTES_BIG_ENDIAN)
 907         {
 908           int total_bits;
 909
 910           /* We must do an endian conversion exactly the same way as it is
 911              done in extract_bit_field, so that the two calls to
 912              extract_fixed_bit_field will have comparable arguments.  */
 913           if (GET_CODE (value) != MEM || GET_MODE (value) == BLKmode)
 914             total_bits = BITS_PER_WORD;
 915           else
 916             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
 917
 918           /* Fetch successively less significant portions.  */
 919           if (GET_CODE (value) == CONST_INT)
 920             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 921                              >> (bitsize - bitsdone - thissize))
 922                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 923           else
 924             /* The args are chosen so that the last part includes the
 925                lsb.  Give extract_bit_field the value it needs (with
 926                endianness compensation) to fetch the piece we want.  */
 927             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
 928                                             total_bits - bitsize + bitsdone,
 929                                             NULL_RTX, 1);
 930         }
 931       else
 932         {
 933           /* Fetch successively more significant portions.  */
 934           if (GET_CODE (value) == CONST_INT)
 935             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 936                              >> bitsdone)
 937                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 938           else
 939             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
 940                                             bitsdone, NULL_RTX, 1);
 941         }
 942
 943       /* If OP0 is a register, then handle OFFSET here.
 944
 945          When handling multiword bitfields, extract_bit_field may pass
 946          down a word_mode SUBREG of a larger REG for a bitfield that actually
 947          crosses a word boundary.  Thus, for a SUBREG, we must find
 948          the current word starting from the base register.  */
 949       if (GET_CODE (op0) == SUBREG)
 950         {
 951           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
 952           word = operand_subword_force (SUBREG_REG (op0), word_offset,
 953                                         GET_MODE (SUBREG_REG (op0)));
 954           offset = 0;
 955         }
 956       else if (GET_CODE (op0) == REG)
 957         {
 958           word = operand_subword_force (op0, offset, GET_MODE (op0));
 959           offset = 0;
 960         }
 961       else
 962         word = op0;
 963
 964       /* OFFSET is in UNITs, and UNIT is in bits.
 965          store_fixed_bit_field wants offset in bytes.  */
 966       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
 967                              thispos, part);
 968       bitsdone += thissize;
 969     }
 970 }
 971 \f
 972 /* Generate code to extract a byte-field from STR_RTX
 973    containing BITSIZE bits, starting at BITNUM,
 974    and put it in TARGET if possible (if TARGET is nonzero).
 975    Regardless of TARGET, we return the rtx for where the value is placed.
 976    It may be a QUEUED.
 977
 978    STR_RTX is the structure containing the byte (a REG or MEM).
 979    UNSIGNEDP is nonzero if this is an unsigned bit field.
 980    MODE is the natural mode of the field value once extracted.
 981    TMODE is the mode the caller would like the value to have;
 982    but the value may be returned with type MODE instead.
 983
 984    TOTAL_SIZE is the size in bytes of the containing structure,
 985    or -1 if varying.
 986
 987    If a TARGET is specified and we can store in it at no extra cost,
 988    we do so, and return TARGET.
 989    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
 990    if they are equally easy.  */
 991
 992 rtx
 993 extract_bit_field (str_rtx, bitsize, bitnum, unsignedp,
 994                    target, mode, tmode, total_size)
 995      rtx str_rtx;
 996      unsigned HOST_WIDE_INT bitsize;
 997      unsigned HOST_WIDE_INT bitnum;
 998      int unsignedp;
 999      rtx target;
1000      enum machine_mode mode, tmode;
1001      HOST_WIDE_INT total_size;
1002 {
1003   unsigned int unit
1004     = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
1005   unsigned HOST_WIDE_INT offset = bitnum / unit;
1006   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
1007   rtx op0 = str_rtx;
1008   rtx spec_target = target;
1009   rtx spec_target_subreg = 0;
1010   enum machine_mode int_mode;
1011   enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1012   enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1013   enum machine_mode mode1;
1014   int byte_offset;
1015
1016   /* Discount the part of the structure before the desired byte.
1017      We need to know how many bytes are safe to reference after it.  */
1018   if (total_size >= 0)
1019     total_size -= (bitpos / BIGGEST_ALIGNMENT
1020                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
1021
1022   if (tmode == VOIDmode)
1023     tmode = mode;
1024   while (GET_CODE (op0) == SUBREG)
1025     {
1026       int outer_size = GET_MODE_BITSIZE (GET_MODE (op0));
1027       int inner_size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op0)));
1028
1029       offset += SUBREG_BYTE (op0) / UNITS_PER_WORD;
1030
1031       inner_size = MIN (inner_size, BITS_PER_WORD);
1032
1033       if (BYTES_BIG_ENDIAN && (outer_size < inner_size))
1034         {
1035           bitpos += inner_size - outer_size;
1036           if (bitpos > unit)
1037             {
1038               offset += (bitpos / unit);
1039               bitpos %= unit;
1040             }
1041         }
1042
1043       op0 = SUBREG_REG (op0);
1044     }
1045
1046   if (GET_CODE (op0) == REG
1047       && mode == GET_MODE (op0)
1048       && bitnum == 0
1049       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1050     {
1051       /* We're trying to extract a full register from itself.  */
1052       return op0;
1053     }
1054
1055   /* Make sure we are playing with integral modes.  Pun with subregs
1056      if we aren't.  */
1057   {
1058     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1059     if (imode != GET_MODE (op0))
1060       {
1061         if (GET_CODE (op0) == MEM)
1062           op0 = adjust_address (op0, imode, 0);
1063         else if (imode != BLKmode)
1064           op0 = gen_lowpart (imode, op0);
1065         else
1066           abort ();
1067       }
1068   }
1069
1070   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1071      If that's wrong, the solution is to test for it and set TARGET to 0
1072      if needed.  */
1073
1074   /* If OP0 is a register, BITPOS must count within a word.
1075      But as we have it, it counts within whatever size OP0 now has.
1076      On a bigendian machine, these are not the same, so convert.  */
1077   if (BYTES_BIG_ENDIAN
1078       && GET_CODE (op0) != MEM
1079       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1080     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1081
1082   /* Extracting a full-word or multi-word value
1083      from a structure in a register or aligned memory.
1084      This can be done with just SUBREG.
1085      So too extracting a subword value in
1086      the least significant part of the register.  */
1087
1088   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
1089                 + (offset * UNITS_PER_WORD);
1090
1091   mode1  = (VECTOR_MODE_P (tmode)
1092            ? mode
1093            : mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0));
1094
1095   if (((GET_CODE (op0) != MEM
1096         && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1097                                   GET_MODE_BITSIZE (GET_MODE (op0)))
1098         && GET_MODE_SIZE (mode1) != 0
1099         && byte_offset % GET_MODE_SIZE (mode1) == 0)
1100        || (GET_CODE (op0) == MEM
1101            && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1102                || (offset * BITS_PER_UNIT % bitsize == 0
1103                    && MEM_ALIGN (op0) % bitsize == 0))))
1104       && ((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1105            && bitpos % BITS_PER_WORD == 0)
1106           || (mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) != BLKmode
1107               /* ??? The big endian test here is wrong.  This is correct
1108                  if the value is in a register, and if mode_for_size is not
1109                  the same mode as op0.  This causes us to get unnecessarily
1110                  inefficient code from the Thumb port when -mbig-endian.  */
1111               && (BYTES_BIG_ENDIAN
1112                   ? bitpos + bitsize == BITS_PER_WORD
1113                   : bitpos == 0))))
1114     {
1115       if (mode1 != GET_MODE (op0))
1116         {
1117           if (GET_CODE (op0) == SUBREG)
1118             {
1119               if (GET_MODE (SUBREG_REG (op0)) == mode1
1120                   || GET_MODE_CLASS (mode1) == MODE_INT
1121                   || GET_MODE_CLASS (mode1) == MODE_PARTIAL_INT)
1122                 op0 = SUBREG_REG (op0);
1123               else
1124                 /* Else we've got some float mode source being extracted into
1125                    a different float mode destination -- this combination of
1126                    subregs results in Severe Tire Damage.  */
1127                 abort ();
1128             }
1129           if (GET_CODE (op0) == REG)
1130             op0 = gen_rtx_SUBREG (mode1, op0, byte_offset);
1131           else
1132             op0 = adjust_address (op0, mode1, offset);
1133         }
1134       if (mode1 != mode)
1135         return convert_to_mode (tmode, op0, unsignedp);
1136       return op0;
1137     }
1138
1139   /* Handle fields bigger than a word.  */
1140
1141   if (bitsize > BITS_PER_WORD)
1142     {
1143       /* Here we transfer the words of the field
1144          in the order least significant first.
1145          This is because the most significant word is the one which may
1146          be less than full.  */
1147
1148       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1149       unsigned int i;
1150
1151       if (target == 0 || GET_CODE (target) != REG)
1152         target = gen_reg_rtx (mode);
1153
1154       /* Indicate for flow that the entire target reg is being set.  */
1155       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1156
1157       for (i = 0; i < nwords; i++)
1158         {
1159           /* If I is 0, use the low-order word in both field and target;
1160              if I is 1, use the next to lowest word; and so on.  */
1161           /* Word number in TARGET to use.  */
1162           unsigned int wordnum
1163             = (WORDS_BIG_ENDIAN
1164                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1165                : i);
1166           /* Offset from start of field in OP0.  */
1167           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1168                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1169                                                 * (int) BITS_PER_WORD))
1170                                      : (int) i * BITS_PER_WORD);
1171           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1172           rtx result_part
1173             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1174                                            bitsize - i * BITS_PER_WORD),
1175                                  bitnum + bit_offset, 1, target_part, mode,
1176                                  word_mode, total_size);
1177
1178           if (target_part == 0)
1179             abort ();
1180
1181           if (result_part != target_part)
1182             emit_move_insn (target_part, result_part);
1183         }
1184
1185       if (unsignedp)
1186         {
1187           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1188              need to be zero'd out.  */
1189           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1190             {
1191               unsigned int i, total_words;
1192
1193               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1194               for (i = nwords; i < total_words; i++)
1195                 emit_move_insn
1196                   (operand_subword (target,
1197                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1198                                     1, VOIDmode),
1199                    const0_rtx);
1200             }
1201           return target;
1202         }
1203
1204       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1205       target = expand_shift (LSHIFT_EXPR, mode, target,
1206                              build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1207                              NULL_RTX, 0);
1208       return expand_shift (RSHIFT_EXPR, mode, target,
1209                            build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1210                            NULL_RTX, 0);
1211     }
1212
1213   /* From here on we know the desired field is smaller than a word.  */
1214
1215   /* Check if there is a correspondingly-sized integer field, so we can
1216      safely extract it as one size of integer, if necessary; then
1217      truncate or extend to the size that is wanted; then use SUBREGs or
1218      convert_to_mode to get one of the modes we really wanted.  */
1219
1220   int_mode = int_mode_for_mode (tmode);
1221   if (int_mode == BLKmode)
1222     int_mode = int_mode_for_mode (mode);
1223   if (int_mode == BLKmode)
1224     abort ();    /* Should probably push op0 out to memory and then
1225                     do a load.  */
1226
1227   /* OFFSET is the number of words or bytes (UNIT says which)
1228      from STR_RTX to the first word or byte containing part of the field.  */
1229
1230   if (GET_CODE (op0) != MEM)
1231     {
1232       if (offset != 0
1233           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1234         {
1235           if (GET_CODE (op0) != REG)
1236             op0 = copy_to_reg (op0);
1237           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1238                                 op0, (offset * UNITS_PER_WORD));
1239         }
1240       offset = 0;
1241     }
1242   else
1243     op0 = protect_from_queue (str_rtx, 1);
1244
1245   /* Now OFFSET is nonzero only for memory operands.  */
1246
1247   if (unsignedp)
1248     {
1249       if (HAVE_extzv
1250           && (GET_MODE_BITSIZE (extzv_mode) >= bitsize)
1251           && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1252                 && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1253         {
1254           unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1255           rtx bitsize_rtx, bitpos_rtx;
1256           rtx last = get_last_insn ();
1257           rtx xop0 = op0;
1258           rtx xtarget = target;
1259           rtx xspec_target = spec_target;
1260           rtx xspec_target_subreg = spec_target_subreg;
1261           rtx pat;
1262           enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1263
1264           if (GET_CODE (xop0) == MEM)
1265             {
1266               int save_volatile_ok = volatile_ok;
1267               volatile_ok = 1;
1268
1269               /* Is the memory operand acceptable?  */
1270               if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1271                      (xop0, GET_MODE (xop0))))
1272                 {
1273                   /* No, load into a reg and extract from there.  */
1274                   enum machine_mode bestmode;
1275
1276                   /* Get the mode to use for inserting into this field.  If
1277                      OP0 is BLKmode, get the smallest mode consistent with the
1278                      alignment. If OP0 is a non-BLKmode object that is no
1279                      wider than MAXMODE, use its mode. Otherwise, use the
1280                      smallest mode containing the field.  */
1281
1282                   if (GET_MODE (xop0) == BLKmode
1283                       || (GET_MODE_SIZE (GET_MODE (op0))
1284                           > GET_MODE_SIZE (maxmode)))
1285                     bestmode = get_best_mode (bitsize, bitnum,
1286                                               MEM_ALIGN (xop0), maxmode,
1287                                               MEM_VOLATILE_P (xop0));
1288                   else
1289                     bestmode = GET_MODE (xop0);
1290
1291                   if (bestmode == VOIDmode
1292                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1293                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1294                     goto extzv_loses;
1295
1296                   /* Compute offset as multiple of this unit,
1297                      counting in bytes.  */
1298                   unit = GET_MODE_BITSIZE (bestmode);
1299                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1300                   xbitpos = bitnum % unit;
1301                   xop0 = adjust_address (xop0, bestmode, xoffset);
1302
1303                   /* Fetch it to a register in that size.  */
1304                   xop0 = force_reg (bestmode, xop0);
1305
1306                   /* XBITPOS counts within UNIT, which is what is expected.  */
1307                 }
1308               else
1309                 /* Get ref to first byte containing part of the field.  */
1310                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1311
1312               volatile_ok = save_volatile_ok;
1313             }
1314
1315           /* If op0 is a register, we need it in MAXMODE (which is usually
1316              SImode). to make it acceptable to the format of extzv.  */
1317           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1318             goto extzv_loses;
1319           if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1320             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1321
1322           /* On big-endian machines, we count bits from the most significant.
1323              If the bit field insn does not, we must invert.  */
1324           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1325             xbitpos = unit - bitsize - xbitpos;
1326
1327           /* Now convert from counting within UNIT to counting in MAXMODE.  */
1328           if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1329             xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1330
1331           unit = GET_MODE_BITSIZE (maxmode);
1332
1333           if (xtarget == 0
1334               || (flag_force_mem && GET_CODE (xtarget) == MEM))
1335             xtarget = xspec_target = gen_reg_rtx (tmode);
1336
1337           if (GET_MODE (xtarget) != maxmode)
1338             {
1339               if (GET_CODE (xtarget) == REG)
1340                 {
1341                   int wider = (GET_MODE_SIZE (maxmode)
1342                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1343                   xtarget = gen_lowpart (maxmode, xtarget);
1344                   if (wider)
1345                     xspec_target_subreg = xtarget;
1346                 }
1347               else
1348                 xtarget = gen_reg_rtx (maxmode);
1349             }
1350
1351           /* If this machine's extzv insists on a register target,
1352              make sure we have one.  */
1353           if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1354                  (xtarget, maxmode)))
1355             xtarget = gen_reg_rtx (maxmode);
1356
1357           bitsize_rtx = GEN_INT (bitsize);
1358           bitpos_rtx = GEN_INT (xbitpos);
1359
1360           pat = gen_extzv (protect_from_queue (xtarget, 1),
1361                            xop0, bitsize_rtx, bitpos_rtx);
1362           if (pat)
1363             {
1364               emit_insn (pat);
1365               target = xtarget;
1366               spec_target = xspec_target;
1367               spec_target_subreg = xspec_target_subreg;
1368             }
1369           else
1370             {
1371               delete_insns_since (last);
1372               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1373                                                 bitpos, target, 1);
1374             }
1375         }
1376       else
1377       extzv_loses:
1378         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1379                                           bitpos, target, 1);
1380     }
1381   else
1382     {
1383       if (HAVE_extv
1384           && (GET_MODE_BITSIZE (extv_mode) >= bitsize)
1385           && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1386                 && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1387         {
1388           int xbitpos = bitpos, xoffset = offset;
1389           rtx bitsize_rtx, bitpos_rtx;
1390           rtx last = get_last_insn ();
1391           rtx xop0 = op0, xtarget = target;
1392           rtx xspec_target = spec_target;
1393           rtx xspec_target_subreg = spec_target_subreg;
1394           rtx pat;
1395           enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1396
1397           if (GET_CODE (xop0) == MEM)
1398             {
1399               /* Is the memory operand acceptable?  */
1400               if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1401                      (xop0, GET_MODE (xop0))))
1402                 {
1403                   /* No, load into a reg and extract from there.  */
1404                   enum machine_mode bestmode;
1405
1406                   /* Get the mode to use for inserting into this field.  If
1407                      OP0 is BLKmode, get the smallest mode consistent with the
1408                      alignment. If OP0 is a non-BLKmode object that is no
1409                      wider than MAXMODE, use its mode. Otherwise, use the
1410                      smallest mode containing the field.  */
1411
1412                   if (GET_MODE (xop0) == BLKmode
1413                       || (GET_MODE_SIZE (GET_MODE (op0))
1414                           > GET_MODE_SIZE (maxmode)))
1415                     bestmode = get_best_mode (bitsize, bitnum,
1416                                               MEM_ALIGN (xop0), maxmode,
1417                                               MEM_VOLATILE_P (xop0));
1418                   else
1419                     bestmode = GET_MODE (xop0);
1420
1421                   if (bestmode == VOIDmode
1422                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1423                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1424                     goto extv_loses;
1425
1426                   /* Compute offset as multiple of this unit,
1427                      counting in bytes.  */
1428                   unit = GET_MODE_BITSIZE (bestmode);
1429                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1430                   xbitpos = bitnum % unit;
1431                   xop0 = adjust_address (xop0, bestmode, xoffset);
1432
1433                   /* Fetch it to a register in that size.  */
1434                   xop0 = force_reg (bestmode, xop0);
1435
1436                   /* XBITPOS counts within UNIT, which is what is expected.  */
1437                 }
1438               else
1439                 /* Get ref to first byte containing part of the field.  */
1440                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1441             }
1442
1443           /* If op0 is a register, we need it in MAXMODE (which is usually
1444              SImode) to make it acceptable to the format of extv.  */
1445           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1446             goto extv_loses;
1447           if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1448             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1449
1450           /* On big-endian machines, we count bits from the most significant.
1451              If the bit field insn does not, we must invert.  */
1452           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1453             xbitpos = unit - bitsize - xbitpos;
1454
1455           /* XBITPOS counts within a size of UNIT.
1456              Adjust to count within a size of MAXMODE.  */
1457           if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1458             xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1459
1460           unit = GET_MODE_BITSIZE (maxmode);
1461
1462           if (xtarget == 0
1463               || (flag_force_mem && GET_CODE (xtarget) == MEM))
1464             xtarget = xspec_target = gen_reg_rtx (tmode);
1465
1466           if (GET_MODE (xtarget) != maxmode)
1467             {
1468               if (GET_CODE (xtarget) == REG)
1469                 {
1470                   int wider = (GET_MODE_SIZE (maxmode)
1471                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1472                   xtarget = gen_lowpart (maxmode, xtarget);
1473                   if (wider)
1474                     xspec_target_subreg = xtarget;
1475                 }
1476               else
1477                 xtarget = gen_reg_rtx (maxmode);
1478             }
1479
1480           /* If this machine's extv insists on a register target,
1481              make sure we have one.  */
1482           if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1483                  (xtarget, maxmode)))
1484             xtarget = gen_reg_rtx (maxmode);
1485
1486           bitsize_rtx = GEN_INT (bitsize);
1487           bitpos_rtx = GEN_INT (xbitpos);
1488
1489           pat = gen_extv (protect_from_queue (xtarget, 1),
1490                           xop0, bitsize_rtx, bitpos_rtx);
1491           if (pat)
1492             {
1493               emit_insn (pat);
1494               target = xtarget;
1495               spec_target = xspec_target;
1496               spec_target_subreg = xspec_target_subreg;
1497             }
1498           else
1499             {
1500               delete_insns_since (last);
1501               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1502                                                 bitpos, target, 0);
1503             }
1504         }
1505       else
1506       extv_loses:
1507         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1508                                           bitpos, target, 0);
1509     }
1510   if (target == spec_target)
1511     return target;
1512   if (target == spec_target_subreg)
1513     return spec_target;
1514   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1515     {
1516       /* If the target mode is floating-point, first convert to the
1517          integer mode of that size and then access it as a floating-point
1518          value via a SUBREG.  */
1519       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1520         {
1521           target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
1522                                                    MODE_INT, 0),
1523                                     target, unsignedp);
1524           if (GET_CODE (target) != REG)
1525             target = copy_to_reg (target);
1526           return gen_rtx_SUBREG (tmode, target, 0);
1527         }
1528       else
1529         return convert_to_mode (tmode, target, unsignedp);
1530     }
1531   return target;
1532 }
1533 \f
1534 /* Extract a bit field using shifts and boolean operations
1535    Returns an rtx to represent the value.
1536    OP0 addresses a register (word) or memory (byte).
1537    BITPOS says which bit within the word or byte the bit field starts in.
1538    OFFSET says how many bytes farther the bit field starts;
1539     it is 0 if OP0 is a register.
1540    BITSIZE says how many bits long the bit field is.
1541     (If OP0 is a register, it may be narrower than a full word,
1542      but BITPOS still counts within a full word,
1543      which is significant on bigendian machines.)
1544
1545    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1546    If TARGET is nonzero, attempts to store the value there
1547    and return TARGET, but this is not guaranteed.
1548    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1549
1550 static rtx
1551 extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
1552                          target, unsignedp)
1553      enum machine_mode tmode;
1554      rtx op0, target;
1555      unsigned HOST_WIDE_INT offset, bitsize, bitpos;
1556      int unsignedp;
1557 {
1558   unsigned int total_bits = BITS_PER_WORD;
1559   enum machine_mode mode;
1560
1561   if (GET_CODE (op0) == SUBREG || GET_CODE (op0) == REG)
1562     {
1563       /* Special treatment for a bit field split across two registers.  */
1564       if (bitsize + bitpos > BITS_PER_WORD)
1565         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1566     }
1567   else
1568     {
1569       /* Get the proper mode to use for this field.  We want a mode that
1570          includes the entire field.  If such a mode would be larger than
1571          a word, we won't be doing the extraction the normal way.  */
1572
1573       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1574                             MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1575
1576       if (mode == VOIDmode)
1577         /* The only way this should occur is if the field spans word
1578            boundaries.  */
1579         return extract_split_bit_field (op0, bitsize,
1580                                         bitpos + offset * BITS_PER_UNIT,
1581                                         unsignedp);
1582
1583       total_bits = GET_MODE_BITSIZE (mode);
1584
1585       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1586          be in the range 0 to total_bits-1, and put any excess bytes in
1587          OFFSET.  */
1588       if (bitpos >= total_bits)
1589         {
1590           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1591           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1592                      * BITS_PER_UNIT);
1593         }
1594
1595       /* Get ref to an aligned byte, halfword, or word containing the field.
1596          Adjust BITPOS to be position within a word,
1597          and OFFSET to be the offset of that word.
1598          Then alter OP0 to refer to that word.  */
1599       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1600       offset -= (offset % (total_bits / BITS_PER_UNIT));
1601       op0 = adjust_address (op0, mode, offset);
1602     }
1603
1604   mode = GET_MODE (op0);
1605
1606   if (BYTES_BIG_ENDIAN)
1607     /* BITPOS is the distance between our msb and that of OP0.
1608        Convert it to the distance from the lsb.  */
1609     bitpos = total_bits - bitsize - bitpos;
1610
1611   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1612      We have reduced the big-endian case to the little-endian case.  */
1613
1614   if (unsignedp)
1615     {
1616       if (bitpos)
1617         {
1618           /* If the field does not already start at the lsb,
1619              shift it so it does.  */
1620           tree amount = build_int_2 (bitpos, 0);
1621           /* Maybe propagate the target for the shift.  */
1622           /* But not if we will return it--could confuse integrate.c.  */
1623           rtx subtarget = (target != 0 && GET_CODE (target) == REG
1624                            && !REG_FUNCTION_VALUE_P (target)
1625                            ? target : 0);
1626           if (tmode != mode) subtarget = 0;
1627           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1628         }
1629       /* Convert the value to the desired mode.  */
1630       if (mode != tmode)
1631         op0 = convert_to_mode (tmode, op0, 1);
1632
1633       /* Unless the msb of the field used to be the msb when we shifted,
1634          mask out the upper bits.  */
1635
1636       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1637         return expand_binop (GET_MODE (op0), and_optab, op0,
1638                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1639                              target, 1, OPTAB_LIB_WIDEN);
1640       return op0;
1641     }
1642
1643   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1644      then arithmetic-shift its lsb to the lsb of the word.  */
1645   op0 = force_reg (mode, op0);
1646   if (mode != tmode)
1647     target = 0;
1648
1649   /* Find the narrowest integer mode that contains the field.  */
1650
1651   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1652        mode = GET_MODE_WIDER_MODE (mode))
1653     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1654       {
1655         op0 = convert_to_mode (mode, op0, 0);
1656         break;
1657       }
1658
1659   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1660     {
1661       tree amount
1662         = build_int_2 (GET_MODE_BITSIZE (mode) - (bitsize + bitpos), 0);
1663       /* Maybe propagate the target for the shift.  */
1664       /* But not if we will return the result--could confuse integrate.c.  */
1665       rtx subtarget = (target != 0 && GET_CODE (target) == REG
1666                        && ! REG_FUNCTION_VALUE_P (target)
1667                        ? target : 0);
1668       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1669     }
1670
1671   return expand_shift (RSHIFT_EXPR, mode, op0,
1672                        build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1673                        target, 0);
1674 }
1675 \f
1676 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1677    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1678    complement of that if COMPLEMENT.  The mask is truncated if
1679    necessary to the width of mode MODE.  The mask is zero-extended if
1680    BITSIZE+BITPOS is too small for MODE.  */
1681
1682 static rtx
1683 mask_rtx (mode, bitpos, bitsize, complement)
1684      enum machine_mode mode;
1685      int bitpos, bitsize, complement;
1686 {
1687   HOST_WIDE_INT masklow, maskhigh;
1688
1689   if (bitpos < HOST_BITS_PER_WIDE_INT)
1690     masklow = (HOST_WIDE_INT) -1 << bitpos;
1691   else
1692     masklow = 0;
1693
1694   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1695     masklow &= ((unsigned HOST_WIDE_INT) -1
1696                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1697
1698   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1699     maskhigh = -1;
1700   else
1701     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1702
1703   if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1704     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1705                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1706   else
1707     maskhigh = 0;
1708
1709   if (complement)
1710     {
1711       maskhigh = ~maskhigh;
1712       masklow = ~masklow;
1713     }
1714
1715   return immed_double_const (masklow, maskhigh, mode);
1716 }
1717
1718 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1719    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1720
1721 static rtx
1722 lshift_value (mode, value, bitpos, bitsize)
1723      enum machine_mode mode;
1724      rtx value;
1725      int bitpos, bitsize;
1726 {
1727   unsigned HOST_WIDE_INT v = INTVAL (value);
1728   HOST_WIDE_INT low, high;
1729
1730   if (bitsize < HOST_BITS_PER_WIDE_INT)
1731     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1732
1733   if (bitpos < HOST_BITS_PER_WIDE_INT)
1734     {
1735       low = v << bitpos;
1736       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1737     }
1738   else
1739     {
1740       low = 0;
1741       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1742     }
1743
1744   return immed_double_const (low, high, mode);
1745 }
1746 \f
1747 /* Extract a bit field that is split across two words
1748    and return an RTX for the result.
1749
1750    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1751    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1752    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1753
1754 static rtx
1755 extract_split_bit_field (op0, bitsize, bitpos, unsignedp)
1756      rtx op0;
1757      unsigned HOST_WIDE_INT bitsize, bitpos;
1758      int unsignedp;
1759 {
1760   unsigned int unit;
1761   unsigned int bitsdone = 0;
1762   rtx result = NULL_RTX;
1763   int first = 1;
1764
1765   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1766      much at a time.  */
1767   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1768     unit = BITS_PER_WORD;
1769   else
1770     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1771
1772   while (bitsdone < bitsize)
1773     {
1774       unsigned HOST_WIDE_INT thissize;
1775       rtx part, word;
1776       unsigned HOST_WIDE_INT thispos;
1777       unsigned HOST_WIDE_INT offset;
1778
1779       offset = (bitpos + bitsdone) / unit;
1780       thispos = (bitpos + bitsdone) % unit;
1781
1782       /* THISSIZE must not overrun a word boundary.  Otherwise,
1783          extract_fixed_bit_field will call us again, and we will mutually
1784          recurse forever.  */
1785       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1786       thissize = MIN (thissize, unit - thispos);
1787
1788       /* If OP0 is a register, then handle OFFSET here.
1789
1790          When handling multiword bitfields, extract_bit_field may pass
1791          down a word_mode SUBREG of a larger REG for a bitfield that actually
1792          crosses a word boundary.  Thus, for a SUBREG, we must find
1793          the current word starting from the base register.  */
1794       if (GET_CODE (op0) == SUBREG)
1795         {
1796           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1797           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1798                                         GET_MODE (SUBREG_REG (op0)));
1799           offset = 0;
1800         }
1801       else if (GET_CODE (op0) == REG)
1802         {
1803           word = operand_subword_force (op0, offset, GET_MODE (op0));
1804           offset = 0;
1805         }
1806       else
1807         word = op0;
1808
1809       /* Extract the parts in bit-counting order,
1810          whose meaning is determined by BYTES_PER_UNIT.
1811          OFFSET is in UNITs, and UNIT is in bits.
1812          extract_fixed_bit_field wants offset in bytes.  */
1813       part = extract_fixed_bit_field (word_mode, word,
1814                                       offset * unit / BITS_PER_UNIT,
1815                                       thissize, thispos, 0, 1);
1816       bitsdone += thissize;
1817
1818       /* Shift this part into place for the result.  */
1819       if (BYTES_BIG_ENDIAN)
1820         {
1821           if (bitsize != bitsdone)
1822             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1823                                  build_int_2 (bitsize - bitsdone, 0), 0, 1);
1824         }
1825       else
1826         {
1827           if (bitsdone != thissize)
1828             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1829                                  build_int_2 (bitsdone - thissize, 0), 0, 1);
1830         }
1831
1832       if (first)
1833         result = part;
1834       else
1835         /* Combine the parts with bitwise or.  This works
1836            because we extracted each part as an unsigned bit field.  */
1837         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1838                                OPTAB_LIB_WIDEN);
1839
1840       first = 0;
1841     }
1842
1843   /* Unsigned bit field: we are done.  */
1844   if (unsignedp)
1845     return result;
1846   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1847   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1848                          build_int_2 (BITS_PER_WORD - bitsize, 0),
1849                          NULL_RTX, 0);
1850   return expand_shift (RSHIFT_EXPR, word_mode, result,
1851                        build_int_2 (BITS_PER_WORD - bitsize, 0), NULL_RTX, 0);
1852 }
1853 \f
1854 /* Add INC into TARGET.  */
1855
1856 void
1857 expand_inc (target, inc)
1858      rtx target, inc;
1859 {
1860   rtx value = expand_binop (GET_MODE (target), add_optab,
1861                             target, inc,
1862                             target, 0, OPTAB_LIB_WIDEN);
1863   if (value != target)
1864     emit_move_insn (target, value);
1865 }
1866
1867 /* Subtract DEC from TARGET.  */
1868
1869 void
1870 expand_dec (target, dec)
1871      rtx target, dec;
1872 {
1873   rtx value = expand_binop (GET_MODE (target), sub_optab,
1874                             target, dec,
1875                             target, 0, OPTAB_LIB_WIDEN);
1876   if (value != target)
1877     emit_move_insn (target, value);
1878 }
1879 \f
1880 /* Output a shift instruction for expression code CODE,
1881    with SHIFTED being the rtx for the value to shift,
1882    and AMOUNT the tree for the amount to shift by.
1883    Store the result in the rtx TARGET, if that is convenient.
1884    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
1885    Return the rtx for where the value is.  */
1886
1887 rtx
1888 expand_shift (code, mode, shifted, amount, target, unsignedp)
1889      enum tree_code code;
1890      enum machine_mode mode;
1891      rtx shifted;
1892      tree amount;
1893      rtx target;
1894      int unsignedp;
1895 {
1896   rtx op1, temp = 0;
1897   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
1898   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
1899   int try;
1900
1901   /* Previously detected shift-counts computed by NEGATE_EXPR
1902      and shifted in the other direction; but that does not work
1903      on all machines.  */
1904
1905   op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
1906
1907 #ifdef SHIFT_COUNT_TRUNCATED
1908   if (SHIFT_COUNT_TRUNCATED)
1909     {
1910       if (GET_CODE (op1) == CONST_INT
1911           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
1912               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
1913         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
1914                        % GET_MODE_BITSIZE (mode));
1915       else if (GET_CODE (op1) == SUBREG
1916                && SUBREG_BYTE (op1) == 0)
1917         op1 = SUBREG_REG (op1);
1918     }
1919 #endif
1920
1921   if (op1 == const0_rtx)
1922     return shifted;
1923
1924   for (try = 0; temp == 0 && try < 3; try++)
1925     {
1926       enum optab_methods methods;
1927
1928       if (try == 0)
1929         methods = OPTAB_DIRECT;
1930       else if (try == 1)
1931         methods = OPTAB_WIDEN;
1932       else
1933         methods = OPTAB_LIB_WIDEN;
1934
1935       if (rotate)
1936         {
1937           /* Widening does not work for rotation.  */
1938           if (methods == OPTAB_WIDEN)
1939             continue;
1940           else if (methods == OPTAB_LIB_WIDEN)
1941             {
1942               /* If we have been unable to open-code this by a rotation,
1943                  do it as the IOR of two shifts.  I.e., to rotate A
1944                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
1945                  where C is the bitsize of A.
1946
1947                  It is theoretically possible that the target machine might
1948                  not be able to perform either shift and hence we would
1949                  be making two libcalls rather than just the one for the
1950                  shift (similarly if IOR could not be done).  We will allow
1951                  this extremely unlikely lossage to avoid complicating the
1952                  code below.  */
1953
1954               rtx subtarget = target == shifted ? 0 : target;
1955               rtx temp1;
1956               tree type = TREE_TYPE (amount);
1957               tree new_amount = make_tree (type, op1);
1958               tree other_amount
1959                 = fold (build (MINUS_EXPR, type,
1960                                convert (type,
1961                                         build_int_2 (GET_MODE_BITSIZE (mode),
1962                                                      0)),
1963                                amount));
1964
1965               shifted = force_reg (mode, shifted);
1966
1967               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
1968                                    mode, shifted, new_amount, subtarget, 1);
1969               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
1970                                     mode, shifted, other_amount, 0, 1);
1971               return expand_binop (mode, ior_optab, temp, temp1, target,
1972                                    unsignedp, methods);
1973             }
1974
1975           temp = expand_binop (mode,
1976                                left ? rotl_optab : rotr_optab,
1977                                shifted, op1, target, unsignedp, methods);
1978
1979           /* If we don't have the rotate, but we are rotating by a constant
1980              that is in range, try a rotate in the opposite direction.  */
1981
1982           if (temp == 0 && GET_CODE (op1) == CONST_INT
1983               && INTVAL (op1) > 0
1984               && (unsigned int) INTVAL (op1) < GET_MODE_BITSIZE (mode))
1985             temp = expand_binop (mode,
1986                                  left ? rotr_optab : rotl_optab,
1987                                  shifted,
1988                                  GEN_INT (GET_MODE_BITSIZE (mode)
1989                                           - INTVAL (op1)),
1990                                  target, unsignedp, methods);
1991         }
1992       else if (unsignedp)
1993         temp = expand_binop (mode,
1994                              left ? ashl_optab : lshr_optab,
1995                              shifted, op1, target, unsignedp, methods);
1996
1997       /* Do arithmetic shifts.
1998          Also, if we are going to widen the operand, we can just as well
1999          use an arithmetic right-shift instead of a logical one.  */
2000       if (temp == 0 && ! rotate
2001           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2002         {
2003           enum optab_methods methods1 = methods;
2004
2005           /* If trying to widen a log shift to an arithmetic shift,
2006              don't accept an arithmetic shift of the same size.  */
2007           if (unsignedp)
2008             methods1 = OPTAB_MUST_WIDEN;
2009
2010           /* Arithmetic shift */
2011
2012           temp = expand_binop (mode,
2013                                left ? ashl_optab : ashr_optab,
2014                                shifted, op1, target, unsignedp, methods1);
2015         }
2016
2017       /* We used to try extzv here for logical right shifts, but that was
2018          only useful for one machine, the VAX, and caused poor code
2019          generation there for lshrdi3, so the code was deleted and a
2020          define_expand for lshrsi3 was added to vax.md.  */
2021     }
2022
2023   if (temp == 0)
2024     abort ();
2025   return temp;
2026 }
2027 \f
2028 enum alg_code { alg_zero, alg_m, alg_shift,
2029                   alg_add_t_m2, alg_sub_t_m2,
2030                   alg_add_factor, alg_sub_factor,
2031                   alg_add_t2_m, alg_sub_t2_m,
2032                   alg_add, alg_subtract, alg_factor, alg_shiftop };
2033
2034 /* This structure records a sequence of operations.
2035    `ops' is the number of operations recorded.
2036    `cost' is their total cost.
2037    The operations are stored in `op' and the corresponding
2038    logarithms of the integer coefficients in `log'.
2039
2040    These are the operations:
2041    alg_zero             total := 0;
2042    alg_m                total := multiplicand;
2043    alg_shift            total := total * coeff
2044    alg_add_t_m2         total := total + multiplicand * coeff;
2045    alg_sub_t_m2         total := total - multiplicand * coeff;
2046    alg_add_factor       total := total * coeff + total;
2047    alg_sub_factor       total := total * coeff - total;
2048    alg_add_t2_m         total := total * coeff + multiplicand;
2049    alg_sub_t2_m         total := total * coeff - multiplicand;
2050
2051    The first operand must be either alg_zero or alg_m.  */
2052
2053 struct algorithm
2054 {
2055   short cost;
2056   short ops;
2057   /* The size of the OP and LOG fields are not directly related to the
2058      word size, but the worst-case algorithms will be if we have few
2059      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2060      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2061      in total wordsize operations.  */
2062   enum alg_code op[MAX_BITS_PER_WORD];
2063   char log[MAX_BITS_PER_WORD];
2064 };
2065
2066 static void synth_mult                  PARAMS ((struct algorithm *,
2067                                                  unsigned HOST_WIDE_INT,
2068                                                  int));
2069 static unsigned HOST_WIDE_INT choose_multiplier PARAMS ((unsigned HOST_WIDE_INT,
2070                                                          int, int,
2071                                                          unsigned HOST_WIDE_INT *,
2072                                                          int *, int *));
2073 static unsigned HOST_WIDE_INT invert_mod2n      PARAMS ((unsigned HOST_WIDE_INT,
2074                                                          int));
2075 /* Compute and return the best algorithm for multiplying by T.
2076    The algorithm must cost less than cost_limit
2077    If retval.cost >= COST_LIMIT, no algorithm was found and all
2078    other field of the returned struct are undefined.  */
2079
2080 static void
2081 synth_mult (alg_out, t, cost_limit)
2082      struct algorithm *alg_out;
2083      unsigned HOST_WIDE_INT t;
2084      int cost_limit;
2085 {
2086   int m;
2087   struct algorithm *alg_in, *best_alg;
2088   int cost;
2089   unsigned HOST_WIDE_INT q;
2090
2091   /* Indicate that no algorithm is yet found.  If no algorithm
2092      is found, this value will be returned and indicate failure.  */
2093   alg_out->cost = cost_limit;
2094
2095   if (cost_limit <= 0)
2096     return;
2097
2098   /* t == 1 can be done in zero cost.  */
2099   if (t == 1)
2100     {
2101       alg_out->ops = 1;
2102       alg_out->cost = 0;
2103       alg_out->op[0] = alg_m;
2104       return;
2105     }
2106
2107   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2108      fail now.  */
2109   if (t == 0)
2110     {
2111       if (zero_cost >= cost_limit)
2112         return;
2113       else
2114         {
2115           alg_out->ops = 1;
2116           alg_out->cost = zero_cost;
2117           alg_out->op[0] = alg_zero;
2118           return;
2119         }
2120     }
2121
2122   /* We'll be needing a couple extra algorithm structures now.  */
2123
2124   alg_in = (struct algorithm *)alloca (sizeof (struct algorithm));
2125   best_alg = (struct algorithm *)alloca (sizeof (struct algorithm));
2126
2127   /* If we have a group of zero bits at the low-order part of T, try
2128      multiplying by the remaining bits and then doing a shift.  */
2129
2130   if ((t & 1) == 0)
2131     {
2132       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2133       if (m < BITS_PER_WORD)
2134         {
2135           q = t >> m;
2136           cost = shift_cost[m];
2137           synth_mult (alg_in, q, cost_limit - cost);
2138
2139           cost += alg_in->cost;
2140           if (cost < cost_limit)
2141             {
2142               struct algorithm *x;
2143               x = alg_in, alg_in = best_alg, best_alg = x;
2144               best_alg->log[best_alg->ops] = m;
2145               best_alg->op[best_alg->ops] = alg_shift;
2146               cost_limit = cost;
2147             }
2148         }
2149     }
2150
2151   /* If we have an odd number, add or subtract one.  */
2152   if ((t & 1) != 0)
2153     {
2154       unsigned HOST_WIDE_INT w;
2155
2156       for (w = 1; (w & t) != 0; w <<= 1)
2157         ;
2158       /* If T was -1, then W will be zero after the loop.  This is another
2159          case where T ends with ...111.  Handling this with (T + 1) and
2160          subtract 1 produces slightly better code and results in algorithm
2161          selection much faster than treating it like the ...0111 case
2162          below.  */
2163       if (w == 0
2164           || (w > 2
2165               /* Reject the case where t is 3.
2166                  Thus we prefer addition in that case.  */
2167               && t != 3))
2168         {
2169           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2170
2171           cost = add_cost;
2172           synth_mult (alg_in, t + 1, cost_limit - cost);
2173
2174           cost += alg_in->cost;
2175           if (cost < cost_limit)
2176             {
2177               struct algorithm *x;
2178               x = alg_in, alg_in = best_alg, best_alg = x;
2179               best_alg->log[best_alg->ops] = 0;
2180               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2181               cost_limit = cost;
2182             }
2183         }
2184       else
2185         {
2186           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2187
2188           cost = add_cost;
2189           synth_mult (alg_in, t - 1, cost_limit - cost);
2190
2191           cost += alg_in->cost;
2192           if (cost < cost_limit)
2193             {
2194               struct algorithm *x;
2195               x = alg_in, alg_in = best_alg, best_alg = x;
2196               best_alg->log[best_alg->ops] = 0;
2197               best_alg->op[best_alg->ops] = alg_add_t_m2;
2198               cost_limit = cost;
2199             }
2200         }
2201     }
2202
2203   /* Look for factors of t of the form
2204      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2205      If we find such a factor, we can multiply by t using an algorithm that
2206      multiplies by q, shift the result by m and add/subtract it to itself.
2207
2208      We search for large factors first and loop down, even if large factors
2209      are less probable than small; if we find a large factor we will find a
2210      good sequence quickly, and therefore be able to prune (by decreasing
2211      COST_LIMIT) the search.  */
2212
2213   for (m = floor_log2 (t - 1); m >= 2; m--)
2214     {
2215       unsigned HOST_WIDE_INT d;
2216
2217       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2218       if (t % d == 0 && t > d && m < BITS_PER_WORD)
2219         {
2220           cost = MIN (shiftadd_cost[m], add_cost + shift_cost[m]);
2221           synth_mult (alg_in, t / d, cost_limit - cost);
2222
2223           cost += alg_in->cost;
2224           if (cost < cost_limit)
2225             {
2226               struct algorithm *x;
2227               x = alg_in, alg_in = best_alg, best_alg = x;
2228               best_alg->log[best_alg->ops] = m;
2229               best_alg->op[best_alg->ops] = alg_add_factor;
2230               cost_limit = cost;
2231             }
2232           /* Other factors will have been taken care of in the recursion.  */
2233           break;
2234         }
2235
2236       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2237       if (t % d == 0 && t > d && m < BITS_PER_WORD)
2238         {
2239           cost = MIN (shiftsub_cost[m], add_cost + shift_cost[m]);
2240           synth_mult (alg_in, t / d, cost_limit - cost);
2241
2242           cost += alg_in->cost;
2243           if (cost < cost_limit)
2244             {
2245               struct algorithm *x;
2246               x = alg_in, alg_in = best_alg, best_alg = x;
2247               best_alg->log[best_alg->ops] = m;
2248               best_alg->op[best_alg->ops] = alg_sub_factor;
2249               cost_limit = cost;
2250             }
2251           break;
2252         }
2253     }
2254
2255   /* Try shift-and-add (load effective address) instructions,
2256      i.e. do a*3, a*5, a*9.  */
2257   if ((t & 1) != 0)
2258     {
2259       q = t - 1;
2260       q = q & -q;
2261       m = exact_log2 (q);
2262       if (m >= 0 && m < BITS_PER_WORD)
2263         {
2264           cost = shiftadd_cost[m];
2265           synth_mult (alg_in, (t - 1) >> m, cost_limit - cost);
2266
2267           cost += alg_in->cost;
2268           if (cost < cost_limit)
2269             {
2270               struct algorithm *x;
2271               x = alg_in, alg_in = best_alg, best_alg = x;
2272               best_alg->log[best_alg->ops] = m;
2273               best_alg->op[best_alg->ops] = alg_add_t2_m;
2274               cost_limit = cost;
2275             }
2276         }
2277
2278       q = t + 1;
2279       q = q & -q;
2280       m = exact_log2 (q);
2281       if (m >= 0 && m < BITS_PER_WORD)
2282         {
2283           cost = shiftsub_cost[m];
2284           synth_mult (alg_in, (t + 1) >> m, cost_limit - cost);
2285
2286           cost += alg_in->cost;
2287           if (cost < cost_limit)
2288             {
2289               struct algorithm *x;
2290               x = alg_in, alg_in = best_alg, best_alg = x;
2291               best_alg->log[best_alg->ops] = m;
2292               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2293               cost_limit = cost;
2294             }
2295         }
2296     }
2297
2298   /* If cost_limit has not decreased since we stored it in alg_out->cost,
2299      we have not found any algorithm.  */
2300   if (cost_limit == alg_out->cost)
2301     return;
2302
2303   /* If we are getting a too long sequence for `struct algorithm'
2304      to record, make this search fail.  */
2305   if (best_alg->ops == MAX_BITS_PER_WORD)
2306     return;
2307
2308   /* Copy the algorithm from temporary space to the space at alg_out.
2309      We avoid using structure assignment because the majority of
2310      best_alg is normally undefined, and this is a critical function.  */
2311   alg_out->ops = best_alg->ops + 1;
2312   alg_out->cost = cost_limit;
2313   memcpy (alg_out->op, best_alg->op,
2314           alg_out->ops * sizeof *alg_out->op);
2315   memcpy (alg_out->log, best_alg->log,
2316           alg_out->ops * sizeof *alg_out->log);
2317 }
2318 \f
2319 /* Perform a multiplication and return an rtx for the result.
2320    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2321    TARGET is a suggestion for where to store the result (an rtx).
2322
2323    We check specially for a constant integer as OP1.
2324    If you want this check for OP0 as well, then before calling
2325    you should swap the two operands if OP0 would be constant.  */
2326
2327 rtx
2328 expand_mult (mode, op0, op1, target, unsignedp)
2329      enum machine_mode mode;
2330      rtx op0, op1, target;
2331      int unsignedp;
2332 {
2333   rtx const_op1 = op1;
2334
2335   /* synth_mult does an `unsigned int' multiply.  As long as the mode is
2336      less than or equal in size to `unsigned int' this doesn't matter.
2337      If the mode is larger than `unsigned int', then synth_mult works only
2338      if the constant value exactly fits in an `unsigned int' without any
2339      truncation.  This means that multiplying by negative values does
2340      not work; results are off by 2^32 on a 32 bit machine.  */
2341
2342   /* If we are multiplying in DImode, it may still be a win
2343      to try to work with shifts and adds.  */
2344   if (GET_CODE (op1) == CONST_DOUBLE
2345       && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2346       && HOST_BITS_PER_INT >= BITS_PER_WORD
2347       && CONST_DOUBLE_HIGH (op1) == 0)
2348     const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2349   else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2350            && GET_CODE (op1) == CONST_INT
2351            && INTVAL (op1) < 0)
2352     const_op1 = 0;
2353
2354   /* We used to test optimize here, on the grounds that it's better to
2355      produce a smaller program when -O is not used.
2356      But this causes such a terrible slowdown sometimes
2357      that it seems better to use synth_mult always.  */
2358
2359   if (const_op1 && GET_CODE (const_op1) == CONST_INT
2360       && (unsignedp || ! flag_trapv))
2361     {
2362       struct algorithm alg;
2363       struct algorithm alg2;
2364       HOST_WIDE_INT val = INTVAL (op1);
2365       HOST_WIDE_INT val_so_far;
2366       rtx insn;
2367       int mult_cost;
2368       enum {basic_variant, negate_variant, add_variant} variant = basic_variant;
2369
2370       /* op0 must be register to make mult_cost match the precomputed
2371          shiftadd_cost array.  */
2372       op0 = force_reg (mode, op0);
2373
2374       /* Try to do the computation three ways: multiply by the negative of OP1
2375          and then negate, do the multiplication directly, or do multiplication
2376          by OP1 - 1.  */
2377
2378       mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
2379       mult_cost = MIN (12 * add_cost, mult_cost);
2380
2381       synth_mult (&alg, val, mult_cost);
2382
2383       /* This works only if the inverted value actually fits in an
2384          `unsigned int' */
2385       if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2386         {
2387           synth_mult (&alg2, - val,
2388                       (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
2389           if (alg2.cost + negate_cost < alg.cost)
2390             alg = alg2, variant = negate_variant;
2391         }
2392
2393       /* This proves very useful for division-by-constant.  */
2394       synth_mult (&alg2, val - 1,
2395                   (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost);
2396       if (alg2.cost + add_cost < alg.cost)
2397         alg = alg2, variant = add_variant;
2398
2399       if (alg.cost < mult_cost)
2400         {
2401           /* We found something cheaper than a multiply insn.  */
2402           int opno;
2403           rtx accum, tem;
2404           enum machine_mode nmode;
2405
2406           op0 = protect_from_queue (op0, 0);
2407
2408           /* Avoid referencing memory over and over.
2409              For speed, but also for correctness when mem is volatile.  */
2410           if (GET_CODE (op0) == MEM)
2411             op0 = force_reg (mode, op0);
2412
2413           /* ACCUM starts out either as OP0 or as a zero, depending on
2414              the first operation.  */
2415
2416           if (alg.op[0] == alg_zero)
2417             {
2418               accum = copy_to_mode_reg (mode, const0_rtx);
2419               val_so_far = 0;
2420             }
2421           else if (alg.op[0] == alg_m)
2422             {
2423               accum = copy_to_mode_reg (mode, op0);
2424               val_so_far = 1;
2425             }
2426           else
2427             abort ();
2428
2429           for (opno = 1; opno < alg.ops; opno++)
2430             {
2431               int log = alg.log[opno];
2432               int preserve = preserve_subexpressions_p ();
2433               rtx shift_subtarget = preserve ? 0 : accum;
2434               rtx add_target
2435                 = (opno == alg.ops - 1 && target != 0 && variant != add_variant
2436                    && ! preserve)
2437                   ? target : 0;
2438               rtx accum_target = preserve ? 0 : accum;
2439
2440               switch (alg.op[opno])
2441                 {
2442                 case alg_shift:
2443                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2444                                         build_int_2 (log, 0), NULL_RTX, 0);
2445                   val_so_far <<= log;
2446                   break;
2447
2448                 case alg_add_t_m2:
2449                   tem = expand_shift (LSHIFT_EXPR, mode, op0,
2450                                       build_int_2 (log, 0), NULL_RTX, 0);
2451                   accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2452                                          add_target
2453                                          ? add_target : accum_target);
2454                   val_so_far += (HOST_WIDE_INT) 1 << log;
2455                   break;
2456
2457                 case alg_sub_t_m2:
2458                   tem = expand_shift (LSHIFT_EXPR, mode, op0,
2459                                       build_int_2 (log, 0), NULL_RTX, 0);
2460                   accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2461                                          add_target
2462                                          ? add_target : accum_target);
2463                   val_so_far -= (HOST_WIDE_INT) 1 << log;
2464                   break;
2465
2466                 case alg_add_t2_m:
2467                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2468                                         build_int_2 (log, 0), shift_subtarget,
2469                                         0);
2470                   accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2471                                          add_target
2472                                          ? add_target : accum_target);
2473                   val_so_far = (val_so_far << log) + 1;
2474                   break;
2475
2476                 case alg_sub_t2_m:
2477                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2478                                         build_int_2 (log, 0), shift_subtarget,
2479                                         0);
2480                   accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2481                                          add_target
2482                                          ? add_target : accum_target);
2483                   val_so_far = (val_so_far << log) - 1;
2484                   break;
2485
2486                 case alg_add_factor:
2487                   tem = expand_shift (LSHIFT_EXPR, mode, accum,
2488                                       build_int_2 (log, 0), NULL_RTX, 0);
2489                   accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2490                                          add_target
2491                                          ? add_target : accum_target);
2492                   val_so_far += val_so_far << log;
2493                   break;
2494
2495                 case alg_sub_factor:
2496                   tem = expand_shift (LSHIFT_EXPR, mode, accum,
2497                                       build_int_2 (log, 0), NULL_RTX, 0);
2498                   accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2499                                          (add_target ? add_target
2500                                           : preserve ? 0 : tem));
2501                   val_so_far = (val_so_far << log) - val_so_far;
2502                   break;
2503
2504                 default:
2505                   abort ();
2506                 }
2507
2508               /* Write a REG_EQUAL note on the last insn so that we can cse
2509                  multiplication sequences.  Note that if ACCUM is a SUBREG,
2510                  we've set the inner register and must properly indicate
2511                  that.  */
2512
2513               tem = op0, nmode = mode;
2514               if (GET_CODE (accum) == SUBREG)
2515                 {
2516                   nmode = GET_MODE (SUBREG_REG (accum));
2517                   tem = gen_lowpart (nmode, op0);
2518                 }
2519
2520               insn = get_last_insn ();
2521               set_unique_reg_note (insn,
2522                                    REG_EQUAL,
2523                                    gen_rtx_MULT (nmode, tem,
2524                                                  GEN_INT (val_so_far)));
2525             }
2526
2527           if (variant == negate_variant)
2528             {
2529               val_so_far = - val_so_far;
2530               accum = expand_unop (mode, neg_optab, accum, target, 0);
2531             }
2532           else if (variant == add_variant)
2533             {
2534               val_so_far = val_so_far + 1;
2535               accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2536             }
2537
2538           if (val != val_so_far)
2539             abort ();
2540
2541           return accum;
2542         }
2543     }
2544
2545   /* This used to use umul_optab if unsigned, but for non-widening multiply
2546      there is no difference between signed and unsigned.  */
2547   op0 = expand_binop (mode,
2548                       ! unsignedp
2549                        && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
2550                        ? smulv_optab : smul_optab,
2551                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2552   if (op0 == 0)
2553     abort ();
2554   return op0;
2555 }
2556 \f
2557 /* Return the smallest n such that 2**n >= X.  */
2558
2559 int
2560 ceil_log2 (x)
2561      unsigned HOST_WIDE_INT x;
2562 {
2563   return floor_log2 (x - 1) + 1;
2564 }
2565
2566 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2567    replace division by D, and put the least significant N bits of the result
2568    in *MULTIPLIER_PTR and return the most significant bit.
2569
2570    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
2571    needed precision is in PRECISION (should be <= N).
2572
2573    PRECISION should be as small as possible so this function can choose
2574    multiplier more freely.
2575
2576    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
2577    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2578
2579    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2580    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
2581
2582 static
2583 unsigned HOST_WIDE_INT
2584 choose_multiplier (d, n, precision, multiplier_ptr, post_shift_ptr, lgup_ptr)
2585      unsigned HOST_WIDE_INT d;
2586      int n;
2587      int precision;
2588      unsigned HOST_WIDE_INT *multiplier_ptr;
2589      int *post_shift_ptr;
2590      int *lgup_ptr;
2591 {
2592   HOST_WIDE_INT mhigh_hi, mlow_hi;
2593   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
2594   int lgup, post_shift;
2595   int pow, pow2;
2596   unsigned HOST_WIDE_INT nl, dummy1;
2597   HOST_WIDE_INT nh, dummy2;
2598
2599   /* lgup = ceil(log2(divisor)); */
2600   lgup = ceil_log2 (d);
2601
2602   if (lgup > n)
2603     abort ();
2604
2605   pow = n + lgup;
2606   pow2 = n + lgup - precision;
2607
2608   if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2609     {
2610       /* We could handle this with some effort, but this case is much better
2611          handled directly with a scc insn, so rely on caller using that.  */
2612       abort ();
2613     }
2614
2615   /* mlow = 2^(N + lgup)/d */
2616  if (pow >= HOST_BITS_PER_WIDE_INT)
2617     {
2618       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2619       nl = 0;
2620     }
2621   else
2622     {
2623       nh = 0;
2624       nl = (unsigned HOST_WIDE_INT) 1 << pow;
2625     }
2626   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2627                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2628
2629   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
2630   if (pow2 >= HOST_BITS_PER_WIDE_INT)
2631     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2632   else
2633     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2634   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2635                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2636
2637   if (mhigh_hi && nh - d >= d)
2638     abort ();
2639   if (mhigh_hi > 1 || mlow_hi > 1)
2640     abort ();
2641   /* assert that mlow < mhigh.  */
2642   if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2643     abort ();
2644
2645   /* If precision == N, then mlow, mhigh exceed 2^N
2646      (but they do not exceed 2^(N+1)).  */
2647
2648   /* Reduce to lowest terms */
2649   for (post_shift = lgup; post_shift > 0; post_shift--)
2650     {
2651       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2652       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2653       if (ml_lo >= mh_lo)
2654         break;
2655
2656       mlow_hi = 0;
2657       mlow_lo = ml_lo;
2658       mhigh_hi = 0;
2659       mhigh_lo = mh_lo;
2660     }
2661
2662   *post_shift_ptr = post_shift;
2663   *lgup_ptr = lgup;
2664   if (n < HOST_BITS_PER_WIDE_INT)
2665     {
2666       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2667       *multiplier_ptr = mhigh_lo & mask;
2668       return mhigh_lo >= mask;
2669     }
2670   else
2671     {
2672       *multiplier_ptr = mhigh_lo;
2673       return mhigh_hi;
2674     }
2675 }
2676
2677 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
2678    congruent to 1 (mod 2**N).  */
2679
2680 static unsigned HOST_WIDE_INT
2681 invert_mod2n (x, n)
2682      unsigned HOST_WIDE_INT x;
2683      int n;
2684 {
2685   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
2686
2687   /* The algorithm notes that the choice y = x satisfies
2688      x*y == 1 mod 2^3, since x is assumed odd.
2689      Each iteration doubles the number of bits of significance in y.  */
2690
2691   unsigned HOST_WIDE_INT mask;
2692   unsigned HOST_WIDE_INT y = x;
2693   int nbit = 3;
2694
2695   mask = (n == HOST_BITS_PER_WIDE_INT
2696           ? ~(unsigned HOST_WIDE_INT) 0
2697           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2698
2699   while (nbit < n)
2700     {
2701       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
2702       nbit *= 2;
2703     }
2704   return y;
2705 }
2706
2707 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
2708    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
2709    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
2710    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
2711    become signed.
2712
2713    The result is put in TARGET if that is convenient.
2714
2715    MODE is the mode of operation.  */
2716
2717 rtx
2718 expand_mult_highpart_adjust (mode, adj_operand, op0, op1, target, unsignedp)
2719      enum machine_mode mode;
2720      rtx adj_operand, op0, op1, target;
2721      int unsignedp;
2722 {
2723   rtx tem;
2724   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
2725
2726   tem = expand_shift (RSHIFT_EXPR, mode, op0,
2727                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2728                       NULL_RTX, 0);
2729   tem = expand_and (mode, tem, op1, NULL_RTX);
2730   adj_operand
2731     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2732                      adj_operand);
2733
2734   tem = expand_shift (RSHIFT_EXPR, mode, op1,
2735                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2736                       NULL_RTX, 0);
2737   tem = expand_and (mode, tem, op0, NULL_RTX);
2738   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2739                           target);
2740
2741   return target;
2742 }
2743
2744 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
2745    in TARGET if that is convenient, and return where the result is.  If the
2746    operation can not be performed, 0 is returned.
2747
2748    MODE is the mode of operation and result.
2749
2750    UNSIGNEDP nonzero means unsigned multiply.
2751
2752    MAX_COST is the total allowed cost for the expanded RTL.  */
2753
2754 rtx
2755 expand_mult_highpart (mode, op0, cnst1, target, unsignedp, max_cost)
2756      enum machine_mode mode;
2757      rtx op0, target;
2758      unsigned HOST_WIDE_INT cnst1;
2759      int unsignedp;
2760      int max_cost;
2761 {
2762   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
2763   optab mul_highpart_optab;
2764   optab moptab;
2765   rtx tem;
2766   int size = GET_MODE_BITSIZE (mode);
2767   rtx op1, wide_op1;
2768
2769   /* We can't support modes wider than HOST_BITS_PER_INT.  */
2770   if (size > HOST_BITS_PER_WIDE_INT)
2771     abort ();
2772
2773   op1 = GEN_INT (trunc_int_for_mode (cnst1, mode));
2774
2775   wide_op1
2776     = immed_double_const (cnst1,
2777                           (unsignedp
2778                            ? (HOST_WIDE_INT) 0
2779                            : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))),
2780                           wider_mode);
2781
2782   /* expand_mult handles constant multiplication of word_mode
2783      or narrower.  It does a poor job for large modes.  */
2784   if (size < BITS_PER_WORD
2785       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2786     {
2787       /* We have to do this, since expand_binop doesn't do conversion for
2788          multiply.  Maybe change expand_binop to handle widening multiply?  */
2789       op0 = convert_to_mode (wider_mode, op0, unsignedp);
2790
2791       /* We know that this can't have signed overflow, so pretend this is
2792          an unsigned multiply.  */
2793       tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, 0);
2794       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2795                           build_int_2 (size, 0), NULL_RTX, 1);
2796       return convert_modes (mode, wider_mode, tem, unsignedp);
2797     }
2798
2799   if (target == 0)
2800     target = gen_reg_rtx (mode);
2801
2802   /* Firstly, try using a multiplication insn that only generates the needed
2803      high part of the product, and in the sign flavor of unsignedp.  */
2804   if (mul_highpart_cost[(int) mode] < max_cost)
2805     {
2806       mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
2807       target = expand_binop (mode, mul_highpart_optab,
2808                              op0, op1, target, unsignedp, OPTAB_DIRECT);
2809       if (target)
2810         return target;
2811     }
2812
2813   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
2814      Need to adjust the result after the multiplication.  */
2815   if (size - 1 < BITS_PER_WORD
2816       && (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost
2817           < max_cost))
2818     {
2819       mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
2820       target = expand_binop (mode, mul_highpart_optab,
2821                              op0, op1, target, unsignedp, OPTAB_DIRECT);
2822       if (target)
2823         /* We used the wrong signedness.  Adjust the result.  */
2824         return expand_mult_highpart_adjust (mode, target, op0,
2825                                             op1, target, unsignedp);
2826     }
2827
2828   /* Try widening multiplication.  */
2829   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
2830   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2831       && mul_widen_cost[(int) wider_mode] < max_cost)
2832     {
2833       op1 = force_reg (mode, op1);
2834       goto try;
2835     }
2836
2837   /* Try widening the mode and perform a non-widening multiplication.  */
2838   moptab = smul_optab;
2839   if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2840       && size - 1 < BITS_PER_WORD
2841       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2842     {
2843       op1 = wide_op1;
2844       goto try;
2845     }
2846
2847   /* Try widening multiplication of opposite signedness, and adjust.  */
2848   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
2849   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2850       && size - 1 < BITS_PER_WORD
2851       && (mul_widen_cost[(int) wider_mode]
2852           + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
2853     {
2854       rtx regop1 = force_reg (mode, op1);
2855       tem = expand_binop (wider_mode, moptab, op0, regop1,
2856                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
2857       if (tem != 0)
2858         {
2859           /* Extract the high half of the just generated product.  */
2860           tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2861                               build_int_2 (size, 0), NULL_RTX, 1);
2862           tem = convert_modes (mode, wider_mode, tem, unsignedp);
2863           /* We used the wrong signedness.  Adjust the result.  */
2864           return expand_mult_highpart_adjust (mode, tem, op0, op1,
2865                                               target, unsignedp);
2866         }
2867     }
2868
2869   return 0;
2870
2871  try:
2872   /* Pass NULL_RTX as target since TARGET has wrong mode.  */
2873   tem = expand_binop (wider_mode, moptab, op0, op1,
2874                       NULL_RTX, unsignedp, OPTAB_WIDEN);
2875   if (tem == 0)
2876     return 0;
2877
2878   /* Extract the high half of the just generated product.  */
2879   if (mode == word_mode)
2880     {
2881       return gen_highpart (mode, tem);
2882     }
2883   else
2884     {
2885       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2886                           build_int_2 (size, 0), NULL_RTX, 1);
2887       return convert_modes (mode, wider_mode, tem, unsignedp);
2888     }
2889 }
2890 \f
2891 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
2892    if that is convenient, and returning where the result is.
2893    You may request either the quotient or the remainder as the result;
2894    specify REM_FLAG nonzero to get the remainder.
2895
2896    CODE is the expression code for which kind of division this is;
2897    it controls how rounding is done.  MODE is the machine mode to use.
2898    UNSIGNEDP nonzero means do unsigned division.  */
2899
2900 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
2901    and then correct it by or'ing in missing high bits
2902    if result of ANDI is nonzero.
2903    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
2904    This could optimize to a bfexts instruction.
2905    But C doesn't use these operations, so their optimizations are
2906    left for later.  */
2907 /* ??? For modulo, we don't actually need the highpart of the first product,
2908    the low part will do nicely.  And for small divisors, the second multiply
2909    can also be a low-part only multiply or even be completely left out.
2910    E.g. to calculate the remainder of a division by 3 with a 32 bit
2911    multiply, multiply with 0x55555556 and extract the upper two bits;
2912    the result is exact for inputs up to 0x1fffffff.
2913    The input range can be reduced by using cross-sum rules.
2914    For odd divisors >= 3, the following table gives right shift counts
2915    so that if an number is shifted by an integer multiple of the given
2916    amount, the remainder stays the same:
2917    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
2918    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
2919    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
2920    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
2921    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
2922
2923    Cross-sum rules for even numbers can be derived by leaving as many bits
2924    to the right alone as the divisor has zeros to the right.
2925    E.g. if x is an unsigned 32 bit number:
2926    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
2927    */
2928
2929 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
2930
2931 rtx
2932 expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
2933      int rem_flag;
2934      enum tree_code code;
2935      enum machine_mode mode;
2936      rtx op0, op1, target;
2937      int unsignedp;
2938 {
2939   enum machine_mode compute_mode;
2940   rtx tquotient;
2941   rtx quotient = 0, remainder = 0;
2942   rtx last;
2943   int size;
2944   rtx insn, set;
2945   optab optab1, optab2;
2946   int op1_is_constant, op1_is_pow2;
2947   int max_cost, extra_cost;
2948   static HOST_WIDE_INT last_div_const = 0;
2949
2950   op1_is_constant = GET_CODE (op1) == CONST_INT;
2951   op1_is_pow2 = (op1_is_constant
2952                  && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
2953                       || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
2954
2955   /*
2956      This is the structure of expand_divmod:
2957
2958      First comes code to fix up the operands so we can perform the operations
2959      correctly and efficiently.
2960
2961      Second comes a switch statement with code specific for each rounding mode.
2962      For some special operands this code emits all RTL for the desired
2963      operation, for other cases, it generates only a quotient and stores it in
2964      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
2965      to indicate that it has not done anything.
2966
2967      Last comes code that finishes the operation.  If QUOTIENT is set and
2968      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
2969      QUOTIENT is not set, it is computed using trunc rounding.
2970
2971      We try to generate special code for division and remainder when OP1 is a
2972      constant.  If |OP1| = 2**n we can use shifts and some other fast
2973      operations.  For other values of OP1, we compute a carefully selected
2974      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
2975      by m.
2976
2977      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
2978      half of the product.  Different strategies for generating the product are
2979      implemented in expand_mult_highpart.
2980
2981      If what we actually want is the remainder, we generate that by another
2982      by-constant multiplication and a subtraction.  */
2983
2984   /* We shouldn't be called with OP1 == const1_rtx, but some of the
2985      code below will malfunction if we are, so check here and handle
2986      the special case if so.  */
2987   if (op1 == const1_rtx)
2988     return rem_flag ? const0_rtx : op0;
2989
2990     /* When dividing by -1, we could get an overflow.
2991      negv_optab can handle overflows.  */
2992   if (! unsignedp && op1 == constm1_rtx)
2993     {
2994       if (rem_flag)
2995         return const0_rtx;
2996       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
2997                         ? negv_optab : neg_optab, op0, target, 0);
2998     }
2999
3000   if (target
3001       /* Don't use the function value register as a target
3002          since we have to read it as well as write it,
3003          and function-inlining gets confused by this.  */
3004       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3005           /* Don't clobber an operand while doing a multi-step calculation.  */
3006           || ((rem_flag || op1_is_constant)
3007               && (reg_mentioned_p (target, op0)
3008                   || (GET_CODE (op0) == MEM && GET_CODE (target) == MEM)))
3009           || reg_mentioned_p (target, op1)
3010           || (GET_CODE (op1) == MEM && GET_CODE (target) == MEM)))
3011     target = 0;
3012
3013   /* Get the mode in which to perform this computation.  Normally it will
3014      be MODE, but sometimes we can't do the desired operation in MODE.
3015      If so, pick a wider mode in which we can do the operation.  Convert
3016      to that mode at the start to avoid repeated conversions.
3017
3018      First see what operations we need.  These depend on the expression
3019      we are evaluating.  (We assume that divxx3 insns exist under the
3020      same conditions that modxx3 insns and that these insns don't normally
3021      fail.  If these assumptions are not correct, we may generate less
3022      efficient code in some cases.)
3023
3024      Then see if we find a mode in which we can open-code that operation
3025      (either a division, modulus, or shift).  Finally, check for the smallest
3026      mode for which we can do the operation with a library call.  */
3027
3028   /* We might want to refine this now that we have division-by-constant
3029      optimization.  Since expand_mult_highpart tries so many variants, it is
3030      not straightforward to generalize this.  Maybe we should make an array
3031      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3032
3033   optab1 = (op1_is_pow2 ? (unsignedp ? lshr_optab : ashr_optab)
3034             : (unsignedp ? udiv_optab : sdiv_optab));
3035   optab2 = (op1_is_pow2 ? optab1 : (unsignedp ? udivmod_optab : sdivmod_optab));
3036
3037   for (compute_mode = mode; compute_mode != VOIDmode;
3038        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3039     if (optab1->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing
3040         || optab2->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing)
3041       break;
3042
3043   if (compute_mode == VOIDmode)
3044     for (compute_mode = mode; compute_mode != VOIDmode;
3045          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3046       if (optab1->handlers[(int) compute_mode].libfunc
3047           || optab2->handlers[(int) compute_mode].libfunc)
3048         break;
3049
3050   /* If we still couldn't find a mode, use MODE, but we'll probably abort
3051      in expand_binop.  */
3052   if (compute_mode == VOIDmode)
3053     compute_mode = mode;
3054
3055   if (target && GET_MODE (target) == compute_mode)
3056     tquotient = target;
3057   else
3058     tquotient = gen_reg_rtx (compute_mode);
3059
3060   size = GET_MODE_BITSIZE (compute_mode);
3061 #if 0
3062   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3063      (mode), and thereby get better code when OP1 is a constant.  Do that
3064      later.  It will require going over all usages of SIZE below.  */
3065   size = GET_MODE_BITSIZE (mode);
3066 #endif
3067
3068   /* Only deduct something for a REM if the last divide done was
3069      for a different constant.   Then set the constant of the last
3070      divide.  */
3071   max_cost = div_cost[(int) compute_mode]
3072     - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3073                       && INTVAL (op1) == last_div_const)
3074        ? mul_cost[(int) compute_mode] + add_cost : 0);
3075
3076   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3077
3078   /* Now convert to the best mode to use.  */
3079   if (compute_mode != mode)
3080     {
3081       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3082       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3083
3084       /* convert_modes may have placed op1 into a register, so we
3085          must recompute the following.  */
3086       op1_is_constant = GET_CODE (op1) == CONST_INT;
3087       op1_is_pow2 = (op1_is_constant
3088                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3089                           || (! unsignedp
3090                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3091     }
3092
3093   /* If one of the operands is a volatile MEM, copy it into a register.  */
3094
3095   if (GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0))
3096     op0 = force_reg (compute_mode, op0);
3097   if (GET_CODE (op1) == MEM && MEM_VOLATILE_P (op1))
3098     op1 = force_reg (compute_mode, op1);
3099
3100   /* If we need the remainder or if OP1 is constant, we need to
3101      put OP0 in a register in case it has any queued subexpressions.  */
3102   if (rem_flag || op1_is_constant)
3103     op0 = force_reg (compute_mode, op0);
3104
3105   last = get_last_insn ();
3106
3107   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3108   if (unsignedp)
3109     {
3110       if (code == FLOOR_DIV_EXPR)
3111         code = TRUNC_DIV_EXPR;
3112       if (code == FLOOR_MOD_EXPR)
3113         code = TRUNC_MOD_EXPR;
3114       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3115         code = TRUNC_DIV_EXPR;
3116     }
3117
3118   if (op1 != const0_rtx)
3119     switch (code)
3120       {
3121       case TRUNC_MOD_EXPR:
3122       case TRUNC_DIV_EXPR:
3123         if (op1_is_constant)
3124           {
3125             if (unsignedp)
3126               {
3127                 unsigned HOST_WIDE_INT mh, ml;
3128                 int pre_shift, post_shift;
3129                 int dummy;
3130                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3131
3132                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3133                   {
3134                     pre_shift = floor_log2 (d);
3135                     if (rem_flag)
3136                       {
3137                         remainder
3138                           = expand_binop (compute_mode, and_optab, op0,
3139                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3140                                           remainder, 1,
3141                                           OPTAB_LIB_WIDEN);
3142                         if (remainder)
3143                           return gen_lowpart (mode, remainder);
3144                       }
3145                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3146                                              build_int_2 (pre_shift, 0),
3147                                              tquotient, 1);
3148                   }
3149                 else if (size <= HOST_BITS_PER_WIDE_INT)
3150                   {
3151                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3152                       {
3153                         /* Most significant bit of divisor is set; emit an scc
3154                            insn.  */
3155                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3156                                                     compute_mode, 1, 1);
3157                         if (quotient == 0)
3158                           goto fail1;
3159                       }
3160                     else
3161                       {
3162                         /* Find a suitable multiplier and right shift count
3163                            instead of multiplying with D.  */
3164
3165                         mh = choose_multiplier (d, size, size,
3166                                                 &ml, &post_shift, &dummy);
3167
3168                         /* If the suggested multiplier is more than SIZE bits,
3169                            we can do better for even divisors, using an
3170                            initial right shift.  */
3171                         if (mh != 0 && (d & 1) == 0)
3172                           {
3173                             pre_shift = floor_log2 (d & -d);
3174                             mh = choose_multiplier (d >> pre_shift, size,
3175                                                     size - pre_shift,
3176                                                     &ml, &post_shift, &dummy);
3177                             if (mh)
3178                               abort ();
3179                           }
3180                         else
3181                           pre_shift = 0;
3182
3183                         if (mh != 0)
3184                           {
3185                             rtx t1, t2, t3, t4;
3186
3187                             if (post_shift - 1 >= BITS_PER_WORD)
3188                               goto fail1;
3189
3190                             extra_cost = (shift_cost[post_shift - 1]
3191                                           + shift_cost[1] + 2 * add_cost);
3192                             t1 = expand_mult_highpart (compute_mode, op0, ml,
3193                                                        NULL_RTX, 1,
3194                                                        max_cost - extra_cost);
3195                             if (t1 == 0)
3196                               goto fail1;
3197                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
3198                                                                op0, t1),
3199                                                 NULL_RTX);
3200                             t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3201                                                build_int_2 (1, 0), NULL_RTX,1);
3202                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
3203                                                               t1, t3),
3204                                                 NULL_RTX);
3205                             quotient
3206                               = expand_shift (RSHIFT_EXPR, compute_mode, t4,
3207                                               build_int_2 (post_shift - 1, 0),
3208                                               tquotient, 1);
3209                           }
3210                         else
3211                           {
3212                             rtx t1, t2;
3213
3214                             if (pre_shift >= BITS_PER_WORD
3215                                 || post_shift >= BITS_PER_WORD)
3216                               goto fail1;
3217
3218                             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3219                                                build_int_2 (pre_shift, 0),
3220                                                NULL_RTX, 1);
3221                             extra_cost = (shift_cost[pre_shift]
3222                                           + shift_cost[post_shift]);
3223                             t2 = expand_mult_highpart (compute_mode, t1, ml,
3224                                                        NULL_RTX, 1,
3225                                                        max_cost - extra_cost);
3226                             if (t2 == 0)
3227                               goto fail1;
3228                             quotient
3229                               = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3230                                               build_int_2 (post_shift, 0),
3231                                               tquotient, 1);
3232                           }
3233                       }
3234                   }
3235                 else            /* Too wide mode to use tricky code */
3236                   break;
3237
3238                 insn = get_last_insn ();
3239                 if (insn != last
3240                     && (set = single_set (insn)) != 0
3241                     && SET_DEST (set) == quotient)
3242                   set_unique_reg_note (insn,
3243                                        REG_EQUAL,
3244                                        gen_rtx_UDIV (compute_mode, op0, op1));
3245               }
3246             else                /* TRUNC_DIV, signed */
3247               {
3248                 unsigned HOST_WIDE_INT ml;
3249                 int lgup, post_shift;
3250                 HOST_WIDE_INT d = INTVAL (op1);
3251                 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3252
3253                 /* n rem d = n rem -d */
3254                 if (rem_flag && d < 0)
3255                   {
3256                     d = abs_d;
3257                     op1 = GEN_INT (trunc_int_for_mode (abs_d, compute_mode));
3258                   }
3259
3260                 if (d == 1)
3261                   quotient = op0;
3262                 else if (d == -1)
3263                   quotient = expand_unop (compute_mode, neg_optab, op0,
3264                                           tquotient, 0);
3265                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3266                   {
3267                     /* This case is not handled correctly below.  */
3268                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
3269                                                 compute_mode, 1, 1);
3270                     if (quotient == 0)
3271                       goto fail1;
3272                   }
3273                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3274                          && (rem_flag ? smod_pow2_cheap : sdiv_pow2_cheap)
3275                          /* ??? The cheap metric is computed only for
3276                             word_mode.  If this operation is wider, this may
3277                             not be so.  Assume true if the optab has an
3278                             expander for this mode.  */
3279                          && (((rem_flag ? smod_optab : sdiv_optab)
3280                               ->handlers[(int) compute_mode].insn_code
3281                               != CODE_FOR_nothing)
3282                              || (sdivmod_optab->handlers[(int) compute_mode]
3283                                  .insn_code != CODE_FOR_nothing)))
3284                   ;
3285                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3286                   {
3287                     lgup = floor_log2 (abs_d);
3288                     if (BRANCH_COST < 1 || (abs_d != 2 && BRANCH_COST < 3))
3289                       {
3290                         rtx label = gen_label_rtx ();
3291                         rtx t1;
3292
3293                         t1 = copy_to_mode_reg (compute_mode, op0);
3294                         do_cmp_and_jump (t1, const0_rtx, GE,
3295                                          compute_mode, label);
3296                         expand_inc (t1, GEN_INT (trunc_int_for_mode
3297                                                  (abs_d - 1, compute_mode)));
3298                         emit_label (label);
3299                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3300                                                  build_int_2 (lgup, 0),
3301                                                  tquotient, 0);
3302                       }
3303                     else
3304                       {
3305                         rtx t1, t2, t3;
3306                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3307                                            build_int_2 (size - 1, 0),
3308                                            NULL_RTX, 0);
3309                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3310                                            build_int_2 (size - lgup, 0),
3311                                            NULL_RTX, 1);
3312                         t3 = force_operand (gen_rtx_PLUS (compute_mode,
3313                                                           op0, t2),
3314                                             NULL_RTX);
3315                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3316                                                  build_int_2 (lgup, 0),
3317                                                  tquotient, 0);
3318                       }
3319
3320                     /* We have computed OP0 / abs(OP1).  If OP1 is negative, negate
3321                        the quotient.  */
3322                     if (d < 0)
3323                       {
3324                         insn = get_last_insn ();
3325                         if (insn != last
3326                             && (set = single_set (insn)) != 0
3327                             && SET_DEST (set) == quotient
3328                             && abs_d < ((unsigned HOST_WIDE_INT) 1
3329                                         << (HOST_BITS_PER_WIDE_INT - 1)))
3330                           set_unique_reg_note (insn,
3331                                                REG_EQUAL,
3332                                                gen_rtx_DIV (compute_mode,
3333                                                             op0,
3334                                                             GEN_INT
3335                                                             (trunc_int_for_mode
3336                                                              (abs_d,
3337                                                               compute_mode))));
3338
3339                         quotient = expand_unop (compute_mode, neg_optab,
3340                                                 quotient, quotient, 0);
3341                       }
3342                   }
3343                 else if (size <= HOST_BITS_PER_WIDE_INT)
3344                   {
3345                     choose_multiplier (abs_d, size, size - 1,
3346                                        &ml, &post_shift, &lgup);
3347                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3348                       {
3349                         rtx t1, t2, t3;
3350
3351                         if (post_shift >= BITS_PER_WORD
3352                             || size - 1 >= BITS_PER_WORD)
3353                           goto fail1;
3354
3355                         extra_cost = (shift_cost[post_shift]
3356                                       + shift_cost[size - 1] + add_cost);
3357                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3358                                                    NULL_RTX, 0,
3359                                                    max_cost - extra_cost);
3360                         if (t1 == 0)
3361                           goto fail1;
3362                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3363                                            build_int_2 (post_shift, 0), NULL_RTX, 0);
3364                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3365                                            build_int_2 (size - 1, 0), NULL_RTX, 0);
3366                         if (d < 0)
3367                           quotient
3368                             = force_operand (gen_rtx_MINUS (compute_mode,
3369                                                             t3, t2),
3370                                              tquotient);
3371                         else
3372                           quotient
3373                             = force_operand (gen_rtx_MINUS (compute_mode,
3374                                                             t2, t3),
3375                                              tquotient);
3376                       }
3377                     else
3378                       {
3379                         rtx t1, t2, t3, t4;
3380
3381                         if (post_shift >= BITS_PER_WORD
3382                             || size - 1 >= BITS_PER_WORD)
3383                           goto fail1;
3384
3385                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3386                         extra_cost = (shift_cost[post_shift]
3387                                       + shift_cost[size - 1] + 2 * add_cost);
3388                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3389                                                    NULL_RTX, 0,
3390                                                    max_cost - extra_cost);
3391                         if (t1 == 0)
3392                           goto fail1;
3393                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
3394                                                           t1, op0),
3395                                             NULL_RTX);
3396                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3397                                            build_int_2 (post_shift, 0),
3398                                            NULL_RTX, 0);
3399                         t4 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3400                                            build_int_2 (size - 1, 0),
3401                                            NULL_RTX, 0);
3402                         if (d < 0)
3403                           quotient
3404                             = force_operand (gen_rtx_MINUS (compute_mode,
3405                                                             t4, t3),
3406                                              tquotient);
3407                         else
3408                           quotient
3409                             = force_operand (gen_rtx_MINUS (compute_mode,
3410                                                             t3, t4),
3411                                              tquotient);
3412                       }
3413                   }
3414                 else            /* Too wide mode to use tricky code */
3415                   break;
3416
3417                 insn = get_last_insn ();
3418                 if (insn != last
3419                     && (set = single_set (insn)) != 0
3420                     && SET_DEST (set) == quotient)
3421                   set_unique_reg_note (insn,
3422                                        REG_EQUAL,
3423                                        gen_rtx_DIV (compute_mode, op0, op1));
3424               }
3425             break;
3426           }
3427       fail1:
3428         delete_insns_since (last);
3429         break;
3430
3431       case FLOOR_DIV_EXPR:
3432       case FLOOR_MOD_EXPR:
3433       /* We will come here only for signed operations.  */
3434         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3435           {
3436             unsigned HOST_WIDE_INT mh, ml;
3437             int pre_shift, lgup, post_shift;
3438             HOST_WIDE_INT d = INTVAL (op1);
3439
3440             if (d > 0)
3441               {
3442                 /* We could just as easily deal with negative constants here,
3443                    but it does not seem worth the trouble for GCC 2.6.  */
3444                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3445                   {
3446                     pre_shift = floor_log2 (d);
3447                     if (rem_flag)
3448                       {
3449                         remainder = expand_binop (compute_mode, and_optab, op0,
3450                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3451                                                   remainder, 0, OPTAB_LIB_WIDEN);
3452                         if (remainder)
3453                           return gen_lowpart (mode, remainder);
3454                       }
3455                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3456                                              build_int_2 (pre_shift, 0),
3457                                              tquotient, 0);
3458                   }
3459                 else
3460                   {
3461                     rtx t1, t2, t3, t4;
3462
3463                     mh = choose_multiplier (d, size, size - 1,
3464                                             &ml, &post_shift, &lgup);
3465                     if (mh)
3466                       abort ();
3467
3468                     if (post_shift < BITS_PER_WORD
3469                         && size - 1 < BITS_PER_WORD)
3470                       {
3471                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3472                                            build_int_2 (size - 1, 0),
3473                                            NULL_RTX, 0);
3474                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3475                                            NULL_RTX, 0, OPTAB_WIDEN);
3476                         extra_cost = (shift_cost[post_shift]
3477                                       + shift_cost[size - 1] + 2 * add_cost);
3478                         t3 = expand_mult_highpart (compute_mode, t2, ml,
3479                                                    NULL_RTX, 1,
3480                                                    max_cost - extra_cost);
3481                         if (t3 != 0)
3482                           {
3483                             t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3484                                                build_int_2 (post_shift, 0),
3485                                                NULL_RTX, 1);
3486                             quotient = expand_binop (compute_mode, xor_optab,
3487                                                      t4, t1, tquotient, 0,
3488                                                      OPTAB_WIDEN);
3489                           }
3490                       }
3491                   }
3492               }
3493             else
3494               {
3495                 rtx nsign, t1, t2, t3, t4;
3496                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
3497                                                   op0, constm1_rtx), NULL_RTX);
3498                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3499                                    0, OPTAB_WIDEN);
3500                 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3501                                       build_int_2 (size - 1, 0), NULL_RTX, 0);
3502                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
3503                                     NULL_RTX);
3504                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3505                                     NULL_RTX, 0);
3506                 if (t4)
3507                   {
3508                     rtx t5;
3509                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3510                                       NULL_RTX, 0);
3511                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
3512                                                             t4, t5),
3513                                               tquotient);
3514                   }
3515               }
3516           }
3517
3518         if (quotient != 0)
3519           break;
3520         delete_insns_since (last);
3521
3522         /* Try using an instruction that produces both the quotient and
3523            remainder, using truncation.  We can easily compensate the quotient
3524            or remainder to get floor rounding, once we have the remainder.
3525            Notice that we compute also the final remainder value here,
3526            and return the result right away.  */
3527         if (target == 0 || GET_MODE (target) != compute_mode)
3528           target = gen_reg_rtx (compute_mode);
3529
3530         if (rem_flag)
3531           {
3532             remainder
3533               = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3534             quotient = gen_reg_rtx (compute_mode);
3535           }
3536         else
3537           {
3538             quotient
3539               = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3540             remainder = gen_reg_rtx (compute_mode);
3541           }
3542
3543         if (expand_twoval_binop (sdivmod_optab, op0, op1,
3544                                  quotient, remainder, 0))
3545           {
3546             /* This could be computed with a branch-less sequence.
3547                Save that for later.  */
3548             rtx tem;
3549             rtx label = gen_label_rtx ();
3550             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
3551             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3552                                 NULL_RTX, 0, OPTAB_WIDEN);
3553             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
3554             expand_dec (quotient, const1_rtx);
3555             expand_inc (remainder, op1);
3556             emit_label (label);
3557             return gen_lowpart (mode, rem_flag ? remainder : quotient);
3558           }
3559
3560         /* No luck with division elimination or divmod.  Have to do it
3561            by conditionally adjusting op0 *and* the result.  */
3562         {
3563           rtx label1, label2, label3, label4, label5;
3564           rtx adjusted_op0;
3565           rtx tem;
3566
3567           quotient = gen_reg_rtx (compute_mode);
3568           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3569           label1 = gen_label_rtx ();
3570           label2 = gen_label_rtx ();
3571           label3 = gen_label_rtx ();
3572           label4 = gen_label_rtx ();
3573           label5 = gen_label_rtx ();
3574           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3575           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
3576           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3577                               quotient, 0, OPTAB_LIB_WIDEN);
3578           if (tem != quotient)
3579             emit_move_insn (quotient, tem);
3580           emit_jump_insn (gen_jump (label5));
3581           emit_barrier ();
3582           emit_label (label1);
3583           expand_inc (adjusted_op0, const1_rtx);
3584           emit_jump_insn (gen_jump (label4));
3585           emit_barrier ();
3586           emit_label (label2);
3587           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
3588           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3589                               quotient, 0, OPTAB_LIB_WIDEN);
3590           if (tem != quotient)
3591             emit_move_insn (quotient, tem);
3592           emit_jump_insn (gen_jump (label5));
3593           emit_barrier ();
3594           emit_label (label3);
3595           expand_dec (adjusted_op0, const1_rtx);
3596           emit_label (label4);
3597           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3598                               quotient, 0, OPTAB_LIB_WIDEN);
3599           if (tem != quotient)
3600             emit_move_insn (quotient, tem);
3601           expand_dec (quotient, const1_rtx);
3602           emit_label (label5);
3603         }
3604         break;
3605
3606       case CEIL_DIV_EXPR:
3607       case CEIL_MOD_EXPR:
3608         if (unsignedp)
3609           {
3610             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
3611               {
3612                 rtx t1, t2, t3;
3613                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3614                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3615                                    build_int_2 (floor_log2 (d), 0),
3616                                    tquotient, 1);
3617                 t2 = expand_binop (compute_mode, and_optab, op0,
3618                                    GEN_INT (d - 1),
3619                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3620                 t3 = gen_reg_rtx (compute_mode);
3621                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3622                                       compute_mode, 1, 1);
3623                 if (t3 == 0)
3624                   {
3625                     rtx lab;
3626                     lab = gen_label_rtx ();
3627                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3628                     expand_inc (t1, const1_rtx);
3629                     emit_label (lab);
3630                     quotient = t1;
3631                   }
3632                 else
3633                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3634                                                           t1, t3),
3635                                             tquotient);
3636                 break;
3637               }
3638
3639             /* Try using an instruction that produces both the quotient and
3640                remainder, using truncation.  We can easily compensate the
3641                quotient or remainder to get ceiling rounding, once we have the
3642                remainder.  Notice that we compute also the final remainder
3643                value here, and return the result right away.  */
3644             if (target == 0 || GET_MODE (target) != compute_mode)
3645               target = gen_reg_rtx (compute_mode);
3646
3647             if (rem_flag)
3648               {
3649                 remainder = (GET_CODE (target) == REG
3650                              ? target : gen_reg_rtx (compute_mode));
3651                 quotient = gen_reg_rtx (compute_mode);
3652               }
3653             else
3654               {
3655                 quotient = (GET_CODE (target) == REG
3656                             ? target : gen_reg_rtx (compute_mode));
3657                 remainder = gen_reg_rtx (compute_mode);
3658               }
3659
3660             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
3661                                      remainder, 1))
3662               {
3663                 /* This could be computed with a branch-less sequence.
3664                    Save that for later.  */
3665                 rtx label = gen_label_rtx ();
3666                 do_cmp_and_jump (remainder, const0_rtx, EQ,
3667                                  compute_mode, label);
3668                 expand_inc (quotient, const1_rtx);
3669                 expand_dec (remainder, op1);
3670                 emit_label (label);
3671                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3672               }
3673
3674             /* No luck with division elimination or divmod.  Have to do it
3675                by conditionally adjusting op0 *and* the result.  */
3676             {
3677               rtx label1, label2;
3678               rtx adjusted_op0, tem;
3679
3680               quotient = gen_reg_rtx (compute_mode);
3681               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3682               label1 = gen_label_rtx ();
3683               label2 = gen_label_rtx ();
3684               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
3685                                compute_mode, label1);
3686               emit_move_insn  (quotient, const0_rtx);
3687               emit_jump_insn (gen_jump (label2));
3688               emit_barrier ();
3689               emit_label (label1);
3690               expand_dec (adjusted_op0, const1_rtx);
3691               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
3692                                   quotient, 1, OPTAB_LIB_WIDEN);
3693               if (tem != quotient)
3694                 emit_move_insn (quotient, tem);
3695               expand_inc (quotient, const1_rtx);
3696               emit_label (label2);
3697             }
3698           }
3699         else /* signed */
3700           {
3701             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3702                 && INTVAL (op1) >= 0)
3703               {
3704                 /* This is extremely similar to the code for the unsigned case
3705                    above.  For 2.7 we should merge these variants, but for
3706                    2.6.1 I don't want to touch the code for unsigned since that
3707                    get used in C.  The signed case will only be used by other
3708                    languages (Ada).  */
3709
3710                 rtx t1, t2, t3;
3711                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3712                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3713                                    build_int_2 (floor_log2 (d), 0),
3714                                    tquotient, 0);
3715                 t2 = expand_binop (compute_mode, and_optab, op0,
3716                                    GEN_INT (d - 1),
3717                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3718                 t3 = gen_reg_rtx (compute_mode);
3719                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3720                                       compute_mode, 1, 1);
3721                 if (t3 == 0)
3722                   {
3723                     rtx lab;
3724                     lab = gen_label_rtx ();
3725                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3726                     expand_inc (t1, const1_rtx);
3727                     emit_label (lab);
3728                     quotient = t1;
3729                   }
3730                 else
3731                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3732                                                           t1, t3),
3733                                             tquotient);
3734                 break;
3735               }
3736
3737             /* Try using an instruction that produces both the quotient and
3738                remainder, using truncation.  We can easily compensate the
3739                quotient or remainder to get ceiling rounding, once we have the
3740                remainder.  Notice that we compute also the final remainder
3741                value here, and return the result right away.  */
3742             if (target == 0 || GET_MODE (target) != compute_mode)
3743               target = gen_reg_rtx (compute_mode);
3744             if (rem_flag)
3745               {
3746                 remainder= (GET_CODE (target) == REG
3747                             ? target : gen_reg_rtx (compute_mode));
3748                 quotient = gen_reg_rtx (compute_mode);
3749               }
3750             else
3751               {
3752                 quotient = (GET_CODE (target) == REG
3753                             ? target : gen_reg_rtx (compute_mode));
3754                 remainder = gen_reg_rtx (compute_mode);
3755               }
3756
3757             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
3758                                      remainder, 0))
3759               {
3760                 /* This could be computed with a branch-less sequence.
3761                    Save that for later.  */
3762                 rtx tem;
3763                 rtx label = gen_label_rtx ();
3764                 do_cmp_and_jump (remainder, const0_rtx, EQ,
3765                                  compute_mode, label);
3766                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3767                                     NULL_RTX, 0, OPTAB_WIDEN);
3768                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
3769                 expand_inc (quotient, const1_rtx);
3770                 expand_dec (remainder, op1);
3771                 emit_label (label);
3772                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3773               }
3774
3775             /* No luck with division elimination or divmod.  Have to do it
3776                by conditionally adjusting op0 *and* the result.  */
3777             {
3778               rtx label1, label2, label3, label4, label5;
3779               rtx adjusted_op0;
3780               rtx tem;
3781
3782               quotient = gen_reg_rtx (compute_mode);
3783               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3784               label1 = gen_label_rtx ();
3785               label2 = gen_label_rtx ();
3786               label3 = gen_label_rtx ();
3787               label4 = gen_label_rtx ();
3788               label5 = gen_label_rtx ();
3789               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3790               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
3791                                compute_mode, label1);
3792               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3793                                   quotient, 0, OPTAB_LIB_WIDEN);
3794               if (tem != quotient)
3795                 emit_move_insn (quotient, tem);
3796               emit_jump_insn (gen_jump (label5));
3797               emit_barrier ();
3798               emit_label (label1);
3799               expand_dec (adjusted_op0, const1_rtx);
3800               emit_jump_insn (gen_jump (label4));
3801               emit_barrier ();
3802               emit_label (label2);
3803               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
3804                                compute_mode, label3);
3805               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3806                                   quotient, 0, OPTAB_LIB_WIDEN);
3807               if (tem != quotient)
3808                 emit_move_insn (quotient, tem);
3809               emit_jump_insn (gen_jump (label5));
3810               emit_barrier ();
3811               emit_label (label3);
3812               expand_inc (adjusted_op0, const1_rtx);
3813               emit_label (label4);
3814               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3815                                   quotient, 0, OPTAB_LIB_WIDEN);
3816               if (tem != quotient)
3817                 emit_move_insn (quotient, tem);
3818               expand_inc (quotient, const1_rtx);
3819               emit_label (label5);
3820             }
3821           }
3822         break;
3823
3824       case EXACT_DIV_EXPR:
3825         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3826           {
3827             HOST_WIDE_INT d = INTVAL (op1);
3828             unsigned HOST_WIDE_INT ml;
3829             int pre_shift;
3830             rtx t1;
3831
3832             pre_shift = floor_log2 (d & -d);
3833             ml = invert_mod2n (d >> pre_shift, size);
3834             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3835                                build_int_2 (pre_shift, 0), NULL_RTX, unsignedp);
3836             quotient = expand_mult (compute_mode, t1,
3837                                     GEN_INT (trunc_int_for_mode
3838                                              (ml, compute_mode)),
3839                                     NULL_RTX, 0);
3840
3841             insn = get_last_insn ();
3842             set_unique_reg_note (insn,
3843                                  REG_EQUAL,
3844                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
3845                                                  compute_mode,
3846                                                  op0, op1));
3847           }
3848         break;
3849
3850       case ROUND_DIV_EXPR:
3851       case ROUND_MOD_EXPR:
3852         if (unsignedp)
3853           {
3854             rtx tem;
3855             rtx label;
3856             label = gen_label_rtx ();
3857             quotient = gen_reg_rtx (compute_mode);
3858             remainder = gen_reg_rtx (compute_mode);
3859             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
3860               {
3861                 rtx tem;
3862                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
3863                                          quotient, 1, OPTAB_LIB_WIDEN);
3864                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
3865                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3866                                           remainder, 1, OPTAB_LIB_WIDEN);
3867               }
3868             tem = plus_constant (op1, -1);
3869             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3870                                 build_int_2 (1, 0), NULL_RTX, 1);
3871             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
3872             expand_inc (quotient, const1_rtx);
3873             expand_dec (remainder, op1);
3874             emit_label (label);
3875           }
3876         else
3877           {
3878             rtx abs_rem, abs_op1, tem, mask;
3879             rtx label;
3880             label = gen_label_rtx ();
3881             quotient = gen_reg_rtx (compute_mode);
3882             remainder = gen_reg_rtx (compute_mode);
3883             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
3884               {
3885                 rtx tem;
3886                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
3887                                          quotient, 0, OPTAB_LIB_WIDEN);
3888                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
3889                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3890                                           remainder, 0, OPTAB_LIB_WIDEN);
3891               }
3892             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
3893             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
3894             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
3895                                 build_int_2 (1, 0), NULL_RTX, 1);
3896             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
3897             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3898                                 NULL_RTX, 0, OPTAB_WIDEN);
3899             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3900                                 build_int_2 (size - 1, 0), NULL_RTX, 0);
3901             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
3902                                 NULL_RTX, 0, OPTAB_WIDEN);
3903             tem = expand_binop (compute_mode, sub_optab, tem, mask,
3904                                 NULL_RTX, 0, OPTAB_WIDEN);
3905             expand_inc (quotient, tem);
3906             tem = expand_binop (compute_mode, xor_optab, mask, op1,
3907                                 NULL_RTX, 0, OPTAB_WIDEN);
3908             tem = expand_binop (compute_mode, sub_optab, tem, mask,
3909                                 NULL_RTX, 0, OPTAB_WIDEN);
3910             expand_dec (remainder, tem);
3911             emit_label (label);
3912           }
3913         return gen_lowpart (mode, rem_flag ? remainder : quotient);
3914
3915       default:
3916         abort ();
3917       }
3918
3919   if (quotient == 0)
3920     {
3921       if (target && GET_MODE (target) != compute_mode)
3922         target = 0;
3923
3924       if (rem_flag)
3925         {
3926           /* Try to produce the remainder without producing the quotient.
3927              If we seem to have a divmod pattern that does not require widening,
3928              don't try widening here.  We should really have an WIDEN argument
3929              to expand_twoval_binop, since what we'd really like to do here is
3930              1) try a mod insn in compute_mode
3931              2) try a divmod insn in compute_mode
3932              3) try a div insn in compute_mode and multiply-subtract to get
3933                 remainder
3934              4) try the same things with widening allowed.  */
3935           remainder
3936             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3937                                  op0, op1, target,
3938                                  unsignedp,
3939                                  ((optab2->handlers[(int) compute_mode].insn_code
3940                                    != CODE_FOR_nothing)
3941                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
3942           if (remainder == 0)
3943             {
3944               /* No luck there.  Can we do remainder and divide at once
3945                  without a library call?  */
3946               remainder = gen_reg_rtx (compute_mode);
3947               if (! expand_twoval_binop ((unsignedp
3948                                           ? udivmod_optab
3949                                           : sdivmod_optab),
3950                                          op0, op1,
3951                                          NULL_RTX, remainder, unsignedp))
3952                 remainder = 0;
3953             }
3954
3955           if (remainder)
3956             return gen_lowpart (mode, remainder);
3957         }
3958
3959       /* Produce the quotient.  Try a quotient insn, but not a library call.
3960          If we have a divmod in this mode, use it in preference to widening
3961          the div (for this test we assume it will not fail). Note that optab2
3962          is set to the one of the two optabs that the call below will use.  */
3963       quotient
3964         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
3965                              op0, op1, rem_flag ? NULL_RTX : target,
3966                              unsignedp,
3967                              ((optab2->handlers[(int) compute_mode].insn_code
3968                                != CODE_FOR_nothing)
3969                               ? OPTAB_DIRECT : OPTAB_WIDEN));
3970
3971       if (quotient == 0)
3972         {
3973           /* No luck there.  Try a quotient-and-remainder insn,
3974              keeping the quotient alone.  */
3975           quotient = gen_reg_rtx (compute_mode);
3976           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
3977                                      op0, op1,
3978                                      quotient, NULL_RTX, unsignedp))
3979             {
3980               quotient = 0;
3981               if (! rem_flag)
3982                 /* Still no luck.  If we are not computing the remainder,
3983                    use a library call for the quotient.  */
3984                 quotient = sign_expand_binop (compute_mode,
3985                                               udiv_optab, sdiv_optab,
3986                                               op0, op1, target,
3987                                               unsignedp, OPTAB_LIB_WIDEN);
3988             }
3989         }
3990     }
3991
3992   if (rem_flag)
3993     {
3994       if (target && GET_MODE (target) != compute_mode)
3995         target = 0;
3996
3997       if (quotient == 0)
3998         /* No divide instruction either.  Use library for remainder.  */
3999         remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4000                                        op0, op1, target,
4001                                        unsignedp, OPTAB_LIB_WIDEN);
4002       else
4003         {
4004           /* We divided.  Now finish doing X - Y * (X / Y).  */
4005           remainder = expand_mult (compute_mode, quotient, op1,
4006                                    NULL_RTX, unsignedp);
4007           remainder = expand_binop (compute_mode, sub_optab, op0,
4008                                     remainder, target, unsignedp,
4009                                     OPTAB_LIB_WIDEN);
4010         }
4011     }
4012
4013   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4014 }
4015 \f
4016 /* Return a tree node with data type TYPE, describing the value of X.
4017    Usually this is an RTL_EXPR, if there is no obvious better choice.
4018    X may be an expression, however we only support those expressions
4019    generated by loop.c.  */
4020
4021 tree
4022 make_tree (type, x)
4023      tree type;
4024      rtx x;
4025 {
4026   tree t;
4027
4028   switch (GET_CODE (x))
4029     {
4030     case CONST_INT:
4031       t = build_int_2 (INTVAL (x),
4032                        (TREE_UNSIGNED (type)
4033                         && (GET_MODE_BITSIZE (TYPE_MODE (type)) < HOST_BITS_PER_WIDE_INT))
4034                        || INTVAL (x) >= 0 ? 0 : -1);
4035       TREE_TYPE (t) = type;
4036       return t;
4037
4038     case CONST_DOUBLE:
4039       if (GET_MODE (x) == VOIDmode)
4040         {
4041           t = build_int_2 (CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4042           TREE_TYPE (t) = type;
4043         }
4044       else
4045         {
4046           REAL_VALUE_TYPE d;
4047
4048           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4049           t = build_real (type, d);
4050         }
4051
4052       return t;
4053
4054     case CONST_VECTOR:
4055       {
4056         int i, units;
4057         rtx elt;
4058         tree t = NULL_TREE;
4059
4060         units = CONST_VECTOR_NUNITS (x);
4061
4062         /* Build a tree with vector elements.  */
4063         for (i = units - 1; i >= 0; --i)
4064           {
4065             elt = CONST_VECTOR_ELT (x, i);
4066             t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4067           }
4068
4069         return build_vector (type, t);
4070       }
4071
4072     case PLUS:
4073       return fold (build (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4074                           make_tree (type, XEXP (x, 1))));
4075
4076     case MINUS:
4077       return fold (build (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4078                           make_tree (type, XEXP (x, 1))));
4079
4080     case NEG:
4081       return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
4082
4083     case MULT:
4084       return fold (build (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4085                           make_tree (type, XEXP (x, 1))));
4086
4087     case ASHIFT:
4088       return fold (build (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4089                           make_tree (type, XEXP (x, 1))));
4090
4091     case LSHIFTRT:
4092       return fold (convert (type,
4093                             build (RSHIFT_EXPR, unsigned_type (type),
4094                                    make_tree (unsigned_type (type),
4095                                               XEXP (x, 0)),
4096                                    make_tree (type, XEXP (x, 1)))));
4097
4098     case ASHIFTRT:
4099       return fold (convert (type,
4100                             build (RSHIFT_EXPR, signed_type (type),
4101                                    make_tree (signed_type (type), XEXP (x, 0)),
4102                                    make_tree (type, XEXP (x, 1)))));
4103
4104     case DIV:
4105       if (TREE_CODE (type) != REAL_TYPE)
4106         t = signed_type (type);
4107       else
4108         t = type;
4109
4110       return fold (convert (type,
4111                             build (TRUNC_DIV_EXPR, t,
4112                                    make_tree (t, XEXP (x, 0)),
4113                                    make_tree (t, XEXP (x, 1)))));
4114     case UDIV:
4115       t = unsigned_type (type);
4116       return fold (convert (type,
4117                             build (TRUNC_DIV_EXPR, t,
4118                                    make_tree (t, XEXP (x, 0)),
4119                                    make_tree (t, XEXP (x, 1)))));
4120    default:
4121       t = make_node (RTL_EXPR);
4122       TREE_TYPE (t) = type;
4123
4124 #ifdef POINTERS_EXTEND_UNSIGNED
4125       /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4126          ptr_mode.  So convert.  */
4127       if (POINTER_TYPE_P (type) && GET_MODE (x) != TYPE_MODE (type))
4128         x = convert_memory_address (TYPE_MODE (type), x);
4129 #endif
4130
4131       RTL_EXPR_RTL (t) = x;
4132       /* There are no insns to be output
4133          when this rtl_expr is used.  */
4134       RTL_EXPR_SEQUENCE (t) = 0;
4135       return t;
4136     }
4137 }
4138
4139 /* Return an rtx representing the value of X * MULT + ADD.
4140    TARGET is a suggestion for where to store the result (an rtx).
4141    MODE is the machine mode for the computation.
4142    X and MULT must have mode MODE.  ADD may have a different mode.
4143    So can X (defaults to same as MODE).
4144    UNSIGNEDP is non-zero to do unsigned multiplication.
4145    This may emit insns.  */
4146
4147 rtx
4148 expand_mult_add (x, target, mult, add, mode, unsignedp)
4149      rtx x, target, mult, add;
4150      enum machine_mode mode;
4151      int unsignedp;
4152 {
4153   tree type = type_for_mode (mode, unsignedp);
4154   tree add_type = (GET_MODE (add) == VOIDmode
4155                    ? type : type_for_mode (GET_MODE (add), unsignedp));
4156   tree result =  fold (build (PLUS_EXPR, type,
4157                               fold (build (MULT_EXPR, type,
4158                                            make_tree (type, x),
4159                                            make_tree (type, mult))),
4160                               make_tree (add_type, add)));
4161
4162   return expand_expr (result, target, VOIDmode, 0);
4163 }
4164 \f
4165 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4166    and returning TARGET.
4167
4168    If TARGET is 0, a pseudo-register or constant is returned.  */
4169
4170 rtx
4171 expand_and (mode, op0, op1, target)
4172      enum machine_mode mode;
4173      rtx op0, op1, target;
4174 {
4175   rtx tem = 0;
4176
4177   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4178     tem = simplify_binary_operation (AND, mode, op0, op1);
4179   if (tem == 0)
4180     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4181
4182   if (target == 0)
4183     target = tem;
4184   else if (tem != target)
4185     emit_move_insn (target, tem);
4186   return target;
4187 }
4188 \f
4189 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
4190    and storing in TARGET.  Normally return TARGET.
4191    Return 0 if that cannot be done.
4192
4193    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
4194    it is VOIDmode, they cannot both be CONST_INT.
4195
4196    UNSIGNEDP is for the case where we have to widen the operands
4197    to perform the operation.  It says to use zero-extension.
4198
4199    NORMALIZEP is 1 if we should convert the result to be either zero
4200    or one.  Normalize is -1 if we should convert the result to be
4201    either zero or -1.  If NORMALIZEP is zero, the result will be left
4202    "raw" out of the scc insn.  */
4203
4204 rtx
4205 emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep)
4206      rtx target;
4207      enum rtx_code code;
4208      rtx op0, op1;
4209      enum machine_mode mode;
4210      int unsignedp;
4211      int normalizep;
4212 {
4213   rtx subtarget;
4214   enum insn_code icode;
4215   enum machine_mode compare_mode;
4216   enum machine_mode target_mode = GET_MODE (target);
4217   rtx tem;
4218   rtx last = get_last_insn ();
4219   rtx pattern, comparison;
4220
4221   /* ??? Ok to do this and then fail? */
4222   op0 = protect_from_queue (op0, 0);
4223   op1 = protect_from_queue (op1, 0);
4224
4225   if (unsignedp)
4226     code = unsigned_condition (code);
4227
4228   /* If one operand is constant, make it the second one.  Only do this
4229      if the other operand is not constant as well.  */
4230
4231   if (swap_commutative_operands_p (op0, op1))
4232     {
4233       tem = op0;
4234       op0 = op1;
4235       op1 = tem;
4236       code = swap_condition (code);
4237     }
4238
4239   if (mode == VOIDmode)
4240     mode = GET_MODE (op0);
4241
4242   /* For some comparisons with 1 and -1, we can convert this to
4243      comparisons with zero.  This will often produce more opportunities for
4244      store-flag insns.  */
4245
4246   switch (code)
4247     {
4248     case LT:
4249       if (op1 == const1_rtx)
4250         op1 = const0_rtx, code = LE;
4251       break;
4252     case LE:
4253       if (op1 == constm1_rtx)
4254         op1 = const0_rtx, code = LT;
4255       break;
4256     case GE:
4257       if (op1 == const1_rtx)
4258         op1 = const0_rtx, code = GT;
4259       break;
4260     case GT:
4261       if (op1 == constm1_rtx)
4262         op1 = const0_rtx, code = GE;
4263       break;
4264     case GEU:
4265       if (op1 == const1_rtx)
4266         op1 = const0_rtx, code = NE;
4267       break;
4268     case LTU:
4269       if (op1 == const1_rtx)
4270         op1 = const0_rtx, code = EQ;
4271       break;
4272     default:
4273       break;
4274     }
4275
4276   /* If we are comparing a double-word integer with zero, we can convert
4277      the comparison into one involving a single word.  */
4278   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
4279       && GET_MODE_CLASS (mode) == MODE_INT
4280       && op1 == const0_rtx)
4281     {
4282       if (code == EQ || code == NE)
4283         {
4284           /* Do a logical OR of the two words and compare the result.  */
4285           rtx op0h = gen_highpart (word_mode, op0);
4286           rtx op0l = gen_lowpart (word_mode, op0);
4287           rtx op0both = expand_binop (word_mode, ior_optab, op0h, op0l,
4288                                       NULL_RTX, unsignedp, OPTAB_DIRECT);
4289           if (op0both != 0)
4290             return emit_store_flag (target, code, op0both, op1, word_mode,
4291                                     unsignedp, normalizep);
4292         }
4293       else if (code == LT || code == GE)
4294         /* If testing the sign bit, can just test on high word.  */
4295         return emit_store_flag (target, code, gen_highpart (word_mode, op0),
4296                                 op1, word_mode, unsignedp, normalizep);
4297     }
4298
4299   /* From now on, we won't change CODE, so set ICODE now.  */
4300   icode = setcc_gen_code[(int) code];
4301
4302   /* If this is A < 0 or A >= 0, we can do this by taking the ones
4303      complement of A (for GE) and shifting the sign bit to the low bit.  */
4304   if (op1 == const0_rtx && (code == LT || code == GE)
4305       && GET_MODE_CLASS (mode) == MODE_INT
4306       && (normalizep || STORE_FLAG_VALUE == 1
4307           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4308               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4309                   == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
4310     {
4311       subtarget = target;
4312
4313       /* If the result is to be wider than OP0, it is best to convert it
4314          first.  If it is to be narrower, it is *incorrect* to convert it
4315          first.  */
4316       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
4317         {
4318           op0 = protect_from_queue (op0, 0);
4319           op0 = convert_modes (target_mode, mode, op0, 0);
4320           mode = target_mode;
4321         }
4322
4323       if (target_mode != mode)
4324         subtarget = 0;
4325
4326       if (code == GE)
4327         op0 = expand_unop (mode, one_cmpl_optab, op0,
4328                            ((STORE_FLAG_VALUE == 1 || normalizep)
4329                             ? 0 : subtarget), 0);
4330
4331       if (STORE_FLAG_VALUE == 1 || normalizep)
4332         /* If we are supposed to produce a 0/1 value, we want to do
4333            a logical shift from the sign bit to the low-order bit; for
4334            a -1/0 value, we do an arithmetic shift.  */
4335         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
4336                             size_int (GET_MODE_BITSIZE (mode) - 1),
4337                             subtarget, normalizep != -1);
4338
4339       if (mode != target_mode)
4340         op0 = convert_modes (target_mode, mode, op0, 0);
4341
4342       return op0;
4343     }
4344
4345   if (icode != CODE_FOR_nothing)
4346     {
4347       insn_operand_predicate_fn pred;
4348
4349       /* We think we may be able to do this with a scc insn.  Emit the
4350          comparison and then the scc insn.
4351
4352          compare_from_rtx may call emit_queue, which would be deleted below
4353          if the scc insn fails.  So call it ourselves before setting LAST.
4354          Likewise for do_pending_stack_adjust.  */
4355
4356       emit_queue ();
4357       do_pending_stack_adjust ();
4358       last = get_last_insn ();
4359
4360       comparison
4361         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
4362       if (GET_CODE (comparison) == CONST_INT)
4363         return (comparison == const0_rtx ? const0_rtx
4364                 : normalizep == 1 ? const1_rtx
4365                 : normalizep == -1 ? constm1_rtx
4366                 : const_true_rtx);
4367
4368       /* The code of COMPARISON may not match CODE if compare_from_rtx
4369          decided to swap its operands and reverse the original code.
4370
4371          We know that compare_from_rtx returns either a CONST_INT or
4372          a new comparison code, so it is safe to just extract the
4373          code from COMPARISON.  */
4374       code = GET_CODE (comparison);
4375
4376       /* Get a reference to the target in the proper mode for this insn.  */
4377       compare_mode = insn_data[(int) icode].operand[0].mode;
4378       subtarget = target;
4379       pred = insn_data[(int) icode].operand[0].predicate;
4380       if (preserve_subexpressions_p ()
4381           || ! (*pred) (subtarget, compare_mode))
4382         subtarget = gen_reg_rtx (compare_mode);
4383
4384       pattern = GEN_FCN (icode) (subtarget);
4385       if (pattern)
4386         {
4387           emit_insn (pattern);
4388
4389           /* If we are converting to a wider mode, first convert to
4390              TARGET_MODE, then normalize.  This produces better combining
4391              opportunities on machines that have a SIGN_EXTRACT when we are
4392              testing a single bit.  This mostly benefits the 68k.
4393
4394              If STORE_FLAG_VALUE does not have the sign bit set when
4395              interpreted in COMPARE_MODE, we can do this conversion as
4396              unsigned, which is usually more efficient.  */
4397           if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4398             {
4399               convert_move (target, subtarget,
4400                             (GET_MODE_BITSIZE (compare_mode)
4401                              <= HOST_BITS_PER_WIDE_INT)
4402                             && 0 == (STORE_FLAG_VALUE
4403                                      & ((HOST_WIDE_INT) 1
4404                                         << (GET_MODE_BITSIZE (compare_mode) -1))));
4405               op0 = target;
4406               compare_mode = target_mode;
4407             }
4408           else
4409             op0 = subtarget;
4410
4411           /* If we want to keep subexpressions around, don't reuse our
4412              last target.  */
4413
4414           if (preserve_subexpressions_p ())
4415             subtarget = 0;
4416
4417           /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
4418              we don't have to do anything.  */
4419           if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4420             ;
4421           /* STORE_FLAG_VALUE might be the most negative number, so write
4422              the comparison this way to avoid a compiler-time warning.  */
4423           else if (- normalizep == STORE_FLAG_VALUE)
4424             op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
4425
4426           /* We don't want to use STORE_FLAG_VALUE < 0 below since this
4427              makes it hard to use a value of just the sign bit due to
4428              ANSI integer constant typing rules.  */
4429           else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
4430                    && (STORE_FLAG_VALUE
4431                        & ((HOST_WIDE_INT) 1
4432                           << (GET_MODE_BITSIZE (compare_mode) - 1))))
4433             op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
4434                                 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
4435                                 subtarget, normalizep == 1);
4436           else if (STORE_FLAG_VALUE & 1)
4437             {
4438               op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
4439               if (normalizep == -1)
4440                 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
4441             }
4442           else
4443             abort ();
4444
4445           /* If we were converting to a smaller mode, do the
4446              conversion now.  */
4447           if (target_mode != compare_mode)
4448             {
4449               convert_move (target, op0, 0);
4450               return target;
4451             }
4452           else
4453             return op0;
4454         }
4455     }
4456
4457   delete_insns_since (last);
4458
4459   /* If expensive optimizations, use different pseudo registers for each
4460      insn, instead of reusing the same pseudo.  This leads to better CSE,
4461      but slows down the compiler, since there are more pseudos */
4462   subtarget = (!flag_expensive_optimizations
4463                && (target_mode == mode)) ? target : NULL_RTX;
4464
4465   /* If we reached here, we can't do this with a scc insn.  However, there
4466      are some comparisons that can be done directly.  For example, if
4467      this is an equality comparison of integers, we can try to exclusive-or
4468      (or subtract) the two operands and use a recursive call to try the
4469      comparison with zero.  Don't do any of these cases if branches are
4470      very cheap.  */
4471
4472   if (BRANCH_COST > 0
4473       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
4474       && op1 != const0_rtx)
4475     {
4476       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
4477                           OPTAB_WIDEN);
4478
4479       if (tem == 0)
4480         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
4481                             OPTAB_WIDEN);
4482       if (tem != 0)
4483         tem = emit_store_flag (target, code, tem, const0_rtx,
4484                                mode, unsignedp, normalizep);
4485       if (tem == 0)
4486         delete_insns_since (last);
4487       return tem;
4488     }
4489
4490   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
4491      the constant zero.  Reject all other comparisons at this point.  Only
4492      do LE and GT if branches are expensive since they are expensive on
4493      2-operand machines.  */
4494
4495   if (BRANCH_COST == 0
4496       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
4497       || (code != EQ && code != NE
4498           && (BRANCH_COST <= 1 || (code != LE && code != GT))))
4499     return 0;
4500
4501   /* See what we need to return.  We can only return a 1, -1, or the
4502      sign bit.  */
4503
4504   if (normalizep == 0)
4505     {
4506       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
4507         normalizep = STORE_FLAG_VALUE;
4508
4509       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4510                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4511                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
4512         ;
4513       else
4514         return 0;
4515     }
4516
4517   /* Try to put the result of the comparison in the sign bit.  Assume we can't
4518      do the necessary operation below.  */
4519
4520   tem = 0;
4521
4522   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
4523      the sign bit set.  */
4524
4525   if (code == LE)
4526     {
4527       /* This is destructive, so SUBTARGET can't be OP0.  */
4528       if (rtx_equal_p (subtarget, op0))
4529         subtarget = 0;
4530
4531       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
4532                           OPTAB_WIDEN);
4533       if (tem)
4534         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
4535                             OPTAB_WIDEN);
4536     }
4537
4538   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
4539      number of bits in the mode of OP0, minus one.  */
4540
4541   if (code == GT)
4542     {
4543       if (rtx_equal_p (subtarget, op0))
4544         subtarget = 0;
4545
4546       tem = expand_shift (RSHIFT_EXPR, mode, op0,
4547                           size_int (GET_MODE_BITSIZE (mode) - 1),
4548                           subtarget, 0);
4549       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
4550                           OPTAB_WIDEN);
4551     }
4552
4553   if (code == EQ || code == NE)
4554     {
4555       /* For EQ or NE, one way to do the comparison is to apply an operation
4556          that converts the operand into a positive number if it is non-zero
4557          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
4558          for NE we negate.  This puts the result in the sign bit.  Then we
4559          normalize with a shift, if needed.
4560
4561          Two operations that can do the above actions are ABS and FFS, so try
4562          them.  If that doesn't work, and MODE is smaller than a full word,
4563          we can use zero-extension to the wider mode (an unsigned conversion)
4564          as the operation.  */
4565
4566       /* Note that ABS doesn't yield a positive number for INT_MIN, but
4567          that is compensated by the subsequent overflow when subtracting
4568          one / negating.  */
4569
4570       if (abs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4571         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
4572       else if (ffs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4573         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
4574       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
4575         {
4576           op0 = protect_from_queue (op0, 0);
4577           tem = convert_modes (word_mode, mode, op0, 1);
4578           mode = word_mode;
4579         }
4580
4581       if (tem != 0)
4582         {
4583           if (code == EQ)
4584             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
4585                                 0, OPTAB_WIDEN);
4586           else
4587             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
4588         }
4589
4590       /* If we couldn't do it that way, for NE we can "or" the two's complement
4591          of the value with itself.  For EQ, we take the one's complement of
4592          that "or", which is an extra insn, so we only handle EQ if branches
4593          are expensive.  */
4594
4595       if (tem == 0 && (code == NE || BRANCH_COST > 1))
4596         {
4597           if (rtx_equal_p (subtarget, op0))
4598             subtarget = 0;
4599
4600           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
4601           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
4602                               OPTAB_WIDEN);
4603
4604           if (tem && code == EQ)
4605             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
4606         }
4607     }
4608
4609   if (tem && normalizep)
4610     tem = expand_shift (RSHIFT_EXPR, mode, tem,
4611                         size_int (GET_MODE_BITSIZE (mode) - 1),
4612                         subtarget, normalizep == 1);
4613
4614   if (tem)
4615     {
4616       if (GET_MODE (tem) != target_mode)
4617         {
4618           convert_move (target, tem, 0);
4619           tem = target;
4620         }
4621       else if (!subtarget)
4622         {
4623           emit_move_insn (target, tem);
4624           tem = target;
4625         }
4626     }
4627   else
4628     delete_insns_since (last);
4629
4630   return tem;
4631 }
4632
4633 /* Like emit_store_flag, but always succeeds.  */
4634
4635 rtx
4636 emit_store_flag_force (target, code, op0, op1, mode, unsignedp, normalizep)
4637      rtx target;
4638      enum rtx_code code;
4639      rtx op0, op1;
4640      enum machine_mode mode;
4641      int unsignedp;
4642      int normalizep;
4643 {
4644   rtx tem, label;
4645
4646   /* First see if emit_store_flag can do the job.  */
4647   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
4648   if (tem != 0)
4649     return tem;
4650
4651   if (normalizep == 0)
4652     normalizep = 1;
4653
4654   /* If this failed, we have to do this with set/compare/jump/set code.  */
4655
4656   if (GET_CODE (target) != REG
4657       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
4658     target = gen_reg_rtx (GET_MODE (target));
4659
4660   emit_move_insn (target, const1_rtx);
4661   label = gen_label_rtx ();
4662   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
4663                            NULL_RTX, label);
4664
4665   emit_move_insn (target, const0_rtx);
4666   emit_label (label);
4667
4668   return target;
4669 }
4670 \f
4671 /* Perform possibly multi-word comparison and conditional jump to LABEL
4672    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
4673
4674    The algorithm is based on the code in expr.c:do_jump.
4675
4676    Note that this does not perform a general comparison.  Only variants
4677    generated within expmed.c are correctly handled, others abort (but could
4678    be handled if needed).  */
4679
4680 static void
4681 do_cmp_and_jump (arg1, arg2, op, mode, label)
4682      rtx arg1, arg2, label;
4683      enum rtx_code op;
4684      enum machine_mode mode;
4685 {
4686   /* If this mode is an integer too wide to compare properly,
4687      compare word by word.  Rely on cse to optimize constant cases.  */
4688
4689   if (GET_MODE_CLASS (mode) == MODE_INT
4690       && ! can_compare_p (op, mode, ccp_jump))
4691     {
4692       rtx label2 = gen_label_rtx ();
4693
4694       switch (op)
4695         {
4696         case LTU:
4697           do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
4698           break;
4699
4700         case LEU:
4701           do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
4702           break;
4703
4704         case LT:
4705           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
4706           break;
4707
4708         case GT:
4709           do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
4710           break;
4711
4712         case GE:
4713           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
4714           break;
4715
4716           /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
4717              that's the only equality operations we do */
4718         case EQ:
4719           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
4720             abort ();
4721           do_jump_by_parts_equality_rtx (arg1, label2, label);
4722           break;
4723
4724         case NE:
4725           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
4726             abort ();
4727           do_jump_by_parts_equality_rtx (arg1, label, label2);
4728           break;
4729
4730         default:
4731           abort ();
4732         }
4733
4734       emit_label (label2);
4735     }
4736   else
4737     emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
4738 }