gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 88, 89, 92-97, 1998 Free Software Foundation, Inc.
   4
   5 This file is part of GNU CC.
   6
   7 GNU CC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU CC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU CC; see the file COPYING.  If not, write to
  19 the Free Software Foundation, 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22
  23 #include "config.h"
  24 #include "system.h"
  25 #include "rtl.h"
  26 #include "tree.h"
  27 #include "flags.h"
  28 #include "insn-flags.h"
  29 #include "insn-codes.h"
  30 #include "insn-config.h"
  31 #include "expr.h"
  32 #include "real.h"
  33 #include "recog.h"
  34
  35 static void store_fixed_bit_field       PROTO((rtx, int, int, int, rtx, int));
  36 static void store_split_bit_field       PROTO((rtx, int, int, rtx, int));
  37 static rtx extract_fixed_bit_field      PROTO((enum machine_mode, rtx, int,
  38                                                int, int, rtx, int, int));
  39 static rtx mask_rtx                     PROTO((enum machine_mode, int,
  40                                                int, int));
  41 static rtx lshift_value                 PROTO((enum machine_mode, rtx,
  42                                                int, int));
  43 static rtx extract_split_bit_field      PROTO((rtx, int, int, int, int));
  44 static void do_cmp_and_jump             PROTO((rtx, rtx, enum rtx_code,
  45                                                enum machine_mode, rtx));
  46
  47 #define CEIL(x,y) (((x) + (y) - 1) / (y))
  48
  49 /* Non-zero means divides or modulus operations are relatively cheap for
  50    powers of two, so don't use branches; emit the operation instead.
  51    Usually, this will mean that the MD file will emit non-branch
  52    sequences.  */
  53
  54 static int sdiv_pow2_cheap, smod_pow2_cheap;
  55
  56 #ifndef SLOW_UNALIGNED_ACCESS
  57 #define SLOW_UNALIGNED_ACCESS STRICT_ALIGNMENT
  58 #endif
  59
  60 /* For compilers that support multiple targets with different word sizes,
  61    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  62    is the H8/300(H) compiler.  */
  63
  64 #ifndef MAX_BITS_PER_WORD
  65 #define MAX_BITS_PER_WORD BITS_PER_WORD
  66 #endif
  67
  68 /* Cost of various pieces of RTL.  Note that some of these are indexed by shift count,
  69    and some by mode.  */
  70 static int add_cost, negate_cost, zero_cost;
  71 static int shift_cost[MAX_BITS_PER_WORD];
  72 static int shiftadd_cost[MAX_BITS_PER_WORD];
  73 static int shiftsub_cost[MAX_BITS_PER_WORD];
  74 static int mul_cost[NUM_MACHINE_MODES];
  75 static int div_cost[NUM_MACHINE_MODES];
  76 static int mul_widen_cost[NUM_MACHINE_MODES];
  77 static int mul_highpart_cost[NUM_MACHINE_MODES];
  78
  79 void
  80 init_expmed ()
  81 {
  82   char *free_point;
  83   /* This is "some random pseudo register" for purposes of calling recog
  84      to see what insns exist.  */
  85   rtx reg = gen_rtx_REG (word_mode, 10000);
  86   rtx shift_insn, shiftadd_insn, shiftsub_insn;
  87   int dummy;
  88   int m;
  89   enum machine_mode mode, wider_mode;
  90
  91   start_sequence ();
  92
  93   /* Since we are on the permanent obstack, we must be sure we save this
  94      spot AFTER we call start_sequence, since it will reuse the rtl it
  95      makes.  */
  96   free_point = (char *) oballoc (0);
  97
  98   reg = gen_rtx (REG, word_mode, 10000);
  99
 100   zero_cost = rtx_cost (const0_rtx, 0);
 101   add_cost = rtx_cost (gen_rtx_PLUS (word_mode, reg, reg), SET);
 102
 103   shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg,
 104                                        gen_rtx_ASHIFT (word_mode, reg,
 105                                                        const0_rtx)));
 106
 107   shiftadd_insn
 108     = emit_insn (gen_rtx_SET (VOIDmode, reg,
 109                               gen_rtx_PLUS (word_mode,
 110                                             gen_rtx_MULT (word_mode,
 111                                                           reg, const0_rtx),
 112                                             reg)));
 113
 114   shiftsub_insn
 115     = emit_insn (gen_rtx_SET (VOIDmode, reg,
 116                               gen_rtx_MINUS (word_mode,
 117                                              gen_rtx_MULT (word_mode,
 118                                                            reg, const0_rtx),
 119                                              reg)));
 120
 121   init_recog ();
 122
 123   shift_cost[0] = 0;
 124   shiftadd_cost[0] = shiftsub_cost[0] = add_cost;
 125
 126   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 127     {
 128       shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;
 129
 130       XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);
 131       if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)
 132         shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);
 133
 134       XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1)
 135         = GEN_INT ((HOST_WIDE_INT) 1 << m);
 136       if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)
 137         shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);
 138
 139       XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1)
 140         = GEN_INT ((HOST_WIDE_INT) 1 << m);
 141       if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)
 142         shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);
 143     }
 144
 145   negate_cost = rtx_cost (gen_rtx_NEG (word_mode, reg), SET);
 146
 147   sdiv_pow2_cheap
 148     = (rtx_cost (gen_rtx_DIV (word_mode, reg, GEN_INT (32)), SET)
 149        <= 2 * add_cost);
 150   smod_pow2_cheap
 151     = (rtx_cost (gen_rtx_MOD (word_mode, reg, GEN_INT (32)), SET)
 152        <= 2 * add_cost);
 153
 154   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 155        mode != VOIDmode;
 156        mode = GET_MODE_WIDER_MODE (mode))
 157     {
 158       reg = gen_rtx_REG (mode, 10000);
 159       div_cost[(int) mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET);
 160       mul_cost[(int) mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET);
 161       wider_mode = GET_MODE_WIDER_MODE (mode);
 162       if (wider_mode != VOIDmode)
 163         {
 164           mul_widen_cost[(int) wider_mode]
 165             = rtx_cost (gen_rtx_MULT (wider_mode,
 166                                       gen_rtx_ZERO_EXTEND (wider_mode, reg),
 167                                       gen_rtx_ZERO_EXTEND (wider_mode, reg)),
 168                         SET);
 169           mul_highpart_cost[(int) mode]
 170             = rtx_cost (gen_rtx_TRUNCATE
 171                         (mode,
 172                          gen_rtx_LSHIFTRT
 173                          (wider_mode,
 174                           gen_rtx_MULT (wider_mode,
 175                                         gen_rtx_ZERO_EXTEND (wider_mode, reg),
 176                                         gen_rtx_ZERO_EXTEND (wider_mode, reg)),
 177                           GEN_INT (GET_MODE_BITSIZE (mode)))),
 178                         SET);
 179         }
 180     }
 181
 182   /* Free the objects we just allocated.  */
 183   end_sequence ();
 184   obfree (free_point);
 185 }
 186
 187 /* Return an rtx representing minus the value of X.
 188    MODE is the intended mode of the result,
 189    useful if X is a CONST_INT.  */
 190
 191 rtx
 192 negate_rtx (mode, x)
 193      enum machine_mode mode;
 194      rtx x;
 195 {
 196   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 197
 198   if (result == 0)
 199     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 200
 201   return result;
 202 }
 203 \f
 204 /* Generate code to store value from rtx VALUE
 205    into a bit-field within structure STR_RTX
 206    containing BITSIZE bits starting at bit BITNUM.
 207    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
 208    ALIGN is the alignment that STR_RTX is known to have, measured in bytes.
 209    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
 210
 211 /* ??? Note that there are two different ideas here for how
 212    to determine the size to count bits within, for a register.
 213    One is BITS_PER_WORD, and the other is the size of operand 3
 214    of the insv pattern.
 215
 216    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
 217    else, we use the mode of operand 3.  */
 218
 219 rtx
 220 store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, align, total_size)
 221      rtx str_rtx;
 222      register int bitsize;
 223      int bitnum;
 224      enum machine_mode fieldmode;
 225      rtx value;
 226      int align;
 227      int total_size;
 228 {
 229   int unit = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
 230   register int offset = bitnum / unit;
 231   register int bitpos = bitnum % unit;
 232   register rtx op0 = str_rtx;
 233 #ifdef HAVE_insv
 234   int insv_bitsize;
 235
 236   if (insn_operand_mode[(int) CODE_FOR_insv][3] == VOIDmode)
 237     insv_bitsize = GET_MODE_BITSIZE (word_mode);
 238   else
 239     insv_bitsize = GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_insv][3]);
 240 #endif
 241
 242   if (GET_CODE (str_rtx) == MEM && ! MEM_IN_STRUCT_P (str_rtx))
 243     abort ();
 244
 245   /* Discount the part of the structure before the desired byte.
 246      We need to know how many bytes are safe to reference after it.  */
 247   if (total_size >= 0)
 248     total_size -= (bitpos / BIGGEST_ALIGNMENT
 249                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
 250
 251   while (GET_CODE (op0) == SUBREG)
 252     {
 253       /* The following line once was done only if WORDS_BIG_ENDIAN,
 254          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 255          meaningful at a much higher level; when structures are copied
 256          between memory and regs, the higher-numbered regs
 257          always get higher addresses.  */
 258       offset += SUBREG_WORD (op0);
 259       /* We used to adjust BITPOS here, but now we do the whole adjustment
 260          right after the loop.  */
 261       op0 = SUBREG_REG (op0);
 262     }
 263
 264   /* Make sure we are playing with integral modes.  Pun with subregs
 265      if we aren't.  */
 266   {
 267     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 268     if (imode != GET_MODE (op0))
 269       {
 270         if (GET_CODE (op0) == MEM)
 271           op0 = change_address (op0, imode, NULL_RTX);
 272         else if (imode != BLKmode)
 273           op0 = gen_lowpart (imode, op0);
 274         else
 275           abort ();
 276       }
 277   }
 278
 279   /* If OP0 is a register, BITPOS must count within a word.
 280      But as we have it, it counts within whatever size OP0 now has.
 281      On a bigendian machine, these are not the same, so convert.  */
 282   if (BYTES_BIG_ENDIAN
 283       && GET_CODE (op0) != MEM
 284       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 285     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 286
 287   value = protect_from_queue (value, 0);
 288
 289   if (flag_force_mem)
 290     value = force_not_mem (value);
 291
 292   /* Note that the adjustment of BITPOS above has no effect on whether
 293      BITPOS is 0 in a REG bigger than a word.  */
 294   if (GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 295       && (GET_CODE (op0) != MEM
 296           || ! SLOW_UNALIGNED_ACCESS
 297           || (offset * BITS_PER_UNIT % bitsize == 0
 298               && align % GET_MODE_SIZE (fieldmode) == 0))
 299       && bitpos == 0 && bitsize == GET_MODE_BITSIZE (fieldmode))
 300     {
 301       /* Storing in a full-word or multi-word field in a register
 302          can be done with just SUBREG.  */
 303       if (GET_MODE (op0) != fieldmode)
 304         {
 305           if (GET_CODE (op0) == SUBREG)
 306             {
 307               if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 308                   || GET_MODE_CLASS (fieldmode) == MODE_INT
 309                   || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 310                 op0 = SUBREG_REG (op0);
 311               else
 312                 /* Else we've got some float mode source being extracted into
 313                    a different float mode destination -- this combination of
 314                    subregs results in Severe Tire Damage.  */
 315                 abort ();
 316             }
 317           if (GET_CODE (op0) == REG)
 318             op0 = gen_rtx_SUBREG (fieldmode, op0, offset);
 319           else
 320             op0 = change_address (op0, fieldmode,
 321                                   plus_constant (XEXP (op0, 0), offset));
 322         }
 323       emit_move_insn (op0, value);
 324       return value;
 325     }
 326
 327   /* Storing an lsb-aligned field in a register
 328      can be done with a movestrict instruction.  */
 329
 330   if (GET_CODE (op0) != MEM
 331       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 332       && bitsize == GET_MODE_BITSIZE (fieldmode)
 333       && (GET_MODE (op0) == fieldmode
 334           || (movstrict_optab->handlers[(int) fieldmode].insn_code
 335               != CODE_FOR_nothing)))
 336     {
 337       /* Get appropriate low part of the value being stored.  */
 338       if (GET_CODE (value) == CONST_INT || GET_CODE (value) == REG)
 339         value = gen_lowpart (fieldmode, value);
 340       else if (!(GET_CODE (value) == SYMBOL_REF
 341                  || GET_CODE (value) == LABEL_REF
 342                  || GET_CODE (value) == CONST))
 343         value = convert_to_mode (fieldmode, value, 0);
 344
 345       if (GET_MODE (op0) == fieldmode)
 346         emit_move_insn (op0, value);
 347       else
 348         {
 349           int icode = movstrict_optab->handlers[(int) fieldmode].insn_code;
 350           if (! (*insn_operand_predicate[icode][1]) (value, fieldmode))
 351             value = copy_to_mode_reg (fieldmode, value);
 352
 353           if (GET_CODE (op0) == SUBREG)
 354             {
 355               if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 356                   || GET_MODE_CLASS (fieldmode) == MODE_INT
 357                   || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 358                 op0 = SUBREG_REG (op0);
 359               else
 360                 /* Else we've got some float mode source being extracted into
 361                    a different float mode destination -- this combination of
 362                    subregs results in Severe Tire Damage.  */
 363                 abort ();
 364             }
 365
 366           emit_insn (GEN_FCN (icode)
 367                    (gen_rtx_SUBREG (fieldmode, op0, offset), value));
 368         }
 369       return value;
 370     }
 371
 372   /* Handle fields bigger than a word.  */
 373
 374   if (bitsize > BITS_PER_WORD)
 375     {
 376       /* Here we transfer the words of the field
 377          in the order least significant first.
 378          This is because the most significant word is the one which may
 379          be less than full.
 380          However, only do that if the value is not BLKmode.  */
 381
 382       int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 383
 384       int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 385       int i;
 386
 387       /* This is the mode we must force value to, so that there will be enough
 388          subwords to extract.  Note that fieldmode will often (always?) be
 389          VOIDmode, because that is what store_field uses to indicate that this
 390          is a bit field, but passing VOIDmode to operand_subword_force will
 391          result in an abort.  */
 392       fieldmode = mode_for_size (nwords * BITS_PER_WORD, MODE_INT, 0);
 393
 394       for (i = 0; i < nwords; i++)
 395         {
 396           /* If I is 0, use the low-order word in both field and target;
 397              if I is 1, use the next to lowest word; and so on.  */
 398           int wordnum = (backwards ? nwords - i - 1 : i);
 399           int bit_offset = (backwards
 400                             ? MAX (bitsize - (i + 1) * BITS_PER_WORD, 0)
 401                             : i * BITS_PER_WORD);
 402           store_bit_field (op0, MIN (BITS_PER_WORD,
 403                                      bitsize - i * BITS_PER_WORD),
 404                            bitnum + bit_offset, word_mode,
 405                            operand_subword_force (value, wordnum,
 406                                                   (GET_MODE (value) == VOIDmode
 407                                                    ? fieldmode
 408                                                    : GET_MODE (value))),
 409                            align, total_size);
 410         }
 411       return value;
 412     }
 413
 414   /* From here on we can assume that the field to be stored in is
 415      a full-word (whatever type that is), since it is shorter than a word.  */
 416
 417   /* OFFSET is the number of words or bytes (UNIT says which)
 418      from STR_RTX to the first word or byte containing part of the field.  */
 419
 420   if (GET_CODE (op0) != MEM)
 421     {
 422       if (offset != 0
 423           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 424         {
 425           if (GET_CODE (op0) != REG)
 426             op0 = copy_to_reg (op0);
 427           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 428                                 op0, offset);
 429         }
 430       offset = 0;
 431     }
 432   else
 433     {
 434       op0 = protect_from_queue (op0, 1);
 435     }
 436
 437   /* If VALUE is a floating-point mode, access it as an integer of the
 438      corresponding size.  This can occur on a machine with 64 bit registers
 439      that uses SFmode for float.  This can also occur for unaligned float
 440      structure fields.  */
 441   if (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT)
 442     {
 443       if (GET_CODE (value) != REG)
 444         value = copy_to_reg (value);
 445       value = gen_rtx_SUBREG (word_mode, value, 0);
 446     }
 447
 448   /* Now OFFSET is nonzero only if OP0 is memory
 449      and is therefore always measured in bytes.  */
 450
 451 #ifdef HAVE_insv
 452   if (HAVE_insv
 453       && GET_MODE (value) != BLKmode
 454       && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
 455       /* Ensure insv's size is wide enough for this field.  */
 456       && (insv_bitsize >= bitsize)
 457       && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 458             && (bitsize + bitpos > insv_bitsize)))
 459     {
 460       int xbitpos = bitpos;
 461       rtx value1;
 462       rtx xop0 = op0;
 463       rtx last = get_last_insn ();
 464       rtx pat;
 465       enum machine_mode maxmode;
 466       int save_volatile_ok = volatile_ok;
 467
 468       maxmode = insn_operand_mode[(int) CODE_FOR_insv][3];
 469       if (maxmode == VOIDmode)
 470         maxmode = word_mode;
 471
 472       volatile_ok = 1;
 473
 474       /* If this machine's insv can only insert into a register, copy OP0
 475          into a register and save it back later.  */
 476       /* This used to check flag_force_mem, but that was a serious
 477          de-optimization now that flag_force_mem is enabled by -O2.  */
 478       if (GET_CODE (op0) == MEM
 479           && ! ((*insn_operand_predicate[(int) CODE_FOR_insv][0])
 480                 (op0, VOIDmode)))
 481         {
 482           rtx tempreg;
 483           enum machine_mode bestmode;
 484
 485           /* Get the mode to use for inserting into this field.  If OP0 is
 486              BLKmode, get the smallest mode consistent with the alignment. If
 487              OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
 488              mode. Otherwise, use the smallest mode containing the field.  */
 489
 490           if (GET_MODE (op0) == BLKmode
 491               || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
 492             bestmode
 493               = get_best_mode (bitsize, bitnum, align * BITS_PER_UNIT, maxmode,
 494                                MEM_VOLATILE_P (op0));
 495           else
 496             bestmode = GET_MODE (op0);
 497
 498           if (bestmode == VOIDmode
 499               || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align))
 500             goto insv_loses;
 501
 502           /* Adjust address to point to the containing unit of that mode.  */
 503           unit = GET_MODE_BITSIZE (bestmode);
 504           /* Compute offset as multiple of this unit, counting in bytes.  */
 505           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 506           bitpos = bitnum % unit;
 507           op0 = change_address (op0, bestmode,
 508                                 plus_constant (XEXP (op0, 0), offset));
 509
 510           /* Fetch that unit, store the bitfield in it, then store the unit.  */
 511           tempreg = copy_to_reg (op0);
 512           store_bit_field (tempreg, bitsize, bitpos, fieldmode, value,
 513                            align, total_size);
 514           emit_move_insn (op0, tempreg);
 515           return value;
 516         }
 517       volatile_ok = save_volatile_ok;
 518
 519       /* Add OFFSET into OP0's address.  */
 520       if (GET_CODE (xop0) == MEM)
 521         xop0 = change_address (xop0, byte_mode,
 522                                plus_constant (XEXP (xop0, 0), offset));
 523
 524       /* If xop0 is a register, we need it in MAXMODE
 525          to make it acceptable to the format of insv.  */
 526       if (GET_CODE (xop0) == SUBREG)
 527         /* We can't just change the mode, because this might clobber op0,
 528            and we will need the original value of op0 if insv fails.  */
 529         xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_WORD (xop0));
 530       if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
 531         xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
 532
 533       /* On big-endian machines, we count bits from the most significant.
 534          If the bit field insn does not, we must invert.  */
 535
 536       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 537         xbitpos = unit - bitsize - xbitpos;
 538
 539       /* We have been counting XBITPOS within UNIT.
 540          Count instead within the size of the register.  */
 541       if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
 542         xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
 543
 544       unit = GET_MODE_BITSIZE (maxmode);
 545
 546       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
 547       value1 = value;
 548       if (GET_MODE (value) != maxmode)
 549         {
 550           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 551             {
 552               /* Optimization: Don't bother really extending VALUE
 553                  if it has all the bits we will actually use.  However,
 554                  if we must narrow it, be sure we do it correctly.  */
 555
 556               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
 557                 {
 558                   /* Avoid making subreg of a subreg, or of a mem.  */
 559                   if (GET_CODE (value1) != REG)
 560                     value1 = copy_to_reg (value1);
 561                   value1 = gen_rtx_SUBREG (maxmode, value1, 0);
 562                 }
 563               else
 564                 value1 = gen_lowpart (maxmode, value1);
 565             }
 566           else if (!CONSTANT_P (value))
 567             /* Parse phase is supposed to make VALUE's data type
 568                match that of the component reference, which is a type
 569                at least as wide as the field; so VALUE should have
 570                a mode that corresponds to that type.  */
 571             abort ();
 572         }
 573
 574       /* If this machine's insv insists on a register,
 575          get VALUE1 into a register.  */
 576       if (! ((*insn_operand_predicate[(int) CODE_FOR_insv][3])
 577              (value1, maxmode)))
 578         value1 = force_reg (maxmode, value1);
 579
 580       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 581       if (pat)
 582         emit_insn (pat);
 583       else
 584         {
 585           delete_insns_since (last);
 586           store_fixed_bit_field (op0, offset, bitsize, bitpos, value, align);
 587         }
 588     }
 589   else
 590     insv_loses:
 591 #endif
 592     /* Insv is not available; store using shifts and boolean ops.  */
 593     store_fixed_bit_field (op0, offset, bitsize, bitpos, value, align);
 594   return value;
 595 }
 596 \f
 597 /* Use shifts and boolean operations to store VALUE
 598    into a bit field of width BITSIZE
 599    in a memory location specified by OP0 except offset by OFFSET bytes.
 600      (OFFSET must be 0 if OP0 is a register.)
 601    The field starts at position BITPOS within the byte.
 602     (If OP0 is a register, it may be a full word or a narrower mode,
 603      but BITPOS still counts within a full word,
 604      which is significant on bigendian machines.)
 605    STRUCT_ALIGN is the alignment the structure is known to have (in bytes).
 606
 607    Note that protect_from_queue has already been done on OP0 and VALUE.  */
 608
 609 static void
 610 store_fixed_bit_field (op0, offset, bitsize, bitpos, value, struct_align)
 611      register rtx op0;
 612      register int offset, bitsize, bitpos;
 613      register rtx value;
 614      int struct_align;
 615 {
 616   register enum machine_mode mode;
 617   int total_bits = BITS_PER_WORD;
 618   rtx subtarget, temp;
 619   int all_zero = 0;
 620   int all_one = 0;
 621
 622   if (! SLOW_UNALIGNED_ACCESS)
 623     struct_align = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
 624
 625   /* There is a case not handled here:
 626      a structure with a known alignment of just a halfword
 627      and a field split across two aligned halfwords within the structure.
 628      Or likewise a structure with a known alignment of just a byte
 629      and a field split across two bytes.
 630      Such cases are not supposed to be able to occur.  */
 631
 632   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 633     {
 634       if (offset != 0)
 635         abort ();
 636       /* Special treatment for a bit field split across two registers.  */
 637       if (bitsize + bitpos > BITS_PER_WORD)
 638         {
 639           store_split_bit_field (op0, bitsize, bitpos,
 640                                  value, BITS_PER_WORD);
 641           return;
 642         }
 643     }
 644   else
 645     {
 646       /* Get the proper mode to use for this field.  We want a mode that
 647          includes the entire field.  If such a mode would be larger than
 648          a word, we won't be doing the extraction the normal way.  */
 649
 650       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 651                             struct_align * BITS_PER_UNIT, word_mode,
 652                             GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0));
 653
 654       if (mode == VOIDmode)
 655         {
 656           /* The only way this should occur is if the field spans word
 657              boundaries.  */
 658           store_split_bit_field (op0,
 659                                  bitsize, bitpos + offset * BITS_PER_UNIT,
 660                                  value, struct_align);
 661           return;
 662         }
 663
 664       total_bits = GET_MODE_BITSIZE (mode);
 665
 666       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 667          be in the range 0 to total_bits-1, and put any excess bytes in
 668          OFFSET.  */
 669       if (bitpos >= total_bits)
 670         {
 671           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 672           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 673                      * BITS_PER_UNIT);
 674         }
 675
 676       /* Get ref to an aligned byte, halfword, or word containing the field.
 677          Adjust BITPOS to be position within a word,
 678          and OFFSET to be the offset of that word.
 679          Then alter OP0 to refer to that word.  */
 680       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 681       offset -= (offset % (total_bits / BITS_PER_UNIT));
 682       op0 = change_address (op0, mode,
 683                             plus_constant (XEXP (op0, 0), offset));
 684     }
 685
 686   mode = GET_MODE (op0);
 687
 688   /* Now MODE is either some integral mode for a MEM as OP0,
 689      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 690      The bit field is contained entirely within OP0.
 691      BITPOS is the starting bit number within OP0.
 692      (OP0's mode may actually be narrower than MODE.)  */
 693
 694   if (BYTES_BIG_ENDIAN)
 695       /* BITPOS is the distance between our msb
 696          and that of the containing datum.
 697          Convert it to the distance from the lsb.  */
 698       bitpos = total_bits - bitsize - bitpos;
 699
 700   /* Now BITPOS is always the distance between our lsb
 701      and that of OP0.  */
 702
 703   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 704      we must first convert its mode to MODE.  */
 705
 706   if (GET_CODE (value) == CONST_INT)
 707     {
 708       register HOST_WIDE_INT v = INTVAL (value);
 709
 710       if (bitsize < HOST_BITS_PER_WIDE_INT)
 711         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 712
 713       if (v == 0)
 714         all_zero = 1;
 715       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 716                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 717                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 718         all_one = 1;
 719
 720       value = lshift_value (mode, value, bitpos, bitsize);
 721     }
 722   else
 723     {
 724       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 725                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 726
 727       if (GET_MODE (value) != mode)
 728         {
 729           if ((GET_CODE (value) == REG || GET_CODE (value) == SUBREG)
 730               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 731             value = gen_lowpart (mode, value);
 732           else
 733             value = convert_to_mode (mode, value, 1);
 734         }
 735
 736       if (must_and)
 737         value = expand_binop (mode, and_optab, value,
 738                               mask_rtx (mode, 0, bitsize, 0),
 739                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 740       if (bitpos > 0)
 741         value = expand_shift (LSHIFT_EXPR, mode, value,
 742                               build_int_2 (bitpos, 0), NULL_RTX, 1);
 743     }
 744
 745   /* Now clear the chosen bits in OP0,
 746      except that if VALUE is -1 we need not bother.  */
 747
 748   subtarget = (GET_CODE (op0) == REG || ! flag_force_mem) ? op0 : 0;
 749
 750   if (! all_one)
 751     {
 752       temp = expand_binop (mode, and_optab, op0,
 753                            mask_rtx (mode, bitpos, bitsize, 1),
 754                            subtarget, 1, OPTAB_LIB_WIDEN);
 755       subtarget = temp;
 756     }
 757   else
 758     temp = op0;
 759
 760   /* Now logical-or VALUE into OP0, unless it is zero.  */
 761
 762   if (! all_zero)
 763     temp = expand_binop (mode, ior_optab, temp, value,
 764                          subtarget, 1, OPTAB_LIB_WIDEN);
 765   if (op0 != temp)
 766     emit_move_insn (op0, temp);
 767 }
 768 \f
 769 /* Store a bit field that is split across multiple accessible memory objects.
 770
 771    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 772    BITSIZE is the field width; BITPOS the position of its first bit
 773    (within the word).
 774    VALUE is the value to store.
 775    ALIGN is the known alignment of OP0, measured in bytes.
 776    This is also the size of the memory objects to be used.
 777
 778    This does not yet handle fields wider than BITS_PER_WORD.  */
 779
 780 static void
 781 store_split_bit_field (op0, bitsize, bitpos, value, align)
 782      rtx op0;
 783      int bitsize, bitpos;
 784      rtx value;
 785      int align;
 786 {
 787   int unit;
 788   int bitsdone = 0;
 789
 790   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 791      much at a time.  */
 792   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 793     unit = BITS_PER_WORD;
 794   else
 795     unit = MIN (align * BITS_PER_UNIT, BITS_PER_WORD);
 796
 797   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 798      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 799      that VALUE might be a floating-point constant.  */
 800   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
 801     {
 802       rtx word = gen_lowpart_common (word_mode, value);
 803
 804       if (word && (value != word))
 805         value = word;
 806       else
 807         value = gen_lowpart_common (word_mode,
 808                                     force_reg (GET_MODE (value) != VOIDmode
 809                                                ? GET_MODE (value)
 810                                                : word_mode, value));
 811     }
 812   else if (GET_CODE (value) == ADDRESSOF)
 813     value = copy_to_reg (value);
 814
 815   while (bitsdone < bitsize)
 816     {
 817       int thissize;
 818       rtx part, word;
 819       int thispos;
 820       int offset;
 821
 822       offset = (bitpos + bitsdone) / unit;
 823       thispos = (bitpos + bitsdone) % unit;
 824
 825       /* THISSIZE must not overrun a word boundary.  Otherwise,
 826          store_fixed_bit_field will call us again, and we will mutually
 827          recurse forever.  */
 828       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
 829       thissize = MIN (thissize, unit - thispos);
 830
 831       if (BYTES_BIG_ENDIAN)
 832         {
 833           int total_bits;
 834
 835           /* We must do an endian conversion exactly the same way as it is
 836              done in extract_bit_field, so that the two calls to
 837              extract_fixed_bit_field will have comparable arguments.  */
 838           if (GET_CODE (value) != MEM || GET_MODE (value) == BLKmode)
 839             total_bits = BITS_PER_WORD;
 840           else
 841             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
 842
 843           /* Fetch successively less significant portions.  */
 844           if (GET_CODE (value) == CONST_INT)
 845             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 846                              >> (bitsize - bitsdone - thissize))
 847                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 848           else
 849             /* The args are chosen so that the last part includes the
 850                lsb.  Give extract_bit_field the value it needs (with
 851                endianness compensation) to fetch the piece we want.
 852
 853                ??? We have no idea what the alignment of VALUE is, so
 854                we have to use a guess.  */
 855             part
 856               = extract_fixed_bit_field
 857                 (word_mode, value, 0, thissize,
 858                  total_bits - bitsize + bitsdone, NULL_RTX, 1,
 859                  GET_MODE (value) == VOIDmode
 860                  ? UNITS_PER_WORD
 861                  : (GET_MODE (value) == BLKmode
 862                     ? 1
 863                     : GET_MODE_ALIGNMENT (GET_MODE (value)) / BITS_PER_UNIT));
 864         }
 865       else
 866         {
 867           /* Fetch successively more significant portions.  */
 868           if (GET_CODE (value) == CONST_INT)
 869             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 870                              >> bitsdone)
 871                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 872           else
 873             part
 874               = extract_fixed_bit_field
 875                 (word_mode, value, 0, thissize, bitsdone, NULL_RTX, 1,
 876                  GET_MODE (value) == VOIDmode
 877                  ? UNITS_PER_WORD
 878                  : (GET_MODE (value) == BLKmode
 879                     ? 1
 880                     : GET_MODE_ALIGNMENT (GET_MODE (value)) / BITS_PER_UNIT));
 881         }
 882
 883       /* If OP0 is a register, then handle OFFSET here.
 884
 885          When handling multiword bitfields, extract_bit_field may pass
 886          down a word_mode SUBREG of a larger REG for a bitfield that actually
 887          crosses a word boundary.  Thus, for a SUBREG, we must find
 888          the current word starting from the base register.  */
 889       if (GET_CODE (op0) == SUBREG)
 890         {
 891           word = operand_subword_force (SUBREG_REG (op0),
 892                                         SUBREG_WORD (op0) + offset,
 893                                         GET_MODE (SUBREG_REG (op0)));
 894           offset = 0;
 895         }
 896       else if (GET_CODE (op0) == REG)
 897         {
 898           word = operand_subword_force (op0, offset, GET_MODE (op0));
 899           offset = 0;
 900         }
 901       else
 902         word = op0;
 903
 904       /* OFFSET is in UNITs, and UNIT is in bits.
 905          store_fixed_bit_field wants offset in bytes.  */
 906       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT,
 907                              thissize, thispos, part, align);
 908       bitsdone += thissize;
 909     }
 910 }
 911 \f
 912 /* Generate code to extract a byte-field from STR_RTX
 913    containing BITSIZE bits, starting at BITNUM,
 914    and put it in TARGET if possible (if TARGET is nonzero).
 915    Regardless of TARGET, we return the rtx for where the value is placed.
 916    It may be a QUEUED.
 917
 918    STR_RTX is the structure containing the byte (a REG or MEM).
 919    UNSIGNEDP is nonzero if this is an unsigned bit field.
 920    MODE is the natural mode of the field value once extracted.
 921    TMODE is the mode the caller would like the value to have;
 922    but the value may be returned with type MODE instead.
 923
 924    ALIGN is the alignment that STR_RTX is known to have, measured in bytes.
 925    TOTAL_SIZE is the size in bytes of the containing structure,
 926    or -1 if varying.
 927
 928    If a TARGET is specified and we can store in it at no extra cost,
 929    we do so, and return TARGET.
 930    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
 931    if they are equally easy.  */
 932
 933 rtx
 934 extract_bit_field (str_rtx, bitsize, bitnum, unsignedp,
 935                    target, mode, tmode, align, total_size)
 936      rtx str_rtx;
 937      register int bitsize;
 938      int bitnum;
 939      int unsignedp;
 940      rtx target;
 941      enum machine_mode mode, tmode;
 942      int align;
 943      int total_size;
 944 {
 945   int unit = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
 946   register int offset = bitnum / unit;
 947   register int bitpos = bitnum % unit;
 948   register rtx op0 = str_rtx;
 949   rtx spec_target = target;
 950   rtx spec_target_subreg = 0;
 951 #ifdef HAVE_extv
 952   int extv_bitsize;
 953 #endif
 954 #ifdef HAVE_extzv
 955   int extzv_bitsize;
 956 #endif
 957
 958 #ifdef HAVE_extv
 959   if (insn_operand_mode[(int) CODE_FOR_extv][0] == VOIDmode)
 960     extv_bitsize = GET_MODE_BITSIZE (word_mode);
 961   else
 962     extv_bitsize = GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_extv][0]);
 963 #endif
 964
 965 #ifdef HAVE_extzv
 966   if (insn_operand_mode[(int) CODE_FOR_extzv][0] == VOIDmode)
 967     extzv_bitsize = GET_MODE_BITSIZE (word_mode);
 968   else
 969     extzv_bitsize
 970       = GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_extzv][0]);
 971 #endif
 972
 973   /* Discount the part of the structure before the desired byte.
 974      We need to know how many bytes are safe to reference after it.  */
 975   if (total_size >= 0)
 976     total_size -= (bitpos / BIGGEST_ALIGNMENT
 977                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
 978
 979   if (tmode == VOIDmode)
 980     tmode = mode;
 981   while (GET_CODE (op0) == SUBREG)
 982     {
 983       int outer_size = GET_MODE_BITSIZE (GET_MODE (op0));
 984       int inner_size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op0)));
 985
 986       offset += SUBREG_WORD (op0);
 987
 988       inner_size = MIN (inner_size, BITS_PER_WORD);
 989
 990       if (BYTES_BIG_ENDIAN && (outer_size < inner_size))
 991         {
 992           bitpos += inner_size - outer_size;
 993           if (bitpos > unit)
 994             {
 995               offset += (bitpos / unit);
 996               bitpos %= unit;
 997             }
 998         }
 999
1000       op0 = SUBREG_REG (op0);
1001     }
1002
1003   /* Make sure we are playing with integral modes.  Pun with subregs
1004      if we aren't.  */
1005   {
1006     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1007     if (imode != GET_MODE (op0))
1008       {
1009         if (GET_CODE (op0) == MEM)
1010           op0 = change_address (op0, imode, NULL_RTX);
1011         else if (imode != BLKmode)
1012           op0 = gen_lowpart (imode, op0);
1013         else
1014           abort ();
1015       }
1016   }
1017
1018   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1019      If that's wrong, the solution is to test for it and set TARGET to 0
1020      if needed.  */
1021
1022   /* If OP0 is a register, BITPOS must count within a word.
1023      But as we have it, it counts within whatever size OP0 now has.
1024      On a bigendian machine, these are not the same, so convert.  */
1025   if (BYTES_BIG_ENDIAN
1026       && GET_CODE (op0) != MEM
1027       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1028     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1029
1030   /* Extracting a full-word or multi-word value
1031      from a structure in a register or aligned memory.
1032      This can be done with just SUBREG.
1033      So too extracting a subword value in
1034      the least significant part of the register.  */
1035
1036   if (((GET_CODE (op0) != MEM
1037         && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1038                                   GET_MODE_BITSIZE (GET_MODE (op0))))
1039        || (GET_CODE (op0) == MEM
1040            && (! SLOW_UNALIGNED_ACCESS
1041                || (offset * BITS_PER_UNIT % bitsize == 0
1042                    && align * BITS_PER_UNIT % bitsize == 0))))
1043       && ((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1044            && bitpos % BITS_PER_WORD == 0)
1045           || (mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) != BLKmode
1046               /* ??? The big endian test here is wrong.  This is correct
1047                  if the value is in a register, and if mode_for_size is not
1048                  the same mode as op0.  This causes us to get unnecessarily
1049                  inefficient code from the Thumb port when -mbig-endian.  */
1050               && (BYTES_BIG_ENDIAN
1051                   ? bitpos + bitsize == BITS_PER_WORD
1052                   : bitpos == 0))))
1053     {
1054       enum machine_mode mode1
1055         = mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0);
1056
1057       if (mode1 != GET_MODE (op0))
1058         {
1059           if (GET_CODE (op0) == SUBREG)
1060             {
1061               if (GET_MODE (SUBREG_REG (op0)) == mode1
1062                   || GET_MODE_CLASS (mode1) == MODE_INT
1063                   || GET_MODE_CLASS (mode1) == MODE_PARTIAL_INT)
1064                 op0 = SUBREG_REG (op0);
1065               else
1066                 /* Else we've got some float mode source being extracted into
1067                    a different float mode destination -- this combination of
1068                    subregs results in Severe Tire Damage.  */
1069                 abort ();
1070             }
1071           if (GET_CODE (op0) == REG)
1072             op0 = gen_rtx_SUBREG (mode1, op0, offset);
1073           else
1074             op0 = change_address (op0, mode1,
1075                                   plus_constant (XEXP (op0, 0), offset));
1076         }
1077       if (mode1 != mode)
1078         return convert_to_mode (tmode, op0, unsignedp);
1079       return op0;
1080     }
1081
1082   /* Handle fields bigger than a word.  */
1083
1084   if (bitsize > BITS_PER_WORD)
1085     {
1086       /* Here we transfer the words of the field
1087          in the order least significant first.
1088          This is because the most significant word is the one which may
1089          be less than full.  */
1090
1091       int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1092       int i;
1093
1094       if (target == 0 || GET_CODE (target) != REG)
1095         target = gen_reg_rtx (mode);
1096
1097       /* Indicate for flow that the entire target reg is being set.  */
1098       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1099
1100       for (i = 0; i < nwords; i++)
1101         {
1102           /* If I is 0, use the low-order word in both field and target;
1103              if I is 1, use the next to lowest word; and so on.  */
1104           /* Word number in TARGET to use.  */
1105           int wordnum = (WORDS_BIG_ENDIAN
1106                          ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1107                          : i);
1108           /* Offset from start of field in OP0.  */
1109           int bit_offset = (WORDS_BIG_ENDIAN
1110                             ? MAX (0, bitsize - (i + 1) * BITS_PER_WORD)
1111                             : i * BITS_PER_WORD);
1112           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1113           rtx result_part
1114             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1115                                            bitsize - i * BITS_PER_WORD),
1116                                  bitnum + bit_offset,
1117                                  1, target_part, mode, word_mode,
1118                                  align, total_size);
1119
1120           if (target_part == 0)
1121             abort ();
1122
1123           if (result_part != target_part)
1124             emit_move_insn (target_part, result_part);
1125         }
1126
1127       if (unsignedp)
1128         {
1129           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1130              need to be zero'd out.  */
1131           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1132             {
1133               int i,total_words;
1134
1135               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1136               for (i = nwords; i < total_words; i++)
1137                 {
1138                   int wordnum = WORDS_BIG_ENDIAN ? total_words - i - 1 : i;
1139                   rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1140                   emit_move_insn (target_part, const0_rtx);
1141                 }
1142             }
1143           return target;
1144         }
1145
1146       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1147       target = expand_shift (LSHIFT_EXPR, mode, target,
1148                              build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1149                              NULL_RTX, 0);
1150       return expand_shift (RSHIFT_EXPR, mode, target,
1151                            build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1152                            NULL_RTX, 0);
1153     }
1154
1155   /* From here on we know the desired field is smaller than a word
1156      so we can assume it is an integer.  So we can safely extract it as one
1157      size of integer, if necessary, and then truncate or extend
1158      to the size that is wanted.  */
1159
1160   /* OFFSET is the number of words or bytes (UNIT says which)
1161      from STR_RTX to the first word or byte containing part of the field.  */
1162
1163   if (GET_CODE (op0) != MEM)
1164     {
1165       if (offset != 0
1166           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1167         {
1168           if (GET_CODE (op0) != REG)
1169             op0 = copy_to_reg (op0);
1170           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1171                                 op0, offset);
1172         }
1173       offset = 0;
1174     }
1175   else
1176     {
1177       op0 = protect_from_queue (str_rtx, 1);
1178     }
1179
1180   /* Now OFFSET is nonzero only for memory operands.  */
1181
1182   if (unsignedp)
1183     {
1184 #ifdef HAVE_extzv
1185       if (HAVE_extzv
1186           && (extzv_bitsize >= bitsize)
1187           && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1188                 && (bitsize + bitpos > extzv_bitsize)))
1189         {
1190           int xbitpos = bitpos, xoffset = offset;
1191           rtx bitsize_rtx, bitpos_rtx;
1192           rtx last = get_last_insn ();
1193           rtx xop0 = op0;
1194           rtx xtarget = target;
1195           rtx xspec_target = spec_target;
1196           rtx xspec_target_subreg = spec_target_subreg;
1197           rtx pat;
1198           enum machine_mode maxmode;
1199
1200           maxmode = insn_operand_mode[(int) CODE_FOR_extzv][0];
1201           if (maxmode == VOIDmode)
1202             maxmode = word_mode;
1203
1204           if (GET_CODE (xop0) == MEM)
1205             {
1206               int save_volatile_ok = volatile_ok;
1207               volatile_ok = 1;
1208
1209               /* Is the memory operand acceptable?  */
1210               if (! ((*insn_operand_predicate[(int) CODE_FOR_extzv][1])
1211                      (xop0, GET_MODE (xop0))))
1212                 {
1213                   /* No, load into a reg and extract from there.  */
1214                   enum machine_mode bestmode;
1215
1216                   /* Get the mode to use for inserting into this field.  If
1217                      OP0 is BLKmode, get the smallest mode consistent with the
1218                      alignment. If OP0 is a non-BLKmode object that is no
1219                      wider than MAXMODE, use its mode. Otherwise, use the
1220                      smallest mode containing the field.  */
1221
1222                   if (GET_MODE (xop0) == BLKmode
1223                       || (GET_MODE_SIZE (GET_MODE (op0))
1224                           > GET_MODE_SIZE (maxmode)))
1225                     bestmode = get_best_mode (bitsize, bitnum,
1226                                               align * BITS_PER_UNIT, maxmode,
1227                                               MEM_VOLATILE_P (xop0));
1228                   else
1229                     bestmode = GET_MODE (xop0);
1230
1231                   if (bestmode == VOIDmode
1232                       || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align))
1233                     goto extzv_loses;
1234
1235                   /* Compute offset as multiple of this unit,
1236                      counting in bytes.  */
1237                   unit = GET_MODE_BITSIZE (bestmode);
1238                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1239                   xbitpos = bitnum % unit;
1240                   xop0 = change_address (xop0, bestmode,
1241                                          plus_constant (XEXP (xop0, 0),
1242                                                         xoffset));
1243                   /* Fetch it to a register in that size.  */
1244                   xop0 = force_reg (bestmode, xop0);
1245
1246                   /* XBITPOS counts within UNIT, which is what is expected.  */
1247                 }
1248               else
1249                 /* Get ref to first byte containing part of the field.  */
1250                 xop0 = change_address (xop0, byte_mode,
1251                                        plus_constant (XEXP (xop0, 0), xoffset));
1252
1253               volatile_ok = save_volatile_ok;
1254             }
1255
1256           /* If op0 is a register, we need it in MAXMODE (which is usually
1257              SImode). to make it acceptable to the format of extzv.  */
1258           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1259             goto extzv_loses;
1260           if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1261             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1262
1263           /* On big-endian machines, we count bits from the most significant.
1264              If the bit field insn does not, we must invert.  */
1265           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1266             xbitpos = unit - bitsize - xbitpos;
1267
1268           /* Now convert from counting within UNIT to counting in MAXMODE.  */
1269           if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1270             xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1271
1272           unit = GET_MODE_BITSIZE (maxmode);
1273
1274           if (xtarget == 0
1275               || (flag_force_mem && GET_CODE (xtarget) == MEM))
1276             xtarget = xspec_target = gen_reg_rtx (tmode);
1277
1278           if (GET_MODE (xtarget) != maxmode)
1279             {
1280               if (GET_CODE (xtarget) == REG)
1281                 {
1282                   int wider = (GET_MODE_SIZE (maxmode)
1283                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1284                   xtarget = gen_lowpart (maxmode, xtarget);
1285                   if (wider)
1286                     xspec_target_subreg = xtarget;
1287                 }
1288               else
1289                 xtarget = gen_reg_rtx (maxmode);
1290             }
1291
1292           /* If this machine's extzv insists on a register target,
1293              make sure we have one.  */
1294           if (! ((*insn_operand_predicate[(int) CODE_FOR_extzv][0])
1295                  (xtarget, maxmode)))
1296             xtarget = gen_reg_rtx (maxmode);
1297
1298           bitsize_rtx = GEN_INT (bitsize);
1299           bitpos_rtx = GEN_INT (xbitpos);
1300
1301           pat = gen_extzv (protect_from_queue (xtarget, 1),
1302                            xop0, bitsize_rtx, bitpos_rtx);
1303           if (pat)
1304             {
1305               emit_insn (pat);
1306               target = xtarget;
1307               spec_target = xspec_target;
1308               spec_target_subreg = xspec_target_subreg;
1309             }
1310           else
1311             {
1312               delete_insns_since (last);
1313               target = extract_fixed_bit_field (tmode, op0, offset, bitsize,
1314                                                 bitpos, target, 1, align);
1315             }
1316         }
1317       else
1318         extzv_loses:
1319 #endif
1320         target = extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
1321                                           target, 1, align);
1322     }
1323   else
1324     {
1325 #ifdef HAVE_extv
1326       if (HAVE_extv
1327           && (extv_bitsize >= bitsize)
1328           && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1329                 && (bitsize + bitpos > extv_bitsize)))
1330         {
1331           int xbitpos = bitpos, xoffset = offset;
1332           rtx bitsize_rtx, bitpos_rtx;
1333           rtx last = get_last_insn ();
1334           rtx xop0 = op0, xtarget = target;
1335           rtx xspec_target = spec_target;
1336           rtx xspec_target_subreg = spec_target_subreg;
1337           rtx pat;
1338           enum machine_mode maxmode;
1339
1340           maxmode = insn_operand_mode[(int) CODE_FOR_extv][0];
1341           if (maxmode == VOIDmode)
1342             maxmode = word_mode;
1343
1344           if (GET_CODE (xop0) == MEM)
1345             {
1346               /* Is the memory operand acceptable?  */
1347               if (! ((*insn_operand_predicate[(int) CODE_FOR_extv][1])
1348                      (xop0, GET_MODE (xop0))))
1349                 {
1350                   /* No, load into a reg and extract from there.  */
1351                   enum machine_mode bestmode;
1352
1353                   /* Get the mode to use for inserting into this field.  If
1354                      OP0 is BLKmode, get the smallest mode consistent with the
1355                      alignment. If OP0 is a non-BLKmode object that is no
1356                      wider than MAXMODE, use its mode. Otherwise, use the
1357                      smallest mode containing the field.  */
1358
1359                   if (GET_MODE (xop0) == BLKmode
1360                       || (GET_MODE_SIZE (GET_MODE (op0))
1361                           > GET_MODE_SIZE (maxmode)))
1362                     bestmode = get_best_mode (bitsize, bitnum,
1363                                               align * BITS_PER_UNIT, maxmode,
1364                                               MEM_VOLATILE_P (xop0));
1365                   else
1366                     bestmode = GET_MODE (xop0);
1367
1368                   if (bestmode == VOIDmode
1369                       || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align))
1370                     goto extv_loses;
1371
1372                   /* Compute offset as multiple of this unit,
1373                      counting in bytes.  */
1374                   unit = GET_MODE_BITSIZE (bestmode);
1375                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1376                   xbitpos = bitnum % unit;
1377                   xop0 = change_address (xop0, bestmode,
1378                                          plus_constant (XEXP (xop0, 0),
1379                                                         xoffset));
1380                   /* Fetch it to a register in that size.  */
1381                   xop0 = force_reg (bestmode, xop0);
1382
1383                   /* XBITPOS counts within UNIT, which is what is expected.  */
1384                 }
1385               else
1386                 /* Get ref to first byte containing part of the field.  */
1387                 xop0 = change_address (xop0, byte_mode,
1388                                        plus_constant (XEXP (xop0, 0), xoffset));
1389             }
1390
1391           /* If op0 is a register, we need it in MAXMODE (which is usually
1392              SImode) to make it acceptable to the format of extv.  */
1393           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1394             goto extv_loses;
1395           if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1396             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1397
1398           /* On big-endian machines, we count bits from the most significant.
1399              If the bit field insn does not, we must invert.  */
1400           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1401             xbitpos = unit - bitsize - xbitpos;
1402
1403           /* XBITPOS counts within a size of UNIT.
1404              Adjust to count within a size of MAXMODE.  */
1405           if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1406             xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1407
1408           unit = GET_MODE_BITSIZE (maxmode);
1409
1410           if (xtarget == 0
1411               || (flag_force_mem && GET_CODE (xtarget) == MEM))
1412             xtarget = xspec_target = gen_reg_rtx (tmode);
1413
1414           if (GET_MODE (xtarget) != maxmode)
1415             {
1416               if (GET_CODE (xtarget) == REG)
1417                 {
1418                   int wider = (GET_MODE_SIZE (maxmode)
1419                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1420                   xtarget = gen_lowpart (maxmode, xtarget);
1421                   if (wider)
1422                     xspec_target_subreg = xtarget;
1423                 }
1424               else
1425                 xtarget = gen_reg_rtx (maxmode);
1426             }
1427
1428           /* If this machine's extv insists on a register target,
1429              make sure we have one.  */
1430           if (! ((*insn_operand_predicate[(int) CODE_FOR_extv][0])
1431                  (xtarget, maxmode)))
1432             xtarget = gen_reg_rtx (maxmode);
1433
1434           bitsize_rtx = GEN_INT (bitsize);
1435           bitpos_rtx = GEN_INT (xbitpos);
1436
1437           pat = gen_extv (protect_from_queue (xtarget, 1),
1438                           xop0, bitsize_rtx, bitpos_rtx);
1439           if (pat)
1440             {
1441               emit_insn (pat);
1442               target = xtarget;
1443               spec_target = xspec_target;
1444               spec_target_subreg = xspec_target_subreg;
1445             }
1446           else
1447             {
1448               delete_insns_since (last);
1449               target = extract_fixed_bit_field (tmode, op0, offset, bitsize,
1450                                                 bitpos, target, 0, align);
1451             }
1452         }
1453       else
1454         extv_loses:
1455 #endif
1456         target = extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
1457                                           target, 0, align);
1458     }
1459   if (target == spec_target)
1460     return target;
1461   if (target == spec_target_subreg)
1462     return spec_target;
1463   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1464     {
1465       /* If the target mode is floating-point, first convert to the
1466          integer mode of that size and then access it as a floating-point
1467          value via a SUBREG.  */
1468       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1469         {
1470           target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
1471                                                    MODE_INT, 0),
1472                                     target, unsignedp);
1473           if (GET_CODE (target) != REG)
1474             target = copy_to_reg (target);
1475           return gen_rtx_SUBREG (tmode, target, 0);
1476         }
1477       else
1478         return convert_to_mode (tmode, target, unsignedp);
1479     }
1480   return target;
1481 }
1482 \f
1483 /* Extract a bit field using shifts and boolean operations
1484    Returns an rtx to represent the value.
1485    OP0 addresses a register (word) or memory (byte).
1486    BITPOS says which bit within the word or byte the bit field starts in.
1487    OFFSET says how many bytes farther the bit field starts;
1488     it is 0 if OP0 is a register.
1489    BITSIZE says how many bits long the bit field is.
1490     (If OP0 is a register, it may be narrower than a full word,
1491      but BITPOS still counts within a full word,
1492      which is significant on bigendian machines.)
1493
1494    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1495    If TARGET is nonzero, attempts to store the value there
1496    and return TARGET, but this is not guaranteed.
1497    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.
1498
1499    ALIGN is the alignment that STR_RTX is known to have, measured in bytes.  */
1500
1501 static rtx
1502 extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
1503                          target, unsignedp, align)
1504      enum machine_mode tmode;
1505      register rtx op0, target;
1506      register int offset, bitsize, bitpos;
1507      int unsignedp;
1508      int align;
1509 {
1510   int total_bits = BITS_PER_WORD;
1511   enum machine_mode mode;
1512
1513   if (GET_CODE (op0) == SUBREG || GET_CODE (op0) == REG)
1514     {
1515       /* Special treatment for a bit field split across two registers.  */
1516       if (bitsize + bitpos > BITS_PER_WORD)
1517         return extract_split_bit_field (op0, bitsize, bitpos,
1518                                         unsignedp, align);
1519     }
1520   else
1521     {
1522       /* Get the proper mode to use for this field.  We want a mode that
1523          includes the entire field.  If such a mode would be larger than
1524          a word, we won't be doing the extraction the normal way.  */
1525
1526       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1527                             align * BITS_PER_UNIT, word_mode,
1528                             GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0));
1529
1530       if (mode == VOIDmode)
1531         /* The only way this should occur is if the field spans word
1532            boundaries.  */
1533         return extract_split_bit_field (op0, bitsize,
1534                                         bitpos + offset * BITS_PER_UNIT,
1535                                         unsignedp, align);
1536
1537       total_bits = GET_MODE_BITSIZE (mode);
1538
1539       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1540          be in the range 0 to total_bits-1, and put any excess bytes in
1541          OFFSET.  */
1542       if (bitpos >= total_bits)
1543         {
1544           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1545           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1546                      * BITS_PER_UNIT);
1547         }
1548
1549       /* Get ref to an aligned byte, halfword, or word containing the field.
1550          Adjust BITPOS to be position within a word,
1551          and OFFSET to be the offset of that word.
1552          Then alter OP0 to refer to that word.  */
1553       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1554       offset -= (offset % (total_bits / BITS_PER_UNIT));
1555       op0 = change_address (op0, mode,
1556                             plus_constant (XEXP (op0, 0), offset));
1557     }
1558
1559   mode = GET_MODE (op0);
1560
1561   if (BYTES_BIG_ENDIAN)
1562     {
1563       /* BITPOS is the distance between our msb and that of OP0.
1564          Convert it to the distance from the lsb.  */
1565
1566       bitpos = total_bits - bitsize - bitpos;
1567     }
1568
1569   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1570      We have reduced the big-endian case to the little-endian case.  */
1571
1572   if (unsignedp)
1573     {
1574       if (bitpos)
1575         {
1576           /* If the field does not already start at the lsb,
1577              shift it so it does.  */
1578           tree amount = build_int_2 (bitpos, 0);
1579           /* Maybe propagate the target for the shift.  */
1580           /* But not if we will return it--could confuse integrate.c.  */
1581           rtx subtarget = (target != 0 && GET_CODE (target) == REG
1582                            && !REG_FUNCTION_VALUE_P (target)
1583                            ? target : 0);
1584           if (tmode != mode) subtarget = 0;
1585           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1586         }
1587       /* Convert the value to the desired mode.  */
1588       if (mode != tmode)
1589         op0 = convert_to_mode (tmode, op0, 1);
1590
1591       /* Unless the msb of the field used to be the msb when we shifted,
1592          mask out the upper bits.  */
1593
1594       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize
1595 #if 0
1596 #ifdef SLOW_ZERO_EXTEND
1597           /* Always generate an `and' if
1598              we just zero-extended op0 and SLOW_ZERO_EXTEND, since it
1599              will combine fruitfully with the zero-extend.  */
1600           || tmode != mode
1601 #endif
1602 #endif
1603           )
1604         return expand_binop (GET_MODE (op0), and_optab, op0,
1605                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1606                              target, 1, OPTAB_LIB_WIDEN);
1607       return op0;
1608     }
1609
1610   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1611      then arithmetic-shift its lsb to the lsb of the word.  */
1612   op0 = force_reg (mode, op0);
1613   if (mode != tmode)
1614     target = 0;
1615
1616   /* Find the narrowest integer mode that contains the field.  */
1617
1618   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1619        mode = GET_MODE_WIDER_MODE (mode))
1620     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1621       {
1622         op0 = convert_to_mode (mode, op0, 0);
1623         break;
1624       }
1625
1626   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1627     {
1628       tree amount = build_int_2 (GET_MODE_BITSIZE (mode) - (bitsize + bitpos), 0);
1629       /* Maybe propagate the target for the shift.  */
1630       /* But not if we will return the result--could confuse integrate.c.  */
1631       rtx subtarget = (target != 0 && GET_CODE (target) == REG
1632                        && ! REG_FUNCTION_VALUE_P (target)
1633                        ? target : 0);
1634       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1635     }
1636
1637   return expand_shift (RSHIFT_EXPR, mode, op0,
1638                        build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1639                        target, 0);
1640 }
1641 \f
1642 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1643    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1644    complement of that if COMPLEMENT.  The mask is truncated if
1645    necessary to the width of mode MODE.  The mask is zero-extended if
1646    BITSIZE+BITPOS is too small for MODE.  */
1647
1648 static rtx
1649 mask_rtx (mode, bitpos, bitsize, complement)
1650      enum machine_mode mode;
1651      int bitpos, bitsize, complement;
1652 {
1653   HOST_WIDE_INT masklow, maskhigh;
1654
1655   if (bitpos < HOST_BITS_PER_WIDE_INT)
1656     masklow = (HOST_WIDE_INT) -1 << bitpos;
1657   else
1658     masklow = 0;
1659
1660   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1661     masklow &= ((unsigned HOST_WIDE_INT) -1
1662                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1663
1664   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1665     maskhigh = -1;
1666   else
1667     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1668
1669   if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1670     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1671                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1672   else
1673     maskhigh = 0;
1674
1675   if (complement)
1676     {
1677       maskhigh = ~maskhigh;
1678       masklow = ~masklow;
1679     }
1680
1681   return immed_double_const (masklow, maskhigh, mode);
1682 }
1683
1684 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1685    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1686
1687 static rtx
1688 lshift_value (mode, value, bitpos, bitsize)
1689      enum machine_mode mode;
1690      rtx value;
1691      int bitpos, bitsize;
1692 {
1693   unsigned HOST_WIDE_INT v = INTVAL (value);
1694   HOST_WIDE_INT low, high;
1695
1696   if (bitsize < HOST_BITS_PER_WIDE_INT)
1697     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1698
1699   if (bitpos < HOST_BITS_PER_WIDE_INT)
1700     {
1701       low = v << bitpos;
1702       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1703     }
1704   else
1705     {
1706       low = 0;
1707       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1708     }
1709
1710   return immed_double_const (low, high, mode);
1711 }
1712 \f
1713 /* Extract a bit field that is split across two words
1714    and return an RTX for the result.
1715
1716    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1717    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1718    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
1719
1720    ALIGN is the known alignment of OP0, measured in bytes.
1721    This is also the size of the memory objects to be used.  */
1722
1723 static rtx
1724 extract_split_bit_field (op0, bitsize, bitpos, unsignedp, align)
1725      rtx op0;
1726      int bitsize, bitpos, unsignedp, align;
1727 {
1728   int unit;
1729   int bitsdone = 0;
1730   rtx result = NULL_RTX;
1731   int first = 1;
1732
1733   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1734      much at a time.  */
1735   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1736     unit = BITS_PER_WORD;
1737   else
1738     unit = MIN (align * BITS_PER_UNIT, BITS_PER_WORD);
1739
1740   while (bitsdone < bitsize)
1741     {
1742       int thissize;
1743       rtx part, word;
1744       int thispos;
1745       int offset;
1746
1747       offset = (bitpos + bitsdone) / unit;
1748       thispos = (bitpos + bitsdone) % unit;
1749
1750       /* THISSIZE must not overrun a word boundary.  Otherwise,
1751          extract_fixed_bit_field will call us again, and we will mutually
1752          recurse forever.  */
1753       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1754       thissize = MIN (thissize, unit - thispos);
1755
1756       /* If OP0 is a register, then handle OFFSET here.
1757
1758          When handling multiword bitfields, extract_bit_field may pass
1759          down a word_mode SUBREG of a larger REG for a bitfield that actually
1760          crosses a word boundary.  Thus, for a SUBREG, we must find
1761          the current word starting from the base register.  */
1762       if (GET_CODE (op0) == SUBREG)
1763         {
1764           word = operand_subword_force (SUBREG_REG (op0),
1765                                         SUBREG_WORD (op0) + offset,
1766                                         GET_MODE (SUBREG_REG (op0)));
1767           offset = 0;
1768         }
1769       else if (GET_CODE (op0) == REG)
1770         {
1771           word = operand_subword_force (op0, offset, GET_MODE (op0));
1772           offset = 0;
1773         }
1774       else
1775         word = op0;
1776
1777       /* Extract the parts in bit-counting order,
1778          whose meaning is determined by BYTES_PER_UNIT.
1779          OFFSET is in UNITs, and UNIT is in bits.
1780          extract_fixed_bit_field wants offset in bytes.  */
1781       part = extract_fixed_bit_field (word_mode, word,
1782                                       offset * unit / BITS_PER_UNIT,
1783                                       thissize, thispos, 0, 1, align);
1784       bitsdone += thissize;
1785
1786       /* Shift this part into place for the result.  */
1787       if (BYTES_BIG_ENDIAN)
1788         {
1789           if (bitsize != bitsdone)
1790             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1791                                  build_int_2 (bitsize - bitsdone, 0), 0, 1);
1792         }
1793       else
1794         {
1795           if (bitsdone != thissize)
1796             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1797                                  build_int_2 (bitsdone - thissize, 0), 0, 1);
1798         }
1799
1800       if (first)
1801         result = part;
1802       else
1803         /* Combine the parts with bitwise or.  This works
1804            because we extracted each part as an unsigned bit field.  */
1805         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1806                                OPTAB_LIB_WIDEN);
1807
1808       first = 0;
1809     }
1810
1811   /* Unsigned bit field: we are done.  */
1812   if (unsignedp)
1813     return result;
1814   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1815   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1816                          build_int_2 (BITS_PER_WORD - bitsize, 0),
1817                          NULL_RTX, 0);
1818   return expand_shift (RSHIFT_EXPR, word_mode, result,
1819                        build_int_2 (BITS_PER_WORD - bitsize, 0), NULL_RTX, 0);
1820 }
1821 \f
1822 /* Add INC into TARGET.  */
1823
1824 void
1825 expand_inc (target, inc)
1826      rtx target, inc;
1827 {
1828   rtx value = expand_binop (GET_MODE (target), add_optab,
1829                             target, inc,
1830                             target, 0, OPTAB_LIB_WIDEN);
1831   if (value != target)
1832     emit_move_insn (target, value);
1833 }
1834
1835 /* Subtract DEC from TARGET.  */
1836
1837 void
1838 expand_dec (target, dec)
1839      rtx target, dec;
1840 {
1841   rtx value = expand_binop (GET_MODE (target), sub_optab,
1842                             target, dec,
1843                             target, 0, OPTAB_LIB_WIDEN);
1844   if (value != target)
1845     emit_move_insn (target, value);
1846 }
1847 \f
1848 /* Output a shift instruction for expression code CODE,
1849    with SHIFTED being the rtx for the value to shift,
1850    and AMOUNT the tree for the amount to shift by.
1851    Store the result in the rtx TARGET, if that is convenient.
1852    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
1853    Return the rtx for where the value is.  */
1854
1855 rtx
1856 expand_shift (code, mode, shifted, amount, target, unsignedp)
1857      enum tree_code code;
1858      register enum machine_mode mode;
1859      rtx shifted;
1860      tree amount;
1861      register rtx target;
1862      int unsignedp;
1863 {
1864   register rtx op1, temp = 0;
1865   register int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
1866   register int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
1867   int try;
1868
1869   /* Previously detected shift-counts computed by NEGATE_EXPR
1870      and shifted in the other direction; but that does not work
1871      on all machines.  */
1872
1873   op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
1874
1875 #ifdef SHIFT_COUNT_TRUNCATED
1876   if (SHIFT_COUNT_TRUNCATED)
1877     {
1878       if (GET_CODE (op1) == CONST_INT
1879           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
1880               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
1881         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
1882                        % GET_MODE_BITSIZE (mode));
1883       else if (GET_CODE (op1) == SUBREG
1884                && SUBREG_WORD (op1) == 0)
1885         op1 = SUBREG_REG (op1);
1886     }
1887 #endif
1888
1889   if (op1 == const0_rtx)
1890     return shifted;
1891
1892   for (try = 0; temp == 0 && try < 3; try++)
1893     {
1894       enum optab_methods methods;
1895
1896       if (try == 0)
1897         methods = OPTAB_DIRECT;
1898       else if (try == 1)
1899         methods = OPTAB_WIDEN;
1900       else
1901         methods = OPTAB_LIB_WIDEN;
1902
1903       if (rotate)
1904         {
1905           /* Widening does not work for rotation.  */
1906           if (methods == OPTAB_WIDEN)
1907             continue;
1908           else if (methods == OPTAB_LIB_WIDEN)
1909             {
1910               /* If we have been unable to open-code this by a rotation,
1911                  do it as the IOR of two shifts.  I.e., to rotate A
1912                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
1913                  where C is the bitsize of A.
1914
1915                  It is theoretically possible that the target machine might
1916                  not be able to perform either shift and hence we would
1917                  be making two libcalls rather than just the one for the
1918                  shift (similarly if IOR could not be done).  We will allow
1919                  this extremely unlikely lossage to avoid complicating the
1920                  code below.  */
1921
1922               rtx subtarget = target == shifted ? 0 : target;
1923               rtx temp1;
1924               tree type = TREE_TYPE (amount);
1925               tree new_amount = make_tree (type, op1);
1926               tree other_amount
1927                 = fold (build (MINUS_EXPR, type,
1928                                convert (type,
1929                                         build_int_2 (GET_MODE_BITSIZE (mode),
1930                                                      0)),
1931                                amount));
1932
1933               shifted = force_reg (mode, shifted);
1934
1935               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
1936                                    mode, shifted, new_amount, subtarget, 1);
1937               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
1938                                     mode, shifted, other_amount, 0, 1);
1939               return expand_binop (mode, ior_optab, temp, temp1, target,
1940                                    unsignedp, methods);
1941             }
1942
1943           temp = expand_binop (mode,
1944                                left ? rotl_optab : rotr_optab,
1945                                shifted, op1, target, unsignedp, methods);
1946
1947           /* If we don't have the rotate, but we are rotating by a constant
1948              that is in range, try a rotate in the opposite direction.  */
1949
1950           if (temp == 0 && GET_CODE (op1) == CONST_INT
1951               && INTVAL (op1) > 0 && INTVAL (op1) < GET_MODE_BITSIZE (mode))
1952             temp = expand_binop (mode,
1953                                  left ? rotr_optab : rotl_optab,
1954                                  shifted,
1955                                  GEN_INT (GET_MODE_BITSIZE (mode)
1956                                           - INTVAL (op1)),
1957                                  target, unsignedp, methods);
1958         }
1959       else if (unsignedp)
1960         temp = expand_binop (mode,
1961                              left ? ashl_optab : lshr_optab,
1962                              shifted, op1, target, unsignedp, methods);
1963
1964       /* Do arithmetic shifts.
1965          Also, if we are going to widen the operand, we can just as well
1966          use an arithmetic right-shift instead of a logical one.  */
1967       if (temp == 0 && ! rotate
1968           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
1969         {
1970           enum optab_methods methods1 = methods;
1971
1972           /* If trying to widen a log shift to an arithmetic shift,
1973              don't accept an arithmetic shift of the same size.  */
1974           if (unsignedp)
1975             methods1 = OPTAB_MUST_WIDEN;
1976
1977           /* Arithmetic shift */
1978
1979           temp = expand_binop (mode,
1980                                left ? ashl_optab : ashr_optab,
1981                                shifted, op1, target, unsignedp, methods1);
1982         }
1983
1984       /* We used to try extzv here for logical right shifts, but that was
1985          only useful for one machine, the VAX, and caused poor code
1986          generation there for lshrdi3, so the code was deleted and a
1987          define_expand for lshrsi3 was added to vax.md.  */
1988     }
1989
1990   if (temp == 0)
1991     abort ();
1992   return temp;
1993 }
1994 \f
1995 enum alg_code { alg_zero, alg_m, alg_shift,
1996                   alg_add_t_m2, alg_sub_t_m2,
1997                   alg_add_factor, alg_sub_factor,
1998                   alg_add_t2_m, alg_sub_t2_m,
1999                   alg_add, alg_subtract, alg_factor, alg_shiftop };
2000
2001 /* This structure records a sequence of operations.
2002    `ops' is the number of operations recorded.
2003    `cost' is their total cost.
2004    The operations are stored in `op' and the corresponding
2005    logarithms of the integer coefficients in `log'.
2006
2007    These are the operations:
2008    alg_zero             total := 0;
2009    alg_m                total := multiplicand;
2010    alg_shift            total := total * coeff
2011    alg_add_t_m2         total := total + multiplicand * coeff;
2012    alg_sub_t_m2         total := total - multiplicand * coeff;
2013    alg_add_factor       total := total * coeff + total;
2014    alg_sub_factor       total := total * coeff - total;
2015    alg_add_t2_m         total := total * coeff + multiplicand;
2016    alg_sub_t2_m         total := total * coeff - multiplicand;
2017
2018    The first operand must be either alg_zero or alg_m.  */
2019
2020 struct algorithm
2021 {
2022   short cost;
2023   short ops;
2024   /* The size of the OP and LOG fields are not directly related to the
2025      word size, but the worst-case algorithms will be if we have few
2026      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2027      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2028      in total wordsize operations.  */
2029   enum alg_code op[MAX_BITS_PER_WORD];
2030   char log[MAX_BITS_PER_WORD];
2031 };
2032
2033 static void synth_mult                  PROTO((struct algorithm *,
2034                                                unsigned HOST_WIDE_INT,
2035                                                int));
2036 static unsigned HOST_WIDE_INT choose_multiplier PROTO((unsigned HOST_WIDE_INT,
2037                                                        int, int,
2038                                                        unsigned HOST_WIDE_INT *,
2039                                                        int *, int *));
2040 static unsigned HOST_WIDE_INT invert_mod2n      PROTO((unsigned HOST_WIDE_INT,
2041                                                        int));
2042 /* Compute and return the best algorithm for multiplying by T.
2043    The algorithm must cost less than cost_limit
2044    If retval.cost >= COST_LIMIT, no algorithm was found and all
2045    other field of the returned struct are undefined.  */
2046
2047 static void
2048 synth_mult (alg_out, t, cost_limit)
2049      struct algorithm *alg_out;
2050      unsigned HOST_WIDE_INT t;
2051      int cost_limit;
2052 {
2053   int m;
2054   struct algorithm *alg_in, *best_alg;
2055   int cost;
2056   unsigned HOST_WIDE_INT q;
2057
2058   /* Indicate that no algorithm is yet found.  If no algorithm
2059      is found, this value will be returned and indicate failure.  */
2060   alg_out->cost = cost_limit;
2061
2062   if (cost_limit <= 0)
2063     return;
2064
2065   /* t == 1 can be done in zero cost.  */
2066   if (t == 1)
2067     {
2068       alg_out->ops = 1;
2069       alg_out->cost = 0;
2070       alg_out->op[0] = alg_m;
2071       return;
2072     }
2073
2074   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2075      fail now.  */
2076   if (t == 0)
2077     {
2078       if (zero_cost >= cost_limit)
2079         return;
2080       else
2081         {
2082           alg_out->ops = 1;
2083           alg_out->cost = zero_cost;
2084           alg_out->op[0] = alg_zero;
2085           return;
2086         }
2087     }
2088
2089   /* We'll be needing a couple extra algorithm structures now.  */
2090
2091   alg_in = (struct algorithm *)alloca (sizeof (struct algorithm));
2092   best_alg = (struct algorithm *)alloca (sizeof (struct algorithm));
2093
2094   /* If we have a group of zero bits at the low-order part of T, try
2095      multiplying by the remaining bits and then doing a shift.  */
2096
2097   if ((t & 1) == 0)
2098     {
2099       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2100       q = t >> m;
2101       cost = shift_cost[m];
2102       synth_mult (alg_in, q, cost_limit - cost);
2103
2104       cost += alg_in->cost;
2105       if (cost < cost_limit)
2106         {
2107           struct algorithm *x;
2108           x = alg_in, alg_in = best_alg, best_alg = x;
2109           best_alg->log[best_alg->ops] = m;
2110           best_alg->op[best_alg->ops] = alg_shift;
2111           cost_limit = cost;
2112         }
2113     }
2114
2115   /* If we have an odd number, add or subtract one.  */
2116   if ((t & 1) != 0)
2117     {
2118       unsigned HOST_WIDE_INT w;
2119
2120       for (w = 1; (w & t) != 0; w <<= 1)
2121         ;
2122       /* If T was -1, then W will be zero after the loop.  This is another
2123          case where T ends with ...111.  Handling this with (T + 1) and
2124          subtract 1 produces slightly better code and results in algorithm
2125          selection much faster than treating it like the ...0111 case
2126          below.  */
2127       if (w == 0
2128           || (w > 2
2129               /* Reject the case where t is 3.
2130                  Thus we prefer addition in that case.  */
2131               && t != 3))
2132         {
2133           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2134
2135           cost = add_cost;
2136           synth_mult (alg_in, t + 1, cost_limit - cost);
2137
2138           cost += alg_in->cost;
2139           if (cost < cost_limit)
2140             {
2141               struct algorithm *x;
2142               x = alg_in, alg_in = best_alg, best_alg = x;
2143               best_alg->log[best_alg->ops] = 0;
2144               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2145               cost_limit = cost;
2146             }
2147         }
2148       else
2149         {
2150           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2151
2152           cost = add_cost;
2153           synth_mult (alg_in, t - 1, cost_limit - cost);
2154
2155           cost += alg_in->cost;
2156           if (cost < cost_limit)
2157             {
2158               struct algorithm *x;
2159               x = alg_in, alg_in = best_alg, best_alg = x;
2160               best_alg->log[best_alg->ops] = 0;
2161               best_alg->op[best_alg->ops] = alg_add_t_m2;
2162               cost_limit = cost;
2163             }
2164         }
2165     }
2166
2167   /* Look for factors of t of the form
2168      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2169      If we find such a factor, we can multiply by t using an algorithm that
2170      multiplies by q, shift the result by m and add/subtract it to itself.
2171
2172      We search for large factors first and loop down, even if large factors
2173      are less probable than small; if we find a large factor we will find a
2174      good sequence quickly, and therefore be able to prune (by decreasing
2175      COST_LIMIT) the search.  */
2176
2177   for (m = floor_log2 (t - 1); m >= 2; m--)
2178     {
2179       unsigned HOST_WIDE_INT d;
2180
2181       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2182       if (t % d == 0 && t > d)
2183         {
2184           cost = MIN (shiftadd_cost[m], add_cost + shift_cost[m]);
2185           synth_mult (alg_in, t / d, cost_limit - cost);
2186
2187           cost += alg_in->cost;
2188           if (cost < cost_limit)
2189             {
2190               struct algorithm *x;
2191               x = alg_in, alg_in = best_alg, best_alg = x;
2192               best_alg->log[best_alg->ops] = m;
2193               best_alg->op[best_alg->ops] = alg_add_factor;
2194               cost_limit = cost;
2195             }
2196           /* Other factors will have been taken care of in the recursion.  */
2197           break;
2198         }
2199
2200       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2201       if (t % d == 0 && t > d)
2202         {
2203           cost = MIN (shiftsub_cost[m], add_cost + shift_cost[m]);
2204           synth_mult (alg_in, t / d, cost_limit - cost);
2205
2206           cost += alg_in->cost;
2207           if (cost < cost_limit)
2208             {
2209               struct algorithm *x;
2210               x = alg_in, alg_in = best_alg, best_alg = x;
2211               best_alg->log[best_alg->ops] = m;
2212               best_alg->op[best_alg->ops] = alg_sub_factor;
2213               cost_limit = cost;
2214             }
2215           break;
2216         }
2217     }
2218
2219   /* Try shift-and-add (load effective address) instructions,
2220      i.e. do a*3, a*5, a*9.  */
2221   if ((t & 1) != 0)
2222     {
2223       q = t - 1;
2224       q = q & -q;
2225       m = exact_log2 (q);
2226       if (m >= 0)
2227         {
2228           cost = shiftadd_cost[m];
2229           synth_mult (alg_in, (t - 1) >> m, cost_limit - cost);
2230
2231           cost += alg_in->cost;
2232           if (cost < cost_limit)
2233             {
2234               struct algorithm *x;
2235               x = alg_in, alg_in = best_alg, best_alg = x;
2236               best_alg->log[best_alg->ops] = m;
2237               best_alg->op[best_alg->ops] = alg_add_t2_m;
2238               cost_limit = cost;
2239             }
2240         }
2241
2242       q = t + 1;
2243       q = q & -q;
2244       m = exact_log2 (q);
2245       if (m >= 0)
2246         {
2247           cost = shiftsub_cost[m];
2248           synth_mult (alg_in, (t + 1) >> m, cost_limit - cost);
2249
2250           cost += alg_in->cost;
2251           if (cost < cost_limit)
2252             {
2253               struct algorithm *x;
2254               x = alg_in, alg_in = best_alg, best_alg = x;
2255               best_alg->log[best_alg->ops] = m;
2256               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2257               cost_limit = cost;
2258             }
2259         }
2260     }
2261
2262   /* If cost_limit has not decreased since we stored it in alg_out->cost,
2263      we have not found any algorithm.  */
2264   if (cost_limit == alg_out->cost)
2265     return;
2266
2267   /* If we are getting a too long sequence for `struct algorithm'
2268      to record, make this search fail.  */
2269   if (best_alg->ops == MAX_BITS_PER_WORD)
2270     return;
2271
2272   /* Copy the algorithm from temporary space to the space at alg_out.
2273      We avoid using structure assignment because the majority of
2274      best_alg is normally undefined, and this is a critical function.  */
2275   alg_out->ops = best_alg->ops + 1;
2276   alg_out->cost = cost_limit;
2277   bcopy ((char *) best_alg->op, (char *) alg_out->op,
2278          alg_out->ops * sizeof *alg_out->op);
2279   bcopy ((char *) best_alg->log, (char *) alg_out->log,
2280          alg_out->ops * sizeof *alg_out->log);
2281 }
2282 \f
2283 /* Perform a multiplication and return an rtx for the result.
2284    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2285    TARGET is a suggestion for where to store the result (an rtx).
2286
2287    We check specially for a constant integer as OP1.
2288    If you want this check for OP0 as well, then before calling
2289    you should swap the two operands if OP0 would be constant.  */
2290
2291 rtx
2292 expand_mult (mode, op0, op1, target, unsignedp)
2293      enum machine_mode mode;
2294      register rtx op0, op1, target;
2295      int unsignedp;
2296 {
2297   rtx const_op1 = op1;
2298
2299   /* synth_mult does an `unsigned int' multiply.  As long as the mode is
2300      less than or equal in size to `unsigned int' this doesn't matter.
2301      If the mode is larger than `unsigned int', then synth_mult works only
2302      if the constant value exactly fits in an `unsigned int' without any
2303      truncation.  This means that multiplying by negative values does
2304      not work; results are off by 2^32 on a 32 bit machine.  */
2305
2306   /* If we are multiplying in DImode, it may still be a win
2307      to try to work with shifts and adds.  */
2308   if (GET_CODE (op1) == CONST_DOUBLE
2309       && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2310       && HOST_BITS_PER_INT >= BITS_PER_WORD
2311       && CONST_DOUBLE_HIGH (op1) == 0)
2312     const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2313   else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2314            && GET_CODE (op1) == CONST_INT
2315            && INTVAL (op1) < 0)
2316     const_op1 = 0;
2317
2318   /* We used to test optimize here, on the grounds that it's better to
2319      produce a smaller program when -O is not used.
2320      But this causes such a terrible slowdown sometimes
2321      that it seems better to use synth_mult always.  */
2322
2323   if (const_op1 && GET_CODE (const_op1) == CONST_INT)
2324     {
2325       struct algorithm alg;
2326       struct algorithm alg2;
2327       HOST_WIDE_INT val = INTVAL (op1);
2328       HOST_WIDE_INT val_so_far;
2329       rtx insn;
2330       int mult_cost;
2331       enum {basic_variant, negate_variant, add_variant} variant = basic_variant;
2332
2333       /* Try to do the computation three ways: multiply by the negative of OP1
2334          and then negate, do the multiplication directly, or do multiplication
2335          by OP1 - 1.  */
2336
2337       mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
2338       mult_cost = MIN (12 * add_cost, mult_cost);
2339
2340       synth_mult (&alg, val, mult_cost);
2341
2342       /* This works only if the inverted value actually fits in an
2343          `unsigned int' */
2344       if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2345         {
2346           synth_mult (&alg2, - val,
2347                       (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
2348           if (alg2.cost + negate_cost < alg.cost)
2349             alg = alg2, variant = negate_variant;
2350         }
2351
2352       /* This proves very useful for division-by-constant.  */
2353       synth_mult (&alg2, val - 1,
2354                   (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost);
2355       if (alg2.cost + add_cost < alg.cost)
2356         alg = alg2, variant = add_variant;
2357
2358       if (alg.cost < mult_cost)
2359         {
2360           /* We found something cheaper than a multiply insn.  */
2361           int opno;
2362           rtx accum, tem;
2363
2364           op0 = protect_from_queue (op0, 0);
2365
2366           /* Avoid referencing memory over and over.
2367              For speed, but also for correctness when mem is volatile.  */
2368           if (GET_CODE (op0) == MEM)
2369             op0 = force_reg (mode, op0);
2370
2371           /* ACCUM starts out either as OP0 or as a zero, depending on
2372              the first operation.  */
2373
2374           if (alg.op[0] == alg_zero)
2375             {
2376               accum = copy_to_mode_reg (mode, const0_rtx);
2377               val_so_far = 0;
2378             }
2379           else if (alg.op[0] == alg_m)
2380             {
2381               accum = copy_to_mode_reg (mode, op0);
2382               val_so_far = 1;
2383             }
2384           else
2385             abort ();
2386
2387           for (opno = 1; opno < alg.ops; opno++)
2388             {
2389               int log = alg.log[opno];
2390               int preserve = preserve_subexpressions_p ();
2391               rtx shift_subtarget = preserve ? 0 : accum;
2392               rtx add_target
2393                 = (opno == alg.ops - 1 && target != 0 && variant != add_variant
2394                    && ! preserve)
2395                   ? target : 0;
2396               rtx accum_target = preserve ? 0 : accum;
2397
2398               switch (alg.op[opno])
2399                 {
2400                 case alg_shift:
2401                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2402                                         build_int_2 (log, 0), NULL_RTX, 0);
2403                   val_so_far <<= log;
2404                   break;
2405
2406                 case alg_add_t_m2:
2407                   tem = expand_shift (LSHIFT_EXPR, mode, op0,
2408                                       build_int_2 (log, 0), NULL_RTX, 0);
2409                   accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2410                                          add_target ? add_target : accum_target);
2411                   val_so_far += (HOST_WIDE_INT) 1 << log;
2412                   break;
2413
2414                 case alg_sub_t_m2:
2415                   tem = expand_shift (LSHIFT_EXPR, mode, op0,
2416                                       build_int_2 (log, 0), NULL_RTX, 0);
2417                   accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2418                                          add_target ? add_target : accum_target);
2419                   val_so_far -= (HOST_WIDE_INT) 1 << log;
2420                   break;
2421
2422                 case alg_add_t2_m:
2423                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2424                                         build_int_2 (log, 0), shift_subtarget,
2425                                         0);
2426                   accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2427                                          add_target ? add_target : accum_target);
2428                   val_so_far = (val_so_far << log) + 1;
2429                   break;
2430
2431                 case alg_sub_t2_m:
2432                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2433                                         build_int_2 (log, 0), shift_subtarget,
2434                                         0);
2435                   accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2436                                          add_target ? add_target : accum_target);
2437                   val_so_far = (val_so_far << log) - 1;
2438                   break;
2439
2440                 case alg_add_factor:
2441                   tem = expand_shift (LSHIFT_EXPR, mode, accum,
2442                                       build_int_2 (log, 0), NULL_RTX, 0);
2443                   accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2444                                          add_target ? add_target : accum_target);
2445                   val_so_far += val_so_far << log;
2446                   break;
2447
2448                 case alg_sub_factor:
2449                   tem = expand_shift (LSHIFT_EXPR, mode, accum,
2450                                       build_int_2 (log, 0), NULL_RTX, 0);
2451                   accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2452                                          (add_target ? add_target
2453                                           : preserve ? 0 : tem));
2454                   val_so_far = (val_so_far << log) - val_so_far;
2455                   break;
2456
2457                 default:
2458                   abort ();;
2459                 }
2460
2461               /* Write a REG_EQUAL note on the last insn so that we can cse
2462                  multiplication sequences.  */
2463
2464               insn = get_last_insn ();
2465               REG_NOTES (insn)
2466                 = gen_rtx_EXPR_LIST (REG_EQUAL,
2467                                      gen_rtx_MULT (mode, op0, GEN_INT (val_so_far)),
2468                                      REG_NOTES (insn));
2469             }
2470
2471           if (variant == negate_variant)
2472             {
2473               val_so_far = - val_so_far;
2474               accum = expand_unop (mode, neg_optab, accum, target, 0);
2475             }
2476           else if (variant == add_variant)
2477             {
2478               val_so_far = val_so_far + 1;
2479               accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2480             }
2481
2482           if (val != val_so_far)
2483             abort ();
2484
2485           return accum;
2486         }
2487     }
2488
2489   /* This used to use umul_optab if unsigned, but for non-widening multiply
2490      there is no difference between signed and unsigned.  */
2491   op0 = expand_binop (mode, smul_optab,
2492                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2493   if (op0 == 0)
2494     abort ();
2495   return op0;
2496 }
2497 \f
2498 /* Return the smallest n such that 2**n >= X.  */
2499
2500 int
2501 ceil_log2 (x)
2502      unsigned HOST_WIDE_INT x;
2503 {
2504   return floor_log2 (x - 1) + 1;
2505 }
2506
2507 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2508    replace division by D, and put the least significant N bits of the result
2509    in *MULTIPLIER_PTR and return the most significant bit.
2510
2511    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
2512    needed precision is in PRECISION (should be <= N).
2513
2514    PRECISION should be as small as possible so this function can choose
2515    multiplier more freely.
2516
2517    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
2518    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2519
2520    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2521    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
2522
2523 static
2524 unsigned HOST_WIDE_INT
2525 choose_multiplier (d, n, precision, multiplier_ptr, post_shift_ptr, lgup_ptr)
2526      unsigned HOST_WIDE_INT d;
2527      int n;
2528      int precision;
2529      unsigned HOST_WIDE_INT *multiplier_ptr;
2530      int *post_shift_ptr;
2531      int *lgup_ptr;
2532 {
2533   unsigned HOST_WIDE_INT mhigh_hi, mhigh_lo;
2534   unsigned HOST_WIDE_INT mlow_hi, mlow_lo;
2535   int lgup, post_shift;
2536   int pow, pow2;
2537   unsigned HOST_WIDE_INT nh, nl, dummy1, dummy2;
2538
2539   /* lgup = ceil(log2(divisor)); */
2540   lgup = ceil_log2 (d);
2541
2542   if (lgup > n)
2543     abort ();
2544
2545   pow = n + lgup;
2546   pow2 = n + lgup - precision;
2547
2548   if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2549     {
2550       /* We could handle this with some effort, but this case is much better
2551          handled directly with a scc insn, so rely on caller using that.  */
2552       abort ();
2553     }
2554
2555   /* mlow = 2^(N + lgup)/d */
2556  if (pow >= HOST_BITS_PER_WIDE_INT)
2557     {
2558       nh = (unsigned HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2559       nl = 0;
2560     }
2561   else
2562     {
2563       nh = 0;
2564       nl = (unsigned HOST_WIDE_INT) 1 << pow;
2565     }
2566   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2567                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2568
2569   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
2570   if (pow2 >= HOST_BITS_PER_WIDE_INT)
2571     nh |= (unsigned HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2572   else
2573     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2574   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2575                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2576
2577   if (mhigh_hi && nh - d >= d)
2578     abort ();
2579   if (mhigh_hi > 1 || mlow_hi > 1)
2580     abort ();
2581   /* assert that mlow < mhigh.  */
2582   if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2583     abort();
2584
2585   /* If precision == N, then mlow, mhigh exceed 2^N
2586      (but they do not exceed 2^(N+1)).  */
2587
2588   /* Reduce to lowest terms */
2589   for (post_shift = lgup; post_shift > 0; post_shift--)
2590     {
2591       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2592       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2593       if (ml_lo >= mh_lo)
2594         break;
2595
2596       mlow_hi = 0;
2597       mlow_lo = ml_lo;
2598       mhigh_hi = 0;
2599       mhigh_lo = mh_lo;
2600     }
2601
2602   *post_shift_ptr = post_shift;
2603   *lgup_ptr = lgup;
2604   if (n < HOST_BITS_PER_WIDE_INT)
2605     {
2606       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2607       *multiplier_ptr = mhigh_lo & mask;
2608       return mhigh_lo >= mask;
2609     }
2610   else
2611     {
2612       *multiplier_ptr = mhigh_lo;
2613       return mhigh_hi;
2614     }
2615 }
2616
2617 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
2618    congruent to 1 (mod 2**N).  */
2619
2620 static unsigned HOST_WIDE_INT
2621 invert_mod2n (x, n)
2622      unsigned HOST_WIDE_INT x;
2623      int n;
2624 {
2625   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
2626
2627   /* The algorithm notes that the choice y = x satisfies
2628      x*y == 1 mod 2^3, since x is assumed odd.
2629      Each iteration doubles the number of bits of significance in y.  */
2630
2631   unsigned HOST_WIDE_INT mask;
2632   unsigned HOST_WIDE_INT y = x;
2633   int nbit = 3;
2634
2635   mask = (n == HOST_BITS_PER_WIDE_INT
2636           ? ~(unsigned HOST_WIDE_INT) 0
2637           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2638
2639   while (nbit < n)
2640     {
2641       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
2642       nbit *= 2;
2643     }
2644   return y;
2645 }
2646
2647 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
2648    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
2649    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
2650    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
2651    become signed.
2652
2653    The result is put in TARGET if that is convenient.
2654
2655    MODE is the mode of operation.  */
2656
2657 rtx
2658 expand_mult_highpart_adjust (mode, adj_operand, op0, op1, target, unsignedp)
2659      enum machine_mode mode;
2660      register rtx adj_operand, op0, op1, target;
2661      int unsignedp;
2662 {
2663   rtx tem;
2664   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
2665
2666   tem = expand_shift (RSHIFT_EXPR, mode, op0,
2667                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2668                       NULL_RTX, 0);
2669   tem = expand_and (tem, op1, NULL_RTX);
2670   adj_operand
2671     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2672                      adj_operand);
2673
2674   tem = expand_shift (RSHIFT_EXPR, mode, op1,
2675                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2676                       NULL_RTX, 0);
2677   tem = expand_and (tem, op0, NULL_RTX);
2678   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2679                           target);
2680
2681   return target;
2682 }
2683
2684 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
2685    in TARGET if that is convenient, and return where the result is.  If the
2686    operation can not be performed, 0 is returned.
2687
2688    MODE is the mode of operation and result.
2689
2690    UNSIGNEDP nonzero means unsigned multiply.
2691
2692    MAX_COST is the total allowed cost for the expanded RTL.  */
2693
2694 rtx
2695 expand_mult_highpart (mode, op0, cnst1, target, unsignedp, max_cost)
2696      enum machine_mode mode;
2697      register rtx op0, target;
2698      unsigned HOST_WIDE_INT cnst1;
2699      int unsignedp;
2700      int max_cost;
2701 {
2702   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
2703   optab mul_highpart_optab;
2704   optab moptab;
2705   rtx tem;
2706   int size = GET_MODE_BITSIZE (mode);
2707   rtx op1, wide_op1;
2708
2709   /* We can't support modes wider than HOST_BITS_PER_INT.  */
2710   if (size > HOST_BITS_PER_WIDE_INT)
2711     abort ();
2712
2713   op1 = GEN_INT (cnst1);
2714
2715   if (GET_MODE_BITSIZE (wider_mode) <= HOST_BITS_PER_INT)
2716     wide_op1 = op1;
2717   else
2718     wide_op1
2719       = immed_double_const (cnst1,
2720                             (unsignedp
2721                              ? (HOST_WIDE_INT) 0
2722                              : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))),
2723                             wider_mode);
2724
2725   /* expand_mult handles constant multiplication of word_mode
2726      or narrower.  It does a poor job for large modes.  */
2727   if (size < BITS_PER_WORD
2728       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2729     {
2730       /* We have to do this, since expand_binop doesn't do conversion for
2731          multiply.  Maybe change expand_binop to handle widening multiply?  */
2732       op0 = convert_to_mode (wider_mode, op0, unsignedp);
2733
2734       tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, unsignedp);
2735       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2736                           build_int_2 (size, 0), NULL_RTX, 1);
2737       return convert_modes (mode, wider_mode, tem, unsignedp);
2738     }
2739
2740   if (target == 0)
2741     target = gen_reg_rtx (mode);
2742
2743   /* Firstly, try using a multiplication insn that only generates the needed
2744      high part of the product, and in the sign flavor of unsignedp.  */
2745   if (mul_highpart_cost[(int) mode] < max_cost)
2746     {
2747       mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
2748       target = expand_binop (mode, mul_highpart_optab,
2749                              op0, wide_op1, target, unsignedp, OPTAB_DIRECT);
2750       if (target)
2751         return target;
2752     }
2753
2754   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
2755      Need to adjust the result after the multiplication.  */
2756   if (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost < max_cost)
2757     {
2758       mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
2759       target = expand_binop (mode, mul_highpart_optab,
2760                              op0, wide_op1, target, unsignedp, OPTAB_DIRECT);
2761       if (target)
2762         /* We used the wrong signedness.  Adjust the result.  */
2763         return expand_mult_highpart_adjust (mode, target, op0,
2764                                             op1, target, unsignedp);
2765     }
2766
2767   /* Try widening multiplication.  */
2768   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
2769   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2770       && mul_widen_cost[(int) wider_mode] < max_cost)
2771     {
2772       op1 = force_reg (mode, op1);
2773       goto try;
2774     }
2775
2776   /* Try widening the mode and perform a non-widening multiplication.  */
2777   moptab = smul_optab;
2778   if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2779       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2780     {
2781       op1 = wide_op1;
2782       goto try;
2783     }
2784
2785   /* Try widening multiplication of opposite signedness, and adjust.  */
2786   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
2787   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2788       && (mul_widen_cost[(int) wider_mode]
2789           + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
2790     {
2791       rtx regop1 = force_reg (mode, op1);
2792       tem = expand_binop (wider_mode, moptab, op0, regop1,
2793                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
2794       if (tem != 0)
2795         {
2796           /* Extract the high half of the just generated product.  */
2797           tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2798                               build_int_2 (size, 0), NULL_RTX, 1);
2799           tem = convert_modes (mode, wider_mode, tem, unsignedp);
2800           /* We used the wrong signedness.  Adjust the result.  */
2801           return expand_mult_highpart_adjust (mode, tem, op0, op1,
2802                                               target, unsignedp);
2803         }
2804     }
2805
2806   return 0;
2807
2808  try:
2809   /* Pass NULL_RTX as target since TARGET has wrong mode.  */
2810   tem = expand_binop (wider_mode, moptab, op0, op1,
2811                       NULL_RTX, unsignedp, OPTAB_WIDEN);
2812   if (tem == 0)
2813     return 0;
2814
2815   /* Extract the high half of the just generated product.  */
2816   if (mode == word_mode)
2817     {
2818       return gen_highpart (mode, tem);
2819     }
2820   else
2821     {
2822       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2823                           build_int_2 (size, 0), NULL_RTX, 1);
2824       return convert_modes (mode, wider_mode, tem, unsignedp);
2825     }
2826 }
2827 \f
2828 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
2829    if that is convenient, and returning where the result is.
2830    You may request either the quotient or the remainder as the result;
2831    specify REM_FLAG nonzero to get the remainder.
2832
2833    CODE is the expression code for which kind of division this is;
2834    it controls how rounding is done.  MODE is the machine mode to use.
2835    UNSIGNEDP nonzero means do unsigned division.  */
2836
2837 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
2838    and then correct it by or'ing in missing high bits
2839    if result of ANDI is nonzero.
2840    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
2841    This could optimize to a bfexts instruction.
2842    But C doesn't use these operations, so their optimizations are
2843    left for later.  */
2844
2845 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
2846
2847 rtx
2848 expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
2849      int rem_flag;
2850      enum tree_code code;
2851      enum machine_mode mode;
2852      register rtx op0, op1, target;
2853      int unsignedp;
2854 {
2855   enum machine_mode compute_mode;
2856   register rtx tquotient;
2857   rtx quotient = 0, remainder = 0;
2858   rtx last;
2859   int size;
2860   rtx insn, set;
2861   optab optab1, optab2;
2862   int op1_is_constant, op1_is_pow2;
2863   int max_cost, extra_cost;
2864   static HOST_WIDE_INT last_div_const = 0;
2865
2866   op1_is_constant = GET_CODE (op1) == CONST_INT;
2867   op1_is_pow2 = (op1_is_constant
2868                  && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
2869                       || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
2870
2871   /*
2872      This is the structure of expand_divmod:
2873
2874      First comes code to fix up the operands so we can perform the operations
2875      correctly and efficiently.
2876
2877      Second comes a switch statement with code specific for each rounding mode.
2878      For some special operands this code emits all RTL for the desired
2879      operation, for other cases, it generates only a quotient and stores it in
2880      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
2881      to indicate that it has not done anything.
2882
2883      Last comes code that finishes the operation.  If QUOTIENT is set and
2884      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
2885      QUOTIENT is not set, it is computed using trunc rounding.
2886
2887      We try to generate special code for division and remainder when OP1 is a
2888      constant.  If |OP1| = 2**n we can use shifts and some other fast
2889      operations.  For other values of OP1, we compute a carefully selected
2890      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
2891      by m.
2892
2893      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
2894      half of the product.  Different strategies for generating the product are
2895      implemented in expand_mult_highpart.
2896
2897      If what we actually want is the remainder, we generate that by another
2898      by-constant multiplication and a subtraction.  */
2899
2900   /* We shouldn't be called with OP1 == const1_rtx, but some of the
2901      code below will malfunction if we are, so check here and handle
2902      the special case if so.  */
2903   if (op1 == const1_rtx)
2904     return rem_flag ? const0_rtx : op0;
2905
2906   if (target
2907       /* Don't use the function value register as a target
2908          since we have to read it as well as write it,
2909          and function-inlining gets confused by this.  */
2910       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
2911           /* Don't clobber an operand while doing a multi-step calculation.  */
2912           || ((rem_flag || op1_is_constant)
2913               && (reg_mentioned_p (target, op0)
2914                   || (GET_CODE (op0) == MEM && GET_CODE (target) == MEM)))
2915           || reg_mentioned_p (target, op1)
2916           || (GET_CODE (op1) == MEM && GET_CODE (target) == MEM)))
2917     target = 0;
2918
2919   /* Get the mode in which to perform this computation.  Normally it will
2920      be MODE, but sometimes we can't do the desired operation in MODE.
2921      If so, pick a wider mode in which we can do the operation.  Convert
2922      to that mode at the start to avoid repeated conversions.
2923
2924      First see what operations we need.  These depend on the expression
2925      we are evaluating.  (We assume that divxx3 insns exist under the
2926      same conditions that modxx3 insns and that these insns don't normally
2927      fail.  If these assumptions are not correct, we may generate less
2928      efficient code in some cases.)
2929
2930      Then see if we find a mode in which we can open-code that operation
2931      (either a division, modulus, or shift).  Finally, check for the smallest
2932      mode for which we can do the operation with a library call.  */
2933
2934   /* We might want to refine this now that we have division-by-constant
2935      optimization.  Since expand_mult_highpart tries so many variants, it is
2936      not straightforward to generalize this.  Maybe we should make an array
2937      of possible modes in init_expmed?  Save this for GCC 2.7.  */
2938
2939   optab1 = (op1_is_pow2 ? (unsignedp ? lshr_optab : ashr_optab)
2940             : (unsignedp ? udiv_optab : sdiv_optab));
2941   optab2 = (op1_is_pow2 ? optab1 : (unsignedp ? udivmod_optab : sdivmod_optab));
2942
2943   for (compute_mode = mode; compute_mode != VOIDmode;
2944        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
2945     if (optab1->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing
2946         || optab2->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing)
2947       break;
2948
2949   if (compute_mode == VOIDmode)
2950     for (compute_mode = mode; compute_mode != VOIDmode;
2951          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
2952       if (optab1->handlers[(int) compute_mode].libfunc
2953           || optab2->handlers[(int) compute_mode].libfunc)
2954         break;
2955
2956   /* If we still couldn't find a mode, use MODE, but we'll probably abort
2957      in expand_binop.  */
2958   if (compute_mode == VOIDmode)
2959     compute_mode = mode;
2960
2961   if (target && GET_MODE (target) == compute_mode)
2962     tquotient = target;
2963   else
2964     tquotient = gen_reg_rtx (compute_mode);
2965
2966   size = GET_MODE_BITSIZE (compute_mode);
2967 #if 0
2968   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
2969      (mode), and thereby get better code when OP1 is a constant.  Do that
2970      later.  It will require going over all usages of SIZE below.  */
2971   size = GET_MODE_BITSIZE (mode);
2972 #endif
2973
2974   /* Only deduct something for a REM if the last divide done was
2975      for a different constant.   Then set the constant of the last
2976      divide.  */
2977   max_cost = div_cost[(int) compute_mode]
2978     - (rem_flag && ! (last_div_const != 0 && op1_is_constant
2979                       && INTVAL (op1) == last_div_const)
2980        ? mul_cost[(int) compute_mode] + add_cost : 0);
2981
2982   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
2983
2984   /* Now convert to the best mode to use.  */
2985   if (compute_mode != mode)
2986     {
2987       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
2988       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
2989
2990       /* convert_modes may have placed op1 into a register, so we
2991          must recompute the following.  */
2992       op1_is_constant = GET_CODE (op1) == CONST_INT;
2993       op1_is_pow2 = (op1_is_constant
2994                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
2995                           || (! unsignedp
2996                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
2997     }
2998
2999   /* If one of the operands is a volatile MEM, copy it into a register.  */
3000
3001   if (GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0))
3002     op0 = force_reg (compute_mode, op0);
3003   if (GET_CODE (op1) == MEM && MEM_VOLATILE_P (op1))
3004     op1 = force_reg (compute_mode, op1);
3005
3006   /* If we need the remainder or if OP1 is constant, we need to
3007      put OP0 in a register in case it has any queued subexpressions.  */
3008   if (rem_flag || op1_is_constant)
3009     op0 = force_reg (compute_mode, op0);
3010
3011   last = get_last_insn ();
3012
3013   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3014   if (unsignedp)
3015     {
3016       if (code == FLOOR_DIV_EXPR)
3017         code = TRUNC_DIV_EXPR;
3018       if (code == FLOOR_MOD_EXPR)
3019         code = TRUNC_MOD_EXPR;
3020       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3021         code = TRUNC_DIV_EXPR;
3022     }
3023
3024   if (op1 != const0_rtx)
3025     switch (code)
3026       {
3027       case TRUNC_MOD_EXPR:
3028       case TRUNC_DIV_EXPR:
3029         if (op1_is_constant)
3030           {
3031             if (unsignedp)
3032               {
3033                 unsigned HOST_WIDE_INT mh, ml;
3034                 int pre_shift, post_shift;
3035                 int dummy;
3036                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3037
3038                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3039                   {
3040                     pre_shift = floor_log2 (d);
3041                     if (rem_flag)
3042                       {
3043                         remainder
3044                           = expand_binop (compute_mode, and_optab, op0,
3045                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3046                                           remainder, 1,
3047                                           OPTAB_LIB_WIDEN);
3048                         if (remainder)
3049                           return gen_lowpart (mode, remainder);
3050                       }
3051                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3052                                              build_int_2 (pre_shift, 0),
3053                                              tquotient, 1);
3054                   }
3055                 else if (size <= HOST_BITS_PER_WIDE_INT)
3056                   {
3057                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3058                       {
3059                         /* Most significant bit of divisor is set; emit an scc
3060                            insn.  */
3061                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3062                                                     compute_mode, 1, 1);
3063                         if (quotient == 0)
3064                           goto fail1;
3065                       }
3066                     else
3067                       {
3068                         /* Find a suitable multiplier and right shift count
3069                            instead of multiplying with D.  */
3070
3071                         mh = choose_multiplier (d, size, size,
3072                                                 &ml, &post_shift, &dummy);
3073
3074                         /* If the suggested multiplier is more than SIZE bits,
3075                            we can do better for even divisors, using an
3076                            initial right shift.  */
3077                         if (mh != 0 && (d & 1) == 0)
3078                           {
3079                             pre_shift = floor_log2 (d & -d);
3080                             mh = choose_multiplier (d >> pre_shift, size,
3081                                                     size - pre_shift,
3082                                                     &ml, &post_shift, &dummy);
3083                             if (mh)
3084                               abort ();
3085                           }
3086                         else
3087                           pre_shift = 0;
3088
3089                         if (mh != 0)
3090                           {
3091                             rtx t1, t2, t3, t4;
3092
3093                             extra_cost = (shift_cost[post_shift - 1]
3094                                           + shift_cost[1] + 2 * add_cost);
3095                             t1 = expand_mult_highpart (compute_mode, op0, ml,
3096                                                        NULL_RTX, 1,
3097                                                        max_cost - extra_cost);
3098                             if (t1 == 0)
3099                               goto fail1;
3100                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
3101                                                                op0, t1),
3102                                                 NULL_RTX);
3103                             t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3104                                                build_int_2 (1, 0), NULL_RTX,1);
3105                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
3106                                                               t1, t3),
3107                                                 NULL_RTX);
3108                             quotient
3109                               = expand_shift (RSHIFT_EXPR, compute_mode, t4,
3110                                               build_int_2 (post_shift - 1, 0),
3111                                               tquotient, 1);
3112                           }
3113                         else
3114                           {
3115                             rtx t1, t2;
3116
3117                             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3118                                                build_int_2 (pre_shift, 0),
3119                                                NULL_RTX, 1);
3120                             extra_cost = (shift_cost[pre_shift]
3121                                           + shift_cost[post_shift]);
3122                             t2 = expand_mult_highpart (compute_mode, t1, ml,
3123                                                        NULL_RTX, 1,
3124                                                        max_cost - extra_cost);
3125                             if (t2 == 0)
3126                               goto fail1;
3127                             quotient
3128                               = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3129                                               build_int_2 (post_shift, 0),
3130                                               tquotient, 1);
3131                           }
3132                       }
3133                   }
3134                 else            /* Too wide mode to use tricky code */
3135                   break;
3136
3137                 insn = get_last_insn ();
3138                 if (insn != last
3139                     && (set = single_set (insn)) != 0
3140                     && SET_DEST (set) == quotient)
3141                   REG_NOTES (insn)
3142                     = gen_rtx_EXPR_LIST (REG_EQUAL,
3143                                          gen_rtx_UDIV (compute_mode, op0, op1),
3144                                          REG_NOTES (insn));
3145               }
3146             else                /* TRUNC_DIV, signed */
3147               {
3148                 unsigned HOST_WIDE_INT ml;
3149                 int lgup, post_shift;
3150                 HOST_WIDE_INT d = INTVAL (op1);
3151                 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3152
3153                 /* n rem d = n rem -d */
3154                 if (rem_flag && d < 0)
3155                   {
3156                     d = abs_d;
3157                     op1 = GEN_INT (abs_d);
3158                   }
3159
3160                 if (d == 1)
3161                   quotient = op0;
3162                 else if (d == -1)
3163                   quotient = expand_unop (compute_mode, neg_optab, op0,
3164                                           tquotient, 0);
3165                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3166                   {
3167                     /* This case is not handled correctly below.  */
3168                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
3169                                                 compute_mode, 1, 1);
3170                     if (quotient == 0)
3171                       goto fail1;
3172                   }
3173                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3174                          && (rem_flag ? smod_pow2_cheap : sdiv_pow2_cheap))
3175                   ;
3176                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3177                   {
3178                     lgup = floor_log2 (abs_d);
3179                     if (abs_d != 2 && BRANCH_COST < 3)
3180                       {
3181                         rtx label = gen_label_rtx ();
3182                         rtx t1;
3183
3184                         t1 = copy_to_mode_reg (compute_mode, op0);
3185                         do_cmp_and_jump (t1, const0_rtx, GE,
3186                                          compute_mode, label);
3187                         expand_inc (t1, GEN_INT (abs_d - 1));
3188                         emit_label (label);
3189                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3190                                                  build_int_2 (lgup, 0),
3191                                                  tquotient, 0);
3192                       }
3193                     else
3194                       {
3195                         rtx t1, t2, t3;
3196                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3197                                            build_int_2 (size - 1, 0),
3198                                            NULL_RTX, 0);
3199                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3200                                            build_int_2 (size - lgup, 0),
3201                                            NULL_RTX, 1);
3202                         t3 = force_operand (gen_rtx_PLUS (compute_mode,
3203                                                           op0, t2),
3204                                             NULL_RTX);
3205                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3206                                                  build_int_2 (lgup, 0),
3207                                                  tquotient, 0);
3208                       }
3209
3210                     /* We have computed OP0 / abs(OP1).  If OP1 is negative, negate
3211                        the quotient.  */
3212                     if (d < 0)
3213                       {
3214                         insn = get_last_insn ();
3215                         if (insn != last
3216                             && (set = single_set (insn)) != 0
3217                             && SET_DEST (set) == quotient)
3218                           REG_NOTES (insn)
3219                             = gen_rtx_EXPR_LIST (REG_EQUAL,
3220                                                  gen_rtx_DIV (compute_mode,
3221                                                               op0,
3222                                                               GEN_INT (abs_d)),
3223                                        REG_NOTES (insn));
3224
3225                         quotient = expand_unop (compute_mode, neg_optab,
3226                                                 quotient, quotient, 0);
3227                       }
3228                   }
3229                 else if (size <= HOST_BITS_PER_WIDE_INT)
3230                   {
3231                     choose_multiplier (abs_d, size, size - 1,
3232                                        &ml, &post_shift, &lgup);
3233                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3234                       {
3235                         rtx t1, t2, t3;
3236
3237                         extra_cost = (shift_cost[post_shift]
3238                                       + shift_cost[size - 1] + add_cost);
3239                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3240                                                    NULL_RTX, 0,
3241                                                    max_cost - extra_cost);
3242                         if (t1 == 0)
3243                           goto fail1;
3244                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3245                                            build_int_2 (post_shift, 0), NULL_RTX, 0);
3246                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3247                                            build_int_2 (size - 1, 0), NULL_RTX, 0);
3248                         if (d < 0)
3249                           quotient = force_operand (gen_rtx_MINUS (compute_mode, t3, t2),
3250                                                     tquotient);
3251                         else
3252                           quotient = force_operand (gen_rtx_MINUS (compute_mode, t2, t3),
3253                                                     tquotient);
3254                       }
3255                     else
3256                       {
3257                         rtx t1, t2, t3, t4;
3258
3259                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3260                         extra_cost = (shift_cost[post_shift]
3261                                       + shift_cost[size - 1] + 2 * add_cost);
3262                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3263                                                    NULL_RTX, 0,
3264                                                    max_cost - extra_cost);
3265                         if (t1 == 0)
3266                           goto fail1;
3267                         t2 = force_operand (gen_rtx_PLUS (compute_mode, t1, op0),
3268                                             NULL_RTX);
3269                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3270                                            build_int_2 (post_shift, 0), NULL_RTX, 0);
3271                         t4 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3272                                            build_int_2 (size - 1, 0), NULL_RTX, 0);
3273                         if (d < 0)
3274                           quotient = force_operand (gen_rtx_MINUS (compute_mode, t4, t3),
3275                                                     tquotient);
3276                         else
3277                           quotient = force_operand (gen_rtx_MINUS (compute_mode, t3, t4),
3278                                                     tquotient);
3279                       }
3280                   }
3281                 else            /* Too wide mode to use tricky code */
3282                   break;
3283
3284                 insn = get_last_insn ();
3285                 if (insn != last
3286                     && (set = single_set (insn)) != 0
3287                     && SET_DEST (set) == quotient)
3288                   REG_NOTES (insn)
3289                     = gen_rtx_EXPR_LIST (REG_EQUAL,
3290                                          gen_rtx_DIV (compute_mode, op0, op1),
3291                                          REG_NOTES (insn));
3292               }
3293             break;
3294           }
3295       fail1:
3296         delete_insns_since (last);
3297         break;
3298
3299       case FLOOR_DIV_EXPR:
3300       case FLOOR_MOD_EXPR:
3301       /* We will come here only for signed operations.  */
3302         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3303           {
3304             unsigned HOST_WIDE_INT mh, ml;
3305             int pre_shift, lgup, post_shift;
3306             HOST_WIDE_INT d = INTVAL (op1);
3307
3308             if (d > 0)
3309               {
3310                 /* We could just as easily deal with negative constants here,
3311                    but it does not seem worth the trouble for GCC 2.6.  */
3312                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3313                   {
3314                     pre_shift = floor_log2 (d);
3315                     if (rem_flag)
3316                       {
3317                         remainder = expand_binop (compute_mode, and_optab, op0,
3318                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3319                                                   remainder, 0, OPTAB_LIB_WIDEN);
3320                         if (remainder)
3321                           return gen_lowpart (mode, remainder);
3322                       }
3323                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3324                                              build_int_2 (pre_shift, 0),
3325                                              tquotient, 0);
3326                   }
3327                 else
3328                   {
3329                     rtx t1, t2, t3, t4;
3330
3331                     mh = choose_multiplier (d, size, size - 1,
3332                                             &ml, &post_shift, &lgup);
3333                     if (mh)
3334                       abort ();
3335
3336                     t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3337                                        build_int_2 (size - 1, 0), NULL_RTX, 0);
3338                     t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3339                                        NULL_RTX, 0, OPTAB_WIDEN);
3340                     extra_cost = (shift_cost[post_shift]
3341                                   + shift_cost[size - 1] + 2 * add_cost);
3342                     t3 = expand_mult_highpart (compute_mode, t2, ml,
3343                                                NULL_RTX, 1,
3344                                                max_cost - extra_cost);
3345                     if (t3 != 0)
3346                       {
3347                         t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3348                                            build_int_2 (post_shift, 0),
3349                                            NULL_RTX, 1);
3350                         quotient = expand_binop (compute_mode, xor_optab,
3351                                                  t4, t1, tquotient, 0,
3352                                                  OPTAB_WIDEN);
3353                       }
3354                   }
3355               }
3356             else
3357               {
3358                 rtx nsign, t1, t2, t3, t4;
3359                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
3360                                                   op0, constm1_rtx), NULL_RTX);
3361                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3362                                    0, OPTAB_WIDEN);
3363                 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3364                                       build_int_2 (size - 1, 0), NULL_RTX, 0);
3365                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
3366                                     NULL_RTX);
3367                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3368                                     NULL_RTX, 0);
3369                 if (t4)
3370                   {
3371                     rtx t5;
3372                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3373                                       NULL_RTX, 0);
3374                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
3375                                                             t4, t5),
3376                                               tquotient);
3377                   }
3378               }
3379           }
3380
3381         if (quotient != 0)
3382           break;
3383         delete_insns_since (last);
3384
3385         /* Try using an instruction that produces both the quotient and
3386            remainder, using truncation.  We can easily compensate the quotient
3387            or remainder to get floor rounding, once we have the remainder.
3388            Notice that we compute also the final remainder value here,
3389            and return the result right away.  */
3390         if (target == 0 || GET_MODE (target) != compute_mode)
3391           target = gen_reg_rtx (compute_mode);
3392
3393         if (rem_flag)
3394           {
3395             remainder
3396               = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3397             quotient = gen_reg_rtx (compute_mode);
3398           }
3399         else
3400           {
3401             quotient
3402               = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3403             remainder = gen_reg_rtx (compute_mode);
3404           }
3405
3406         if (expand_twoval_binop (sdivmod_optab, op0, op1,
3407                                  quotient, remainder, 0))
3408           {
3409             /* This could be computed with a branch-less sequence.
3410                Save that for later.  */
3411             rtx tem;
3412             rtx label = gen_label_rtx ();
3413             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
3414             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3415                                 NULL_RTX, 0, OPTAB_WIDEN);
3416             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
3417             expand_dec (quotient, const1_rtx);
3418             expand_inc (remainder, op1);
3419             emit_label (label);
3420             return gen_lowpart (mode, rem_flag ? remainder : quotient);
3421           }
3422
3423         /* No luck with division elimination or divmod.  Have to do it
3424            by conditionally adjusting op0 *and* the result.  */
3425         {
3426           rtx label1, label2, label3, label4, label5;
3427           rtx adjusted_op0;
3428           rtx tem;
3429
3430           quotient = gen_reg_rtx (compute_mode);
3431           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3432           label1 = gen_label_rtx ();
3433           label2 = gen_label_rtx ();
3434           label3 = gen_label_rtx ();
3435           label4 = gen_label_rtx ();
3436           label5 = gen_label_rtx ();
3437           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3438           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
3439           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3440                               quotient, 0, OPTAB_LIB_WIDEN);
3441           if (tem != quotient)
3442             emit_move_insn (quotient, tem);
3443           emit_jump_insn (gen_jump (label5));
3444           emit_barrier ();
3445           emit_label (label1);
3446           expand_inc (adjusted_op0, const1_rtx);
3447           emit_jump_insn (gen_jump (label4));
3448           emit_barrier ();
3449           emit_label (label2);
3450           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
3451           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3452                               quotient, 0, OPTAB_LIB_WIDEN);
3453           if (tem != quotient)
3454             emit_move_insn (quotient, tem);
3455           emit_jump_insn (gen_jump (label5));
3456           emit_barrier ();
3457           emit_label (label3);
3458           expand_dec (adjusted_op0, const1_rtx);
3459           emit_label (label4);
3460           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3461                               quotient, 0, OPTAB_LIB_WIDEN);
3462           if (tem != quotient)
3463             emit_move_insn (quotient, tem);
3464           expand_dec (quotient, const1_rtx);
3465           emit_label (label5);
3466         }
3467         break;
3468
3469       case CEIL_DIV_EXPR:
3470       case CEIL_MOD_EXPR:
3471         if (unsignedp)
3472           {
3473             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
3474               {
3475                 rtx t1, t2, t3;
3476                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3477                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3478                                    build_int_2 (floor_log2 (d), 0),
3479                                    tquotient, 1);
3480                 t2 = expand_binop (compute_mode, and_optab, op0,
3481                                    GEN_INT (d - 1),
3482                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3483                 t3 = gen_reg_rtx (compute_mode);
3484                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3485                                       compute_mode, 1, 1);
3486                 if (t3 == 0)
3487                   {
3488                     rtx lab;
3489                     lab = gen_label_rtx ();
3490                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3491                     expand_inc (t1, const1_rtx);
3492                     emit_label (lab);
3493                     quotient = t1;
3494                   }
3495                 else
3496                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3497                                                           t1, t3),
3498                                             tquotient);
3499                 break;
3500               }
3501
3502             /* Try using an instruction that produces both the quotient and
3503                remainder, using truncation.  We can easily compensate the
3504                quotient or remainder to get ceiling rounding, once we have the
3505                remainder.  Notice that we compute also the final remainder
3506                value here, and return the result right away.  */
3507             if (target == 0 || GET_MODE (target) != compute_mode)
3508               target = gen_reg_rtx (compute_mode);
3509
3510             if (rem_flag)
3511               {
3512                 remainder = (GET_CODE (target) == REG
3513                              ? target : gen_reg_rtx (compute_mode));
3514                 quotient = gen_reg_rtx (compute_mode);
3515               }
3516             else
3517               {
3518                 quotient = (GET_CODE (target) == REG
3519                             ? target : gen_reg_rtx (compute_mode));
3520                 remainder = gen_reg_rtx (compute_mode);
3521               }
3522
3523             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
3524                                      remainder, 1))
3525               {
3526                 /* This could be computed with a branch-less sequence.
3527                    Save that for later.  */
3528                 rtx label = gen_label_rtx ();
3529                 do_cmp_and_jump (remainder, const0_rtx, EQ,
3530                                  compute_mode, label);
3531                 expand_inc (quotient, const1_rtx);
3532                 expand_dec (remainder, op1);
3533                 emit_label (label);
3534                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3535               }
3536
3537             /* No luck with division elimination or divmod.  Have to do it
3538                by conditionally adjusting op0 *and* the result.  */
3539             {
3540               rtx label1, label2;
3541               rtx adjusted_op0, tem;
3542
3543               quotient = gen_reg_rtx (compute_mode);
3544               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3545               label1 = gen_label_rtx ();
3546               label2 = gen_label_rtx ();
3547               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
3548                                compute_mode, label1);
3549               emit_move_insn  (quotient, const0_rtx);
3550               emit_jump_insn (gen_jump (label2));
3551               emit_barrier ();
3552               emit_label (label1);
3553               expand_dec (adjusted_op0, const1_rtx);
3554               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
3555                                   quotient, 1, OPTAB_LIB_WIDEN);
3556               if (tem != quotient)
3557                 emit_move_insn (quotient, tem);
3558               expand_inc (quotient, const1_rtx);
3559               emit_label (label2);
3560             }
3561           }
3562         else /* signed */
3563           {
3564             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3565                 && INTVAL (op1) >= 0)
3566               {
3567                 /* This is extremely similar to the code for the unsigned case
3568                    above.  For 2.7 we should merge these variants, but for
3569                    2.6.1 I don't want to touch the code for unsigned since that
3570                    get used in C.  The signed case will only be used by other
3571                    languages (Ada).  */
3572
3573                 rtx t1, t2, t3;
3574                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3575                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3576                                    build_int_2 (floor_log2 (d), 0),
3577                                    tquotient, 0);
3578                 t2 = expand_binop (compute_mode, and_optab, op0,
3579                                    GEN_INT (d - 1),
3580                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3581                 t3 = gen_reg_rtx (compute_mode);
3582                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3583                                       compute_mode, 1, 1);
3584                 if (t3 == 0)
3585                   {
3586                     rtx lab;
3587                     lab = gen_label_rtx ();
3588                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3589                     expand_inc (t1, const1_rtx);
3590                     emit_label (lab);
3591                     quotient = t1;
3592                   }
3593                 else
3594                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3595                                                           t1, t3),
3596                                             tquotient);
3597                 break;
3598               }
3599
3600             /* Try using an instruction that produces both the quotient and
3601                remainder, using truncation.  We can easily compensate the
3602                quotient or remainder to get ceiling rounding, once we have the
3603                remainder.  Notice that we compute also the final remainder
3604                value here, and return the result right away.  */
3605             if (target == 0 || GET_MODE (target) != compute_mode)
3606               target = gen_reg_rtx (compute_mode);
3607             if (rem_flag)
3608               {
3609                 remainder= (GET_CODE (target) == REG
3610                             ? target : gen_reg_rtx (compute_mode));
3611                 quotient = gen_reg_rtx (compute_mode);
3612               }
3613             else
3614               {
3615                 quotient = (GET_CODE (target) == REG
3616                             ? target : gen_reg_rtx (compute_mode));
3617                 remainder = gen_reg_rtx (compute_mode);
3618               }
3619
3620             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
3621                                      remainder, 0))
3622               {
3623                 /* This could be computed with a branch-less sequence.
3624                    Save that for later.  */
3625                 rtx tem;
3626                 rtx label = gen_label_rtx ();
3627                 do_cmp_and_jump (remainder, const0_rtx, EQ,
3628                                  compute_mode, label);
3629                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3630                                     NULL_RTX, 0, OPTAB_WIDEN);
3631                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
3632                 expand_inc (quotient, const1_rtx);
3633                 expand_dec (remainder, op1);
3634                 emit_label (label);
3635                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3636               }
3637
3638             /* No luck with division elimination or divmod.  Have to do it
3639                by conditionally adjusting op0 *and* the result.  */
3640             {
3641               rtx label1, label2, label3, label4, label5;
3642               rtx adjusted_op0;
3643               rtx tem;
3644
3645               quotient = gen_reg_rtx (compute_mode);
3646               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3647               label1 = gen_label_rtx ();
3648               label2 = gen_label_rtx ();
3649               label3 = gen_label_rtx ();
3650               label4 = gen_label_rtx ();
3651               label5 = gen_label_rtx ();
3652               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3653               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
3654                                compute_mode, label1);
3655               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3656                                   quotient, 0, OPTAB_LIB_WIDEN);
3657               if (tem != quotient)
3658                 emit_move_insn (quotient, tem);
3659               emit_jump_insn (gen_jump (label5));
3660               emit_barrier ();
3661               emit_label (label1);
3662               expand_dec (adjusted_op0, const1_rtx);
3663               emit_jump_insn (gen_jump (label4));
3664               emit_barrier ();
3665               emit_label (label2);
3666               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
3667                                compute_mode, label3);
3668               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3669                                   quotient, 0, OPTAB_LIB_WIDEN);
3670               if (tem != quotient)
3671                 emit_move_insn (quotient, tem);
3672               emit_jump_insn (gen_jump (label5));
3673               emit_barrier ();
3674               emit_label (label3);
3675               expand_inc (adjusted_op0, const1_rtx);
3676               emit_label (label4);
3677               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3678                                   quotient, 0, OPTAB_LIB_WIDEN);
3679               if (tem != quotient)
3680                 emit_move_insn (quotient, tem);
3681               expand_inc (quotient, const1_rtx);
3682               emit_label (label5);
3683             }
3684           }
3685         break;
3686
3687       case EXACT_DIV_EXPR:
3688         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3689           {
3690             HOST_WIDE_INT d = INTVAL (op1);
3691             unsigned HOST_WIDE_INT ml;
3692             int post_shift;
3693             rtx t1;
3694
3695             post_shift = floor_log2 (d & -d);
3696             ml = invert_mod2n (d >> post_shift, size);
3697             t1 = expand_mult (compute_mode, op0, GEN_INT (ml), NULL_RTX,
3698                               unsignedp);
3699             quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3700                                      build_int_2 (post_shift, 0),
3701                                      NULL_RTX, unsignedp);
3702
3703             insn = get_last_insn ();
3704             REG_NOTES (insn)
3705               = gen_rtx_EXPR_LIST (REG_EQUAL,
3706                                    gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
3707                                                    compute_mode,
3708                                                    op0, op1),
3709                                    REG_NOTES (insn));
3710           }
3711         break;
3712
3713       case ROUND_DIV_EXPR:
3714       case ROUND_MOD_EXPR:
3715         if (unsignedp)
3716           {
3717             rtx tem;
3718             rtx label;
3719             label = gen_label_rtx ();
3720             quotient = gen_reg_rtx (compute_mode);
3721             remainder = gen_reg_rtx (compute_mode);
3722             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
3723               {
3724                 rtx tem;
3725                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
3726                                          quotient, 1, OPTAB_LIB_WIDEN);
3727                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
3728                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3729                                           remainder, 1, OPTAB_LIB_WIDEN);
3730               }
3731             tem = plus_constant (op1, -1);
3732             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3733                                 build_int_2 (1, 0), NULL_RTX, 1);
3734             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
3735             expand_inc (quotient, const1_rtx);
3736             expand_dec (remainder, op1);
3737             emit_label (label);
3738           }
3739         else
3740           {
3741             rtx abs_rem, abs_op1, tem, mask;
3742             rtx label;
3743             label = gen_label_rtx ();
3744             quotient = gen_reg_rtx (compute_mode);
3745             remainder = gen_reg_rtx (compute_mode);
3746             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
3747               {
3748                 rtx tem;
3749                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
3750                                          quotient, 0, OPTAB_LIB_WIDEN);
3751                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
3752                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3753                                           remainder, 0, OPTAB_LIB_WIDEN);
3754               }
3755             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 0, 0);
3756             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 0, 0);
3757             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
3758                                 build_int_2 (1, 0), NULL_RTX, 1);
3759             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
3760             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3761                                 NULL_RTX, 0, OPTAB_WIDEN);
3762             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3763                                 build_int_2 (size - 1, 0), NULL_RTX, 0);
3764             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
3765                                 NULL_RTX, 0, OPTAB_WIDEN);
3766             tem = expand_binop (compute_mode, sub_optab, tem, mask,
3767                                 NULL_RTX, 0, OPTAB_WIDEN);
3768             expand_inc (quotient, tem);
3769             tem = expand_binop (compute_mode, xor_optab, mask, op1,
3770                                 NULL_RTX, 0, OPTAB_WIDEN);
3771             tem = expand_binop (compute_mode, sub_optab, tem, mask,
3772                                 NULL_RTX, 0, OPTAB_WIDEN);
3773             expand_dec (remainder, tem);
3774             emit_label (label);
3775           }
3776         return gen_lowpart (mode, rem_flag ? remainder : quotient);
3777
3778       default:
3779         abort ();
3780       }
3781
3782   if (quotient == 0)
3783     {
3784       if (target && GET_MODE (target) != compute_mode)
3785         target = 0;
3786
3787       if (rem_flag)
3788         {
3789           /* Try to produce the remainder without producing the quotient.
3790              If we seem to have a divmod patten that does not require widening,
3791              don't try windening here.  We should really have an WIDEN argument
3792              to expand_twoval_binop, since what we'd really like to do here is
3793              1) try a mod insn in compute_mode
3794              2) try a divmod insn in compute_mode
3795              3) try a div insn in compute_mode and multiply-subtract to get
3796                 remainder
3797              4) try the same things with widening allowed.  */
3798           remainder
3799             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3800                                  op0, op1, target,
3801                                  unsignedp,
3802                                  ((optab2->handlers[(int) compute_mode].insn_code
3803                                    != CODE_FOR_nothing)
3804                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
3805           if (remainder == 0)
3806             {
3807               /* No luck there.  Can we do remainder and divide at once
3808                  without a library call?  */
3809               remainder = gen_reg_rtx (compute_mode);
3810               if (! expand_twoval_binop ((unsignedp
3811                                           ? udivmod_optab
3812                                           : sdivmod_optab),
3813                                          op0, op1,
3814                                          NULL_RTX, remainder, unsignedp))
3815                 remainder = 0;
3816             }
3817
3818           if (remainder)
3819             return gen_lowpart (mode, remainder);
3820         }
3821
3822       /* Produce the quotient.  Try a quotient insn, but not a library call.
3823          If we have a divmod in this mode, use it in preference to widening
3824          the div (for this test we assume it will not fail). Note that optab2
3825          is set to the one of the two optabs that the call below will use.  */
3826       quotient
3827         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
3828                              op0, op1, rem_flag ? NULL_RTX : target,
3829                              unsignedp,
3830                              ((optab2->handlers[(int) compute_mode].insn_code
3831                                != CODE_FOR_nothing)
3832                               ? OPTAB_DIRECT : OPTAB_WIDEN));
3833
3834       if (quotient == 0)
3835         {
3836           /* No luck there.  Try a quotient-and-remainder insn,
3837              keeping the quotient alone.  */
3838           quotient = gen_reg_rtx (compute_mode);
3839           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
3840                                      op0, op1,
3841                                      quotient, NULL_RTX, unsignedp))
3842             {
3843               quotient = 0;
3844               if (! rem_flag)
3845                 /* Still no luck.  If we are not computing the remainder,
3846                    use a library call for the quotient.  */
3847                 quotient = sign_expand_binop (compute_mode,
3848                                               udiv_optab, sdiv_optab,
3849                                               op0, op1, target,
3850                                               unsignedp, OPTAB_LIB_WIDEN);
3851             }
3852         }
3853     }
3854
3855   if (rem_flag)
3856     {
3857       if (target && GET_MODE (target) != compute_mode)
3858         target = 0;
3859
3860       if (quotient == 0)
3861         /* No divide instruction either.  Use library for remainder.  */
3862         remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3863                                        op0, op1, target,
3864                                        unsignedp, OPTAB_LIB_WIDEN);
3865       else
3866         {
3867           /* We divided.  Now finish doing X - Y * (X / Y).  */
3868           remainder = expand_mult (compute_mode, quotient, op1,
3869                                    NULL_RTX, unsignedp);
3870           remainder = expand_binop (compute_mode, sub_optab, op0,
3871                                     remainder, target, unsignedp,
3872                                     OPTAB_LIB_WIDEN);
3873         }
3874     }
3875
3876   return gen_lowpart (mode, rem_flag ? remainder : quotient);
3877 }
3878 \f
3879 /* Return a tree node with data type TYPE, describing the value of X.
3880    Usually this is an RTL_EXPR, if there is no obvious better choice.
3881    X may be an expression, however we only support those expressions
3882    generated by loop.c.   */
3883
3884 tree
3885 make_tree (type, x)
3886      tree type;
3887      rtx x;
3888 {
3889   tree t;
3890
3891   switch (GET_CODE (x))
3892     {
3893     case CONST_INT:
3894       t = build_int_2 (INTVAL (x),
3895                        (TREE_UNSIGNED (type)
3896                         && (GET_MODE_BITSIZE (TYPE_MODE (type)) < HOST_BITS_PER_WIDE_INT))
3897                        || INTVAL (x) >= 0 ? 0 : -1);
3898       TREE_TYPE (t) = type;
3899       return t;
3900
3901     case CONST_DOUBLE:
3902       if (GET_MODE (x) == VOIDmode)
3903         {
3904           t = build_int_2 (CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
3905           TREE_TYPE (t) = type;
3906         }
3907       else
3908         {
3909           REAL_VALUE_TYPE d;
3910
3911           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
3912           t = build_real (type, d);
3913         }
3914
3915       return t;
3916
3917     case PLUS:
3918       return fold (build (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
3919                           make_tree (type, XEXP (x, 1))));
3920
3921     case MINUS:
3922       return fold (build (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
3923                           make_tree (type, XEXP (x, 1))));
3924
3925     case NEG:
3926       return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
3927
3928     case MULT:
3929       return fold (build (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
3930                           make_tree (type, XEXP (x, 1))));
3931
3932     case ASHIFT:
3933       return fold (build (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
3934                           make_tree (type, XEXP (x, 1))));
3935
3936     case LSHIFTRT:
3937       return fold (convert (type,
3938                             build (RSHIFT_EXPR, unsigned_type (type),
3939                                    make_tree (unsigned_type (type),
3940                                               XEXP (x, 0)),
3941                                    make_tree (type, XEXP (x, 1)))));
3942
3943     case ASHIFTRT:
3944       return fold (convert (type,
3945                             build (RSHIFT_EXPR, signed_type (type),
3946                                    make_tree (signed_type (type), XEXP (x, 0)),
3947                                    make_tree (type, XEXP (x, 1)))));
3948
3949     case DIV:
3950       if (TREE_CODE (type) != REAL_TYPE)
3951         t = signed_type (type);
3952       else
3953         t = type;
3954
3955       return fold (convert (type,
3956                             build (TRUNC_DIV_EXPR, t,
3957                                    make_tree (t, XEXP (x, 0)),
3958                                    make_tree (t, XEXP (x, 1)))));
3959     case UDIV:
3960       t = unsigned_type (type);
3961       return fold (convert (type,
3962                             build (TRUNC_DIV_EXPR, t,
3963                                    make_tree (t, XEXP (x, 0)),
3964                                    make_tree (t, XEXP (x, 1)))));
3965    default:
3966       t = make_node (RTL_EXPR);
3967       TREE_TYPE (t) = type;
3968       RTL_EXPR_RTL (t) = x;
3969       /* There are no insns to be output
3970          when this rtl_expr is used.  */
3971       RTL_EXPR_SEQUENCE (t) = 0;
3972       return t;
3973     }
3974 }
3975
3976 /* Return an rtx representing the value of X * MULT + ADD.
3977    TARGET is a suggestion for where to store the result (an rtx).
3978    MODE is the machine mode for the computation.
3979    X and MULT must have mode MODE.  ADD may have a different mode.
3980    So can X (defaults to same as MODE).
3981    UNSIGNEDP is non-zero to do unsigned multiplication.
3982    This may emit insns.  */
3983
3984 rtx
3985 expand_mult_add (x, target, mult, add, mode, unsignedp)
3986      rtx x, target, mult, add;
3987      enum machine_mode mode;
3988      int unsignedp;
3989 {
3990   tree type = type_for_mode (mode, unsignedp);
3991   tree add_type = (GET_MODE (add) == VOIDmode
3992                    ? type : type_for_mode (GET_MODE (add), unsignedp));
3993   tree result =  fold (build (PLUS_EXPR, type,
3994                               fold (build (MULT_EXPR, type,
3995                                            make_tree (type, x),
3996                                            make_tree (type, mult))),
3997                               make_tree (add_type, add)));
3998
3999   return expand_expr (result, target, VOIDmode, 0);
4000 }
4001 \f
4002 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4003    and returning TARGET.
4004
4005    If TARGET is 0, a pseudo-register or constant is returned.  */
4006
4007 rtx
4008 expand_and (op0, op1, target)
4009      rtx op0, op1, target;
4010 {
4011   enum machine_mode mode = VOIDmode;
4012   rtx tem;
4013
4014   if (GET_MODE (op0) != VOIDmode)
4015     mode = GET_MODE (op0);
4016   else if (GET_MODE (op1) != VOIDmode)
4017     mode = GET_MODE (op1);
4018
4019   if (mode != VOIDmode)
4020     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4021   else if (GET_CODE (op0) == CONST_INT && GET_CODE (op1) == CONST_INT)
4022     tem = GEN_INT (INTVAL (op0) & INTVAL (op1));
4023   else
4024     abort ();
4025
4026   if (target == 0)
4027     target = tem;
4028   else if (tem != target)
4029     emit_move_insn (target, tem);
4030   return target;
4031 }
4032 \f
4033 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
4034    and storing in TARGET.  Normally return TARGET.
4035    Return 0 if that cannot be done.
4036
4037    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
4038    it is VOIDmode, they cannot both be CONST_INT.
4039
4040    UNSIGNEDP is for the case where we have to widen the operands
4041    to perform the operation.  It says to use zero-extension.
4042
4043    NORMALIZEP is 1 if we should convert the result to be either zero
4044    or one.  Normalize is -1 if we should convert the result to be
4045    either zero or -1.  If NORMALIZEP is zero, the result will be left
4046    "raw" out of the scc insn.  */
4047
4048 rtx
4049 emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep)
4050      rtx target;
4051      enum rtx_code code;
4052      rtx op0, op1;
4053      enum machine_mode mode;
4054      int unsignedp;
4055      int normalizep;
4056 {
4057   rtx subtarget;
4058   enum insn_code icode;
4059   enum machine_mode compare_mode;
4060   enum machine_mode target_mode = GET_MODE (target);
4061   rtx tem;
4062   rtx last = get_last_insn ();
4063   rtx pattern, comparison;
4064
4065   /* If one operand is constant, make it the second one.  Only do this
4066      if the other operand is not constant as well.  */
4067
4068   if ((CONSTANT_P (op0) && ! CONSTANT_P (op1))
4069       || (GET_CODE (op0) == CONST_INT && GET_CODE (op1) != CONST_INT))
4070     {
4071       tem = op0;
4072       op0 = op1;
4073       op1 = tem;
4074       code = swap_condition (code);
4075     }
4076
4077   if (mode == VOIDmode)
4078     mode = GET_MODE (op0);
4079
4080   /* For some comparisons with 1 and -1, we can convert this to
4081      comparisons with zero.  This will often produce more opportunities for
4082      store-flag insns.  */
4083
4084   switch (code)
4085     {
4086     case LT:
4087       if (op1 == const1_rtx)
4088         op1 = const0_rtx, code = LE;
4089       break;
4090     case LE:
4091       if (op1 == constm1_rtx)
4092         op1 = const0_rtx, code = LT;
4093       break;
4094     case GE:
4095       if (op1 == const1_rtx)
4096         op1 = const0_rtx, code = GT;
4097       break;
4098     case GT:
4099       if (op1 == constm1_rtx)
4100         op1 = const0_rtx, code = GE;
4101       break;
4102     case GEU:
4103       if (op1 == const1_rtx)
4104         op1 = const0_rtx, code = NE;
4105       break;
4106     case LTU:
4107       if (op1 == const1_rtx)
4108         op1 = const0_rtx, code = EQ;
4109       break;
4110     default:
4111       break;
4112     }
4113
4114   /* From now on, we won't change CODE, so set ICODE now.  */
4115   icode = setcc_gen_code[(int) code];
4116
4117   /* If this is A < 0 or A >= 0, we can do this by taking the ones
4118      complement of A (for GE) and shifting the sign bit to the low bit.  */
4119   if (op1 == const0_rtx && (code == LT || code == GE)
4120       && GET_MODE_CLASS (mode) == MODE_INT
4121       && (normalizep || STORE_FLAG_VALUE == 1
4122           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4123               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4124                   == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
4125     {
4126       subtarget = target;
4127
4128       /* If the result is to be wider than OP0, it is best to convert it
4129          first.  If it is to be narrower, it is *incorrect* to convert it
4130          first.  */
4131       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
4132         {
4133           op0 = protect_from_queue (op0, 0);
4134           op0 = convert_modes (target_mode, mode, op0, 0);
4135           mode = target_mode;
4136         }
4137
4138       if (target_mode != mode)
4139         subtarget = 0;
4140
4141       if (code == GE)
4142         op0 = expand_unop (mode, one_cmpl_optab, op0,
4143                            ((STORE_FLAG_VALUE == 1 || normalizep)
4144                             ? 0 : subtarget), 0);
4145
4146       if (STORE_FLAG_VALUE == 1 || normalizep)
4147         /* If we are supposed to produce a 0/1 value, we want to do
4148            a logical shift from the sign bit to the low-order bit; for
4149            a -1/0 value, we do an arithmetic shift.  */
4150         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
4151                             size_int (GET_MODE_BITSIZE (mode) - 1),
4152                             subtarget, normalizep != -1);
4153
4154       if (mode != target_mode)
4155         op0 = convert_modes (target_mode, mode, op0, 0);
4156
4157       return op0;
4158     }
4159
4160   if (icode != CODE_FOR_nothing)
4161     {
4162       /* We think we may be able to do this with a scc insn.  Emit the
4163          comparison and then the scc insn.
4164
4165          compare_from_rtx may call emit_queue, which would be deleted below
4166          if the scc insn fails.  So call it ourselves before setting LAST.  */
4167
4168       emit_queue ();
4169       last = get_last_insn ();
4170
4171       comparison
4172         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX, 0);
4173       if (GET_CODE (comparison) == CONST_INT)
4174         return (comparison == const0_rtx ? const0_rtx
4175                 : normalizep == 1 ? const1_rtx
4176                 : normalizep == -1 ? constm1_rtx
4177                 : const_true_rtx);
4178
4179       /* If the code of COMPARISON doesn't match CODE, something is
4180          wrong; we can no longer be sure that we have the operation.
4181          We could handle this case, but it should not happen.  */
4182
4183       if (GET_CODE (comparison) != code)
4184         abort ();
4185
4186       /* Get a reference to the target in the proper mode for this insn.  */
4187       compare_mode = insn_operand_mode[(int) icode][0];
4188       subtarget = target;
4189       if (preserve_subexpressions_p ()
4190           || ! (*insn_operand_predicate[(int) icode][0]) (subtarget, compare_mode))
4191         subtarget = gen_reg_rtx (compare_mode);
4192
4193       pattern = GEN_FCN (icode) (subtarget);
4194       if (pattern)
4195         {
4196           emit_insn (pattern);
4197
4198           /* If we are converting to a wider mode, first convert to
4199              TARGET_MODE, then normalize.  This produces better combining
4200              opportunities on machines that have a SIGN_EXTRACT when we are
4201              testing a single bit.  This mostly benefits the 68k.
4202
4203              If STORE_FLAG_VALUE does not have the sign bit set when
4204              interpreted in COMPARE_MODE, we can do this conversion as
4205              unsigned, which is usually more efficient.  */
4206           if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4207             {
4208               convert_move (target, subtarget,
4209                             (GET_MODE_BITSIZE (compare_mode)
4210                              <= HOST_BITS_PER_WIDE_INT)
4211                             && 0 == (STORE_FLAG_VALUE
4212                                      & ((HOST_WIDE_INT) 1
4213                                         << (GET_MODE_BITSIZE (compare_mode) -1))));
4214               op0 = target;
4215               compare_mode = target_mode;
4216             }
4217           else
4218             op0 = subtarget;
4219
4220           /* If we want to keep subexpressions around, don't reuse our
4221              last target.  */
4222
4223           if (preserve_subexpressions_p ())
4224             subtarget = 0;
4225
4226           /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
4227              we don't have to do anything.  */
4228           if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4229             ;
4230           else if (normalizep == - STORE_FLAG_VALUE)
4231             op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
4232
4233           /* We don't want to use STORE_FLAG_VALUE < 0 below since this
4234              makes it hard to use a value of just the sign bit due to
4235              ANSI integer constant typing rules.  */
4236           else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
4237                    && (STORE_FLAG_VALUE
4238                        & ((HOST_WIDE_INT) 1
4239                           << (GET_MODE_BITSIZE (compare_mode) - 1))))
4240             op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
4241                                 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
4242                                 subtarget, normalizep == 1);
4243           else if (STORE_FLAG_VALUE & 1)
4244             {
4245               op0 = expand_and (op0, const1_rtx, subtarget);
4246               if (normalizep == -1)
4247                 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
4248             }
4249           else
4250             abort ();
4251
4252           /* If we were converting to a smaller mode, do the
4253              conversion now.  */
4254           if (target_mode != compare_mode)
4255             {
4256               convert_move (target, op0, 0);
4257               return target;
4258             }
4259           else
4260             return op0;
4261         }
4262     }
4263
4264   delete_insns_since (last);
4265
4266   /* If expensive optimizations, use different pseudo registers for each
4267      insn, instead of reusing the same pseudo.  This leads to better CSE,
4268      but slows down the compiler, since there are more pseudos */
4269   subtarget = (!flag_expensive_optimizations
4270                && (target_mode == mode)) ? target : NULL_RTX;
4271
4272   /* If we reached here, we can't do this with a scc insn.  However, there
4273      are some comparisons that can be done directly.  For example, if
4274      this is an equality comparison of integers, we can try to exclusive-or
4275      (or subtract) the two operands and use a recursive call to try the
4276      comparison with zero.  Don't do any of these cases if branches are
4277      very cheap.  */
4278
4279   if (BRANCH_COST > 0
4280       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
4281       && op1 != const0_rtx)
4282     {
4283       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
4284                           OPTAB_WIDEN);
4285
4286       if (tem == 0)
4287         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
4288                             OPTAB_WIDEN);
4289       if (tem != 0)
4290         tem = emit_store_flag (target, code, tem, const0_rtx,
4291                                mode, unsignedp, normalizep);
4292       if (tem == 0)
4293         delete_insns_since (last);
4294       return tem;
4295     }
4296
4297   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
4298      the constant zero.  Reject all other comparisons at this point.  Only
4299      do LE and GT if branches are expensive since they are expensive on
4300      2-operand machines.  */
4301
4302   if (BRANCH_COST == 0
4303       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
4304       || (code != EQ && code != NE
4305           && (BRANCH_COST <= 1 || (code != LE && code != GT))))
4306     return 0;
4307
4308   /* See what we need to return.  We can only return a 1, -1, or the
4309      sign bit.  */
4310
4311   if (normalizep == 0)
4312     {
4313       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
4314         normalizep = STORE_FLAG_VALUE;
4315
4316       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4317                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4318                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
4319         ;
4320       else
4321         return 0;
4322     }
4323
4324   /* Try to put the result of the comparison in the sign bit.  Assume we can't
4325      do the necessary operation below.  */
4326
4327   tem = 0;
4328
4329   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
4330      the sign bit set.  */
4331
4332   if (code == LE)
4333     {
4334       /* This is destructive, so SUBTARGET can't be OP0.  */
4335       if (rtx_equal_p (subtarget, op0))
4336         subtarget = 0;
4337
4338       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
4339                           OPTAB_WIDEN);
4340       if (tem)
4341         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
4342                             OPTAB_WIDEN);
4343     }
4344
4345   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
4346      number of bits in the mode of OP0, minus one.  */
4347
4348   if (code == GT)
4349     {
4350       if (rtx_equal_p (subtarget, op0))
4351         subtarget = 0;
4352
4353       tem = expand_shift (RSHIFT_EXPR, mode, op0,
4354                           size_int (GET_MODE_BITSIZE (mode) - 1),
4355                           subtarget, 0);
4356       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
4357                           OPTAB_WIDEN);
4358     }
4359
4360   if (code == EQ || code == NE)
4361     {
4362       /* For EQ or NE, one way to do the comparison is to apply an operation
4363          that converts the operand into a positive number if it is non-zero
4364          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
4365          for NE we negate.  This puts the result in the sign bit.  Then we
4366          normalize with a shift, if needed.
4367
4368          Two operations that can do the above actions are ABS and FFS, so try
4369          them.  If that doesn't work, and MODE is smaller than a full word,
4370          we can use zero-extension to the wider mode (an unsigned conversion)
4371          as the operation.  */
4372
4373       if (abs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4374         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
4375       else if (ffs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4376         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
4377       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
4378         {
4379           op0 = protect_from_queue (op0, 0);
4380           tem = convert_modes (word_mode, mode, op0, 1);
4381           mode = word_mode;
4382         }
4383
4384       if (tem != 0)
4385         {
4386           if (code == EQ)
4387             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
4388                                 0, OPTAB_WIDEN);
4389           else
4390             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
4391         }
4392
4393       /* If we couldn't do it that way, for NE we can "or" the two's complement
4394          of the value with itself.  For EQ, we take the one's complement of
4395          that "or", which is an extra insn, so we only handle EQ if branches
4396          are expensive.  */
4397
4398       if (tem == 0 && (code == NE || BRANCH_COST > 1))
4399         {
4400           if (rtx_equal_p (subtarget, op0))
4401             subtarget = 0;
4402
4403           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
4404           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
4405                               OPTAB_WIDEN);
4406
4407           if (tem && code == EQ)
4408             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
4409         }
4410     }
4411
4412   if (tem && normalizep)
4413     tem = expand_shift (RSHIFT_EXPR, mode, tem,
4414                         size_int (GET_MODE_BITSIZE (mode) - 1),
4415                         subtarget, normalizep == 1);
4416
4417   if (tem)
4418     {
4419       if (GET_MODE (tem) != target_mode)
4420         {
4421           convert_move (target, tem, 0);
4422           tem = target;
4423         }
4424       else if (!subtarget)
4425         {
4426           emit_move_insn (target, tem);
4427           tem = target;
4428         }
4429     }
4430   else
4431     delete_insns_since (last);
4432
4433   return tem;
4434 }
4435
4436 /* Like emit_store_flag, but always succeeds.  */
4437
4438 rtx
4439 emit_store_flag_force (target, code, op0, op1, mode, unsignedp, normalizep)
4440      rtx target;
4441      enum rtx_code code;
4442      rtx op0, op1;
4443      enum machine_mode mode;
4444      int unsignedp;
4445      int normalizep;
4446 {
4447   rtx tem, label;
4448
4449   /* First see if emit_store_flag can do the job.  */
4450   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
4451   if (tem != 0)
4452     return tem;
4453
4454   if (normalizep == 0)
4455     normalizep = 1;
4456
4457   /* If this failed, we have to do this with set/compare/jump/set code.  */
4458
4459   if (GET_CODE (target) != REG
4460       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
4461     target = gen_reg_rtx (GET_MODE (target));
4462
4463   emit_move_insn (target, const1_rtx);
4464   tem = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX, 0);
4465   if (GET_CODE (tem) == CONST_INT)
4466     return tem;
4467
4468   label = gen_label_rtx ();
4469   if (bcc_gen_fctn[(int) code] == 0)
4470     abort ();
4471
4472   emit_jump_insn ((*bcc_gen_fctn[(int) code]) (label));
4473   emit_move_insn (target, const0_rtx);
4474   emit_label (label);
4475
4476   return target;
4477 }
4478 \f
4479 /* Perform possibly multi-word comparison and conditional jump to LABEL
4480    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
4481
4482    The algorithm is based on the code in expr.c:do_jump.
4483
4484    Note that this does not perform a general comparison.  Only variants
4485    generated within expmed.c are correctly handled, others abort (but could
4486    be handled if needed).  */
4487
4488 static void
4489 do_cmp_and_jump (arg1, arg2, op, mode, label)
4490      rtx arg1, arg2, label;
4491     enum rtx_code op;
4492     enum machine_mode mode;
4493 {
4494   /* If this mode is an integer too wide to compare properly,
4495      compare word by word.  Rely on cse to optimize constant cases.  */
4496
4497   if (GET_MODE_CLASS (mode) == MODE_INT && !can_compare_p (mode))
4498     {
4499       rtx label2 = gen_label_rtx ();
4500
4501       switch (op)
4502         {
4503         case LTU:
4504           do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
4505           break;
4506
4507         case LEU:
4508           do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
4509           break;
4510
4511         case LT:
4512           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
4513           break;
4514
4515         case GT:
4516           do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
4517           break;
4518
4519         case GE:
4520           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
4521           break;
4522
4523           /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
4524              that's the only equality operations we do */
4525         case EQ:
4526           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
4527             abort();
4528           do_jump_by_parts_equality_rtx (arg1, label2, label);
4529           break;
4530
4531         case NE:
4532           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
4533             abort();
4534           do_jump_by_parts_equality_rtx (arg1, label, label2);
4535           break;
4536
4537         default:
4538           abort();
4539         }
4540
4541       emit_label (label2);
4542     }
4543   else
4544     {
4545       emit_cmp_insn(arg1, arg2, op, NULL_RTX, mode, 0, 0);
4546       if (bcc_gen_fctn[(int) op] == 0)
4547         abort ();
4548       emit_jump_insn ((*bcc_gen_fctn[(int) op]) (label));
4549     }
4550 }