(*zeroextract[qs]i_compare0_scratch): Use const_int_operand
[official-gcc.git] / gcc / expmed.c
blobdbd7eb2bb4f9da3776214b8c135cfe04c581295c
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 88, 89, 92-5, 1996 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
23 #include "config.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "flags.h"
27 #include "insn-flags.h"
28 #include "insn-codes.h"
29 #include "insn-config.h"
30 #include "expr.h"
31 #include "real.h"
32 #include "recog.h"
34 static void store_fixed_bit_field PROTO((rtx, int, int, int, rtx, int));
35 static void store_split_bit_field PROTO((rtx, int, int, rtx, int));
36 static rtx extract_fixed_bit_field PROTO((enum machine_mode, rtx, int,
37 int, int, rtx, int, int));
38 static rtx mask_rtx PROTO((enum machine_mode, int,
39 int, int));
40 static rtx lshift_value PROTO((enum machine_mode, rtx,
41 int, int));
42 static rtx extract_split_bit_field PROTO((rtx, int, int, int, int));
/* CEIL(x,y): divide X by Y, rounding up (X, Y assumed positive).  */
44 #define CEIL(x,y) (((x) + (y) - 1) / (y))
46 /* Non-zero means divides or modulus operations are relatively cheap for
47 powers of two, so don't use branches; emit the operation instead.
48 Usually, this will mean that the MD file will emit non-branch
49 sequences. */
51 static int sdiv_pow2_cheap, smod_pow2_cheap;
53 #ifndef SLOW_UNALIGNED_ACCESS
54 #define SLOW_UNALIGNED_ACCESS STRICT_ALIGNMENT
55 #endif
57 /* For compilers that support multiple targets with different word sizes,
58 MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD. An example
59 is the H8/300(H) compiler. */
61 #ifndef MAX_BITS_PER_WORD
62 #define MAX_BITS_PER_WORD BITS_PER_WORD
63 #endif
65 /* Cost of various pieces of RTL. Note that some of these are indexed by shift count,
66 and some by mode. */
/* All of the tables below are filled in once by init_expmed and then only
   read; shift_cost[m] is the cost of a shift by M bits, the *_cost[mode]
   tables are indexed by (int) machine mode.  */
67 static int add_cost, negate_cost, zero_cost;
68 static int shift_cost[MAX_BITS_PER_WORD];
69 static int shiftadd_cost[MAX_BITS_PER_WORD];
70 static int shiftsub_cost[MAX_BITS_PER_WORD];
71 static int mul_cost[NUM_MACHINE_MODES];
72 static int div_cost[NUM_MACHINE_MODES];
73 static int mul_widen_cost[NUM_MACHINE_MODES];
74 static int mul_highpart_cost[NUM_MACHINE_MODES];
/* Fill in the static cost tables above by building candidate insns
   (shift, shift-add, shift-sub, mul, div, ...) on a scratch pseudo and
   asking recog/rtx_cost what the target charges for them.  Must be
   called once, before any of the expanders below consult the tables.  */
76 void
77 init_expmed ()
79   char *free_point;
80   /* This is "some random pseudo register" for purposes of calling recog
81      to see what insns exist. */
82   rtx reg = gen_rtx (REG, word_mode, 10000);
83   rtx shift_insn, shiftadd_insn, shiftsub_insn;
84   int dummy;
85   int m;
86   enum machine_mode mode, wider_mode;
88   start_sequence ();
90   /* Since we are on the permanent obstack, we must be sure we save this
91      spot AFTER we call start_sequence, since it will reuse the rtl it
92      makes. */
94   free_point = (char *) oballoc (0);
96   zero_cost = rtx_cost (const0_rtx, 0);
97   add_cost = rtx_cost (gen_rtx (PLUS, word_mode, reg, reg), SET);
99   shift_insn = emit_insn (gen_rtx (SET, VOIDmode, reg,
100 				   gen_rtx (ASHIFT, word_mode, reg,
101 					    const0_rtx)));
103   shiftadd_insn = emit_insn (gen_rtx (SET, VOIDmode, reg,
104 				      gen_rtx (PLUS, word_mode,
105 					       gen_rtx (MULT, word_mode,
106 							reg, const0_rtx),
107 					       reg)));
109   shiftsub_insn = emit_insn (gen_rtx (SET, VOIDmode, reg,
110 				      gen_rtx (MINUS, word_mode,
111 					       gen_rtx (MULT, word_mode,
112 							reg, const0_rtx),
113 					       reg)));
115   init_recog ();
117   shift_cost[0] = 0;
118   shiftadd_cost[0] = shiftsub_cost[0] = add_cost;
120   for (m = 1; m < BITS_PER_WORD; m++)
/* 32000 acts as "effectively infinite": used when the target has no insn
   recognizable for this shift count, so the synth code never picks it.  */
122       shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;
/* Patch the probe insns in place with the real shift count / multiplier,
   then ask recog whether the target accepts the resulting pattern.  */
124       XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);
125       if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)
126 	shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);
128       XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1)
129 	= GEN_INT ((HOST_WIDE_INT) 1 << m);
130       if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)
131 	shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);
133       XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1)
134 	= GEN_INT ((HOST_WIDE_INT) 1 << m);
135       if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)
136 	shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);
139   negate_cost = rtx_cost (gen_rtx (NEG, word_mode, reg), SET);
/* GEN_INT (32) is just a representative power of two; any would do for
   probing whether the target's div/mod by 2^k is cheaper than branching.  */
141   sdiv_pow2_cheap
142     = (rtx_cost (gen_rtx (DIV, word_mode, reg, GEN_INT (32)), SET)
143        <= 2 * add_cost);
144   smod_pow2_cheap
145     = (rtx_cost (gen_rtx (MOD, word_mode, reg, GEN_INT (32)), SET)
146        <= 2 * add_cost);
148   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
149        mode != VOIDmode;
150        mode = GET_MODE_WIDER_MODE (mode))
152       reg = gen_rtx (REG, mode, 10000);
153       div_cost[(int) mode] = rtx_cost (gen_rtx (UDIV, mode, reg, reg), SET);
154       mul_cost[(int) mode] = rtx_cost (gen_rtx (MULT, mode, reg, reg), SET);
155       wider_mode = GET_MODE_WIDER_MODE (mode);
156       if (wider_mode != VOIDmode)
158 	  mul_widen_cost[(int) wider_mode]
159 	    = rtx_cost (gen_rtx (MULT, wider_mode,
160 				 gen_rtx (ZERO_EXTEND, wider_mode, reg),
161 				 gen_rtx (ZERO_EXTEND, wider_mode, reg)),
162 			SET);
163 	  mul_highpart_cost[(int) mode]
164 	    = rtx_cost (gen_rtx (TRUNCATE, mode,
165 				 gen_rtx (LSHIFTRT, wider_mode,
166 					  gen_rtx (MULT, wider_mode,
167 						   gen_rtx (ZERO_EXTEND, wider_mode, reg),
168 						   gen_rtx (ZERO_EXTEND, wider_mode, reg)),
169 					  GEN_INT (GET_MODE_BITSIZE (mode)))),
170 			SET);
174   /* Free the objects we just allocated. */
175   end_sequence ();
176   obfree (free_point);
179 /* Return an rtx representing minus the value of X.
180    MODE is the intended mode of the result,
181    useful if X is a CONST_INT. */
184 negate_rtx (mode, x)
185      enum machine_mode mode;
186      rtx x;
/* A CONST_INT is negated at compile time and truncated/sign-extended to
   MODE's width; anything else expands a runtime NEG in X's own mode.
   NOTE(review): "- INTVAL (x)" overflows (C signed overflow) when X is
   the most negative HOST_WIDE_INT; harmless in practice here since the
   result is masked below, but worth confirming on pickier hosts.  */
188   if (GET_CODE (x) == CONST_INT)
190       HOST_WIDE_INT val = - INTVAL (x);
191       if (GET_MODE_BITSIZE (mode) < HOST_BITS_PER_WIDE_INT)
193 	  /* Sign extend the value from the bits that are significant. */
194 	  if (val & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
195 	    val |= (HOST_WIDE_INT) (-1) << GET_MODE_BITSIZE (mode);
196 	  else
197 	    val &= ((HOST_WIDE_INT) 1 << GET_MODE_BITSIZE (mode)) - 1;
199       return GEN_INT (val);
201   else
202     return expand_unop (GET_MODE (x), neg_optab, x, NULL_RTX, 0);
205 /* Generate code to store value from rtx VALUE
206    into a bit-field within structure STR_RTX
207    containing BITSIZE bits starting at bit BITNUM.
208    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
209    ALIGN is the alignment that STR_RTX is known to have, measured in bytes.
210    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying. */
212 /* ??? Note that there are two different ideas here for how
213    to determine the size to count bits within, for a register.
214    One is BITS_PER_WORD, and the other is the size of operand 3
215    of the insv pattern. (The latter assumes that an n-bit machine
216    will be able to insert bit fields up to n bits wide.)
217    It isn't certain that either of these is right.
218    extract_bit_field has the same quandary. */
221 store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, align, total_size)
222      rtx str_rtx;
223      register int bitsize;
224      int bitnum;
225      enum machine_mode fieldmode;
226      rtx value;
227      int align;
228      int total_size;
/* Strategy, tried in order: (1) full-word/multi-word store via SUBREG or
   address adjustment; (2) lsb-aligned store via movstrict; (3) fields
   wider than a word are split into word-sized recursive calls; (4) the
   target's insv pattern if available; (5) fall back to shift/mask
   arithmetic in store_fixed_bit_field.  Returns VALUE.  */
230   int unit = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
231   register int offset = bitnum / unit;
232   register int bitpos = bitnum % unit;
233   register rtx op0 = str_rtx;
/* A bit-field store into memory only makes sense inside a structure.  */
235   if (GET_CODE (str_rtx) == MEM && ! MEM_IN_STRUCT_P (str_rtx))
236     abort ();
238   /* Discount the part of the structure before the desired byte.
239      We need to know how many bytes are safe to reference after it. */
240   if (total_size >= 0)
241     total_size -= (bitpos / BIGGEST_ALIGNMENT
242 		   * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
244   while (GET_CODE (op0) == SUBREG)
246       /* The following line once was done only if WORDS_BIG_ENDIAN,
247 	 but I think that is a mistake. WORDS_BIG_ENDIAN is
248 	 meaningful at a much higher level; when structures are copied
249 	 between memory and regs, the higher-numbered regs
250 	 always get higher addresses. */
251       offset += SUBREG_WORD (op0);
252       /* We used to adjust BITPOS here, but now we do the whole adjustment
253 	 right after the loop. */
254       op0 = SUBREG_REG (op0);
257   /* If OP0 is a register, BITPOS must count within a word.
258      But as we have it, it counts within whatever size OP0 now has.
259      On a bigendian machine, these are not the same, so convert. */
260   if (BYTES_BIG_ENDIAN
261       && GET_CODE (op0) != MEM
262       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
263     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
265   value = protect_from_queue (value, 0);
267   if (flag_force_mem)
268     value = force_not_mem (value);
270   /* Note that the adjustment of BITPOS above has no effect on whether
271      BITPOS is 0 in a REG bigger than a word. */
272   if (GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
273       && (GET_CODE (op0) != MEM
274 	  || ! SLOW_UNALIGNED_ACCESS
275 	  || (offset * BITS_PER_UNIT % bitsize == 0
276 	      && align % GET_MODE_SIZE (fieldmode) == 0))
277       && bitpos == 0 && bitsize == GET_MODE_BITSIZE (fieldmode))
279       /* Storing in a full-word or multi-word field in a register
280 	 can be done with just SUBREG. */
281       if (GET_MODE (op0) != fieldmode)
283 	  if (GET_CODE (op0) == REG)
284 	    op0 = gen_rtx (SUBREG, fieldmode, op0, offset);
285 	  else
286 	    op0 = change_address (op0, fieldmode,
287 				  plus_constant (XEXP (op0, 0), offset));
289       emit_move_insn (op0, value);
290       return value;
293   /* Storing an lsb-aligned field in a register
294      can be done with a movestrict instruction. */
296   if (GET_CODE (op0) != MEM
297       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
298       && bitsize == GET_MODE_BITSIZE (fieldmode)
299       && (GET_MODE (op0) == fieldmode
300 	  || (movstrict_optab->handlers[(int) fieldmode].insn_code
301 	      != CODE_FOR_nothing)))
303       /* Get appropriate low part of the value being stored. */
304       if (GET_CODE (value) == CONST_INT || GET_CODE (value) == REG)
305 	value = gen_lowpart (fieldmode, value);
306       else if (!(GET_CODE (value) == SYMBOL_REF
307 		 || GET_CODE (value) == LABEL_REF
308 		 || GET_CODE (value) == CONST))
309 	value = convert_to_mode (fieldmode, value, 0);
311       if (GET_MODE (op0) == fieldmode)
312 	emit_move_insn (op0, value);
313       else
315 	  int icode = movstrict_optab->handlers[(int) fieldmode].insn_code;
316 	  if(! (*insn_operand_predicate[icode][1]) (value, fieldmode))
317 	    value = copy_to_mode_reg (fieldmode, value);
318 	  emit_insn (GEN_FCN (icode)
319 		     (gen_rtx (SUBREG, fieldmode, op0, offset), value));
321       return value;
324   /* Handle fields bigger than a word. */
326   if (bitsize > BITS_PER_WORD)
328       /* Here we transfer the words of the field
329 	 in the order least significant first.
330 	 This is because the most significant word is the one which may
331 	 be less than full.
332 	 However, only do that if the value is not BLKmode. */
334       int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
336       int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
337       int i;
339       /* This is the mode we must force value to, so that there will be enough
340 	 subwords to extract. Note that fieldmode will often (always?) be
341 	 VOIDmode, because that is what store_field uses to indicate that this
342 	 is a bit field, but passing VOIDmode to operand_subword_force will
343 	 result in an abort. */
344       fieldmode = mode_for_size (nwords * BITS_PER_WORD, MODE_INT, 0);
346       for (i = 0; i < nwords; i++)
348 	  /* If I is 0, use the low-order word in both field and target;
349 	     if I is 1, use the next to lowest word; and so on. */
350 	  int wordnum = (backwards ? nwords - i - 1 : i);
351 	  int bit_offset = (backwards
352 			    ? MAX (bitsize - (i + 1) * BITS_PER_WORD, 0)
353 			    : i * BITS_PER_WORD);
/* Recurse with at most one word of the field per call.  */
354 	  store_bit_field (op0, MIN (BITS_PER_WORD,
355 				     bitsize - i * BITS_PER_WORD),
356 			   bitnum + bit_offset, word_mode,
357 			   operand_subword_force (value, wordnum,
358 						  (GET_MODE (value) == VOIDmode
359 						   ? fieldmode
360 						   : GET_MODE (value))),
361 			   align, total_size);
363       return value;
366   /* From here on we can assume that the field to be stored in is
367      a full-word (whatever type that is), since it is shorter than a word. */
369   /* OFFSET is the number of words or bytes (UNIT says which)
370      from STR_RTX to the first word or byte containing part of the field. */
372   if (GET_CODE (op0) == REG)
374       if (offset != 0
375 	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
376 	op0 = gen_rtx (SUBREG, TYPE_MODE (type_for_size (BITS_PER_WORD, 0)),
377 		       op0, offset);
378       offset = 0;
380   else
382       op0 = protect_from_queue (op0, 1);
385   /* If VALUE is a floating-point mode, access it as an integer of the
386      corresponding size. This can occur on a machine with 64 bit registers
387      that uses SFmode for float. This can also occur for unaligned float
388      structure fields. */
389   if (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT)
391       if (GET_CODE (value) != REG)
392 	value = copy_to_reg (value);
393       value = gen_rtx (SUBREG, word_mode, value, 0);
396   /* Now OFFSET is nonzero only if OP0 is memory
397      and is therefore always measured in bytes. */
399 #ifdef HAVE_insv
/* Try the target's insv (insert-value) pattern.  Operand 3 of insv bounds
   how wide a field the machine can insert; bail out to the generic code
   when the field will not fit.  */
400   if (HAVE_insv
401       && GET_MODE (value) != BLKmode
402       && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
403       /* Ensure insv's size is wide enough for this field. */
404       && (GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_insv][3])
405 	  >= bitsize)
406       && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
407 	    && (bitsize + bitpos
408 		> GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_insv][3]))))
410       int xbitpos = bitpos;
411       rtx value1;
412       rtx xop0 = op0;
413       rtx last = get_last_insn ();
414       rtx pat;
415       enum machine_mode maxmode
416 	= insn_operand_mode[(int) CODE_FOR_insv][3];
418       int save_volatile_ok = volatile_ok;
419       volatile_ok = 1;
421       /* If this machine's insv can only insert into a register, or if we
422 	 are to force MEMs into a register, copy OP0 into a register and
423 	 save it back later. */
424       if (GET_CODE (op0) == MEM
425 	  && (flag_force_mem
426 	      || ! ((*insn_operand_predicate[(int) CODE_FOR_insv][0])
427 		    (op0, VOIDmode))))
429 	  rtx tempreg;
430 	  enum machine_mode bestmode;
432 	  /* Get the mode to use for inserting into this field. If OP0 is
433 	     BLKmode, get the smallest mode consistent with the alignment. If
434 	     OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
435 	     mode. Otherwise, use the smallest mode containing the field. */
437 	  if (GET_MODE (op0) == BLKmode
438 	      || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
439 	    bestmode
440 	      = get_best_mode (bitsize, bitnum, align * BITS_PER_UNIT, maxmode,
441 			       MEM_VOLATILE_P (op0));
442 	  else
443 	    bestmode = GET_MODE (op0);
445 	  if (bestmode == VOIDmode
446 	      || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align))
447 	    goto insv_loses;
449 	  /* Adjust address to point to the containing unit of that mode. */
450 	  unit = GET_MODE_BITSIZE (bestmode);
451 	  /* Compute offset as multiple of this unit, counting in bytes. */
452 	  offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
453 	  bitpos = bitnum % unit;
454 	  op0 = change_address (op0, bestmode,
455 				plus_constant (XEXP (op0, 0), offset));
457 	  /* Fetch that unit, store the bitfield in it, then store the unit. */
458 	  tempreg = copy_to_reg (op0);
459 	  store_bit_field (tempreg, bitsize, bitpos, fieldmode, value,
460 			   align, total_size);
461 	  emit_move_insn (op0, tempreg);
462 	  return value;
464       volatile_ok = save_volatile_ok;
466       /* Add OFFSET into OP0's address. */
467       if (GET_CODE (xop0) == MEM)
468 	xop0 = change_address (xop0, byte_mode,
469 			       plus_constant (XEXP (xop0, 0), offset));
471       /* If xop0 is a register, we need it in MAXMODE
472 	 to make it acceptable to the format of insv. */
473       if (GET_CODE (xop0) == SUBREG)
474 	/* We can't just change the mode, because this might clobber op0,
475 	   and we will need the original value of op0 if insv fails. */
476 	xop0 = gen_rtx (SUBREG, maxmode, SUBREG_REG (xop0), SUBREG_WORD (xop0));
477       if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
478 	xop0 = gen_rtx (SUBREG, maxmode, xop0, 0);
480       /* On big-endian machines, we count bits from the most significant.
481 	 If the bit field insn does not, we must invert. */
483       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
484 	xbitpos = unit - bitsize - xbitpos;
486       /* We have been counting XBITPOS within UNIT.
487 	 Count instead within the size of the register. */
488       if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
489 	xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
491       unit = GET_MODE_BITSIZE (maxmode);
493       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1. */
494       value1 = value;
495       if (GET_MODE (value) != maxmode)
497 	  if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
499 	      /* Optimization: Don't bother really extending VALUE
500 		 if it has all the bits we will actually use. However,
501 		 if we must narrow it, be sure we do it correctly. */
503 	      if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
505 		  /* Avoid making subreg of a subreg, or of a mem. */
506 		  if (GET_CODE (value1) != REG)
507 		    value1 = copy_to_reg (value1);
508 		  value1 = gen_rtx (SUBREG, maxmode, value1, 0);
510 	      else
511 		value1 = gen_lowpart (maxmode, value1);
513 	  else if (!CONSTANT_P (value))
514 	    /* Parse phase is supposed to make VALUE's data type
515 	       match that of the component reference, which is a type
516 	       at least as wide as the field; so VALUE should have
517 	       a mode that corresponds to that type. */
518 	    abort ();
521       /* If this machine's insv insists on a register,
522 	 get VALUE1 into a register. */
523       if (! ((*insn_operand_predicate[(int) CODE_FOR_insv][3])
524 	     (value1, maxmode)))
525 	value1 = force_reg (maxmode, value1);
527       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
528       if (pat)
529 	emit_insn (pat);
530       else
/* insv pattern generation failed after all; discard the partial insn
   stream and fall back to the shift/mask implementation.  */
532 	  delete_insns_since (last);
533 	  store_fixed_bit_field (op0, offset, bitsize, bitpos, value, align);
536   else
537     insv_loses:
538 #endif
539     /* Insv is not available; store using shifts and boolean ops. */
540     store_fixed_bit_field (op0, offset, bitsize, bitpos, value, align);
541   return value;
544 /* Use shifts and boolean operations to store VALUE
545    into a bit field of width BITSIZE
546    in a memory location specified by OP0 except offset by OFFSET bytes.
547    (OFFSET must be 0 if OP0 is a register.)
548    The field starts at position BITPOS within the byte.
549    (If OP0 is a register, it may be a full word or a narrower mode,
550    but BITPOS still counts within a full word,
551    which is significant on bigendian machines.)
552    STRUCT_ALIGN is the alignment the structure is known to have (in bytes).
554    Note that protect_from_queue has already been done on OP0 and VALUE. */
556 static void
557 store_fixed_bit_field (op0, offset, bitsize, bitpos, value, struct_align)
558      register rtx op0;
559      register int offset, bitsize, bitpos;
560      register rtx value;
561      int struct_align;
/* Implements the store as: (op0 & ~mask) | (value << bitpos), skipping
   the AND when VALUE is all ones and the IOR when it is zero.  Fields
   that cross a word (or best-mode) boundary go to store_split_bit_field.  */
563   register enum machine_mode mode;
564   int total_bits = BITS_PER_WORD;
565   rtx subtarget, temp;
566   int all_zero = 0;
567   int all_one = 0;
569   /* There is a case not handled here:
570      a structure with a known alignment of just a halfword
571      and a field split across two aligned halfwords within the structure.
572      Or likewise a structure with a known alignment of just a byte
573      and a field split across two bytes.
574      Such cases are not supposed to be able to occur. */
576   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
578       if (offset != 0)
579 	abort ();
580       /* Special treatment for a bit field split across two registers. */
581       if (bitsize + bitpos > BITS_PER_WORD)
583 	  store_split_bit_field (op0, bitsize, bitpos,
584 				 value, BITS_PER_WORD);
585 	  return;
588   else
590       /* Get the proper mode to use for this field. We want a mode that
591 	 includes the entire field. If such a mode would be larger than
592 	 a word, we won't be doing the extraction the normal way. */
594       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
595 			    struct_align * BITS_PER_UNIT, word_mode,
596 			    GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0));
598       if (mode == VOIDmode)
600 	  /* The only way this should occur is if the field spans word
601 	     boundaries. */
602 	  store_split_bit_field (op0,
603 				 bitsize, bitpos + offset * BITS_PER_UNIT,
604 				 value, struct_align);
605 	  return;
608       total_bits = GET_MODE_BITSIZE (mode);
610       /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
611 	 be be in the range 0 to total_bits-1, and put any excess bytes in
612 	 OFFSET. */
613       if (bitpos >= total_bits)
615 	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
616 	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
617 		     * BITS_PER_UNIT);
620       /* Get ref to an aligned byte, halfword, or word containing the field.
621 	 Adjust BITPOS to be position within a word,
622 	 and OFFSET to be the offset of that word.
623 	 Then alter OP0 to refer to that word. */
624       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
625       offset -= (offset % (total_bits / BITS_PER_UNIT));
626       op0 = change_address (op0, mode,
627 			    plus_constant (XEXP (op0, 0), offset));
630   mode = GET_MODE (op0);
632   /* Now MODE is either some integral mode for a MEM as OP0,
633      or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
634      The bit field is contained entirely within OP0.
635      BITPOS is the starting bit number within OP0.
636      (OP0's mode may actually be narrower than MODE.) */
638   if (BYTES_BIG_ENDIAN)
639     /* BITPOS is the distance between our msb
640        and that of the containing datum.
641        Convert it to the distance from the lsb. */
642     bitpos = total_bits - bitsize - bitpos;
644   /* Now BITPOS is always the distance between our lsb
645      and that of OP0. */
647   /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
648      we must first convert its mode to MODE. */
650   if (GET_CODE (value) == CONST_INT)
652       register HOST_WIDE_INT v = INTVAL (value);
654       if (bitsize < HOST_BITS_PER_WIDE_INT)
655 	v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
/* Detect the all-zero / all-one constants so the AND or IOR below can
   be skipped entirely.  */
657       if (v == 0)
658 	all_zero = 1;
659       else if ((bitsize < HOST_BITS_PER_WIDE_INT
660 		&& v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
661 	       || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
662 	all_one = 1;
664       value = lshift_value (mode, value, bitpos, bitsize);
666   else
668       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
669 		      && bitpos + bitsize != GET_MODE_BITSIZE (mode));
671       if (GET_MODE (value) != mode)
673 	  if ((GET_CODE (value) == REG || GET_CODE (value) == SUBREG)
674 	      && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
675 	    value = gen_lowpart (mode, value);
676 	  else
677 	    value = convert_to_mode (mode, value, 1);
680       if (must_and)
681 	value = expand_binop (mode, and_optab, value,
682 			      mask_rtx (mode, 0, bitsize, 0),
683 			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
684       if (bitpos > 0)
685 	value = expand_shift (LSHIFT_EXPR, mode, value,
686 			      build_int_2 (bitpos, 0), NULL_RTX, 1);
689   /* Now clear the chosen bits in OP0,
690      except that if VALUE is -1 we need not bother. */
692   subtarget = (GET_CODE (op0) == REG || ! flag_force_mem) ? op0 : 0;
694   if (! all_one)
696       temp = expand_binop (mode, and_optab, op0,
697 			   mask_rtx (mode, bitpos, bitsize, 1),
698 			   subtarget, 1, OPTAB_LIB_WIDEN);
699       subtarget = temp;
701   else
702     temp = op0;
704   /* Now logical-or VALUE into OP0, unless it is zero. */
706   if (! all_zero)
707     temp = expand_binop (mode, ior_optab, temp, value,
708 			 subtarget, 1, OPTAB_LIB_WIDEN);
709   if (op0 != temp)
710     emit_move_insn (op0, temp);
713 /* Store a bit field that is split across multiple accessible memory objects.
715    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
716    BITSIZE is the field width; BITPOS the position of its first bit
717    (within the word).
718    VALUE is the value to store.
719    ALIGN is the known alignment of OP0, measured in bytes.
720    This is also the size of the memory objects to be used.
722    This does not yet handle fields wider than BITS_PER_WORD. */
724 static void
725 store_split_bit_field (op0, bitsize, bitpos, value, align)
726      rtx op0;
727      int bitsize, bitpos;
728      rtx value;
729      int align;
/* Walks the field one aligned unit at a time, extracting the matching
   piece of VALUE and handing each piece to store_fixed_bit_field.  */
731   int unit;
732   int bitsdone = 0;
734   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
735      much at a time. */
736   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
737     unit = BITS_PER_WORD;
738   else
739     unit = MIN (align * BITS_PER_UNIT, BITS_PER_WORD);
741   /* If VALUE is a constant other than a CONST_INT, get it into a register in
742      WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
743      that VALUE might be a floating-point constant. */
744   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
746       rtx word = gen_lowpart_common (word_mode, value);
748       if (word && (value != word))
749 	value = word;
750       else
751 	value = gen_lowpart_common (word_mode,
752 				    force_reg (GET_MODE (value) != VOIDmode
753 					       ? GET_MODE (value)
754 					       : word_mode, value));
757   while (bitsdone < bitsize)
759       int thissize;
760       rtx part, word;
761       int thispos;
762       int offset;
764       offset = (bitpos + bitsdone) / unit;
765       thispos = (bitpos + bitsdone) % unit;
767       /* THISSIZE must not overrun a word boundary. Otherwise,
768 	 store_fixed_bit_field will call us again, and we will mutually
769 	 recurse forever. */
770       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
771       thissize = MIN (thissize, unit - thispos);
773       if (BYTES_BIG_ENDIAN)
775 	  int total_bits;
777 	  /* We must do an endian conversion exactly the same way as it is
778 	     done in extract_bit_field, so that the two calls to
779 	     extract_fixed_bit_field will have comparable arguments. */
780 	  if (GET_CODE (value) != MEM || GET_MODE (value) == BLKmode)
781 	    total_bits = BITS_PER_WORD;
782 	  else
783 	    total_bits = GET_MODE_BITSIZE (GET_MODE (value));
785 	  /* Fetch successively less significant portions. */
786 	  if (GET_CODE (value) == CONST_INT)
787 	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
788 			     >> (bitsize - bitsdone - thissize))
789 			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
790 	  else
791 	    /* The args are chosen so that the last part includes the
792 	       lsb. Give extract_bit_field the value it needs (with
793 	       endianness compensation) to fetch the piece we want. */
794 	    part = extract_fixed_bit_field (word_mode, value, 0, thissize,
795 					    total_bits - bitsize + bitsdone,
796 					    NULL_RTX, 1, align);
798       else
800 	  /* Fetch successively more significant portions. */
801 	  if (GET_CODE (value) == CONST_INT)
802 	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
803 			     >> bitsdone)
804 			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
805 	  else
806 	    part = extract_fixed_bit_field (word_mode, value, 0, thissize,
807 					    bitsdone, NULL_RTX, 1, align);
810       /* If OP0 is a register, then handle OFFSET here.
812 	 When handling multiword bitfields, extract_bit_field may pass
813 	 down a word_mode SUBREG of a larger REG for a bitfield that actually
814 	 crosses a word boundary. Thus, for a SUBREG, we must find
815 	 the current word starting from the base register. */
816       if (GET_CODE (op0) == SUBREG)
818 	  word = operand_subword_force (SUBREG_REG (op0),
819 					SUBREG_WORD (op0) + offset,
820 					GET_MODE (SUBREG_REG (op0)));
821 	  offset = 0;
823       else if (GET_CODE (op0) == REG)
825 	  word = operand_subword_force (op0, offset, GET_MODE (op0));
826 	  offset = 0;
828       else
829 	word = op0;
831       /* OFFSET is in UNITs, and UNIT is in bits.
832 	 store_fixed_bit_field wants offset in bytes. */
833       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT,
834 			     thissize, thispos, part, align);
835       bitsdone += thissize;
839 /* Generate code to extract a byte-field from STR_RTX
840 containing BITSIZE bits, starting at BITNUM,
841 and put it in TARGET if possible (if TARGET is nonzero).
842 Regardless of TARGET, we return the rtx for where the value is placed.
843 It may be a QUEUED.
845 STR_RTX is the structure containing the byte (a REG or MEM).
846 UNSIGNEDP is nonzero if this is an unsigned bit field.
847 MODE is the natural mode of the field value once extracted.
848 TMODE is the mode the caller would like the value to have;
849 but the value may be returned with type MODE instead.
851 ALIGN is the alignment that STR_RTX is known to have, measured in bytes.
852 TOTAL_SIZE is the size in bytes of the containing structure,
853 or -1 if varying.
855 If a TARGET is specified and we can store in it at no extra cost,
856 we do so, and return TARGET.
857 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
858 if they are equally easy. */
861 extract_bit_field (str_rtx, bitsize, bitnum, unsignedp,
862 target, mode, tmode, align, total_size)
863 rtx str_rtx;
864 register int bitsize;
865 int bitnum;
866 int unsignedp;
867 rtx target;
868 enum machine_mode mode, tmode;
869 int align;
870 int total_size;
872 int unit = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
873 register int offset = bitnum / unit;
874 register int bitpos = bitnum % unit;
875 register rtx op0 = str_rtx;
876 rtx spec_target = target;
877 rtx spec_target_subreg = 0;
879 /* Discount the part of the structure before the desired byte.
880 We need to know how many bytes are safe to reference after it. */
881 if (total_size >= 0)
882 total_size -= (bitpos / BIGGEST_ALIGNMENT
883 * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
885 if (tmode == VOIDmode)
886 tmode = mode;
887 while (GET_CODE (op0) == SUBREG)
889 int outer_size = GET_MODE_BITSIZE (GET_MODE (op0));
890 int inner_size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op0)));
892 offset += SUBREG_WORD (op0);
894 if (BYTES_BIG_ENDIAN && (outer_size < inner_size))
896 bitpos += inner_size - outer_size;
897 if (bitpos > unit)
899 offset += (bitpos / unit);
900 bitpos %= unit;
904 op0 = SUBREG_REG (op0);
907 /* ??? We currently assume TARGET is at least as big as BITSIZE.
908 If that's wrong, the solution is to test for it and set TARGET to 0
909 if needed. */
911 /* If OP0 is a register, BITPOS must count within a word.
912 But as we have it, it counts within whatever size OP0 now has.
913 On a bigendian machine, these are not the same, so convert. */
914 if (BYTES_BIG_ENDIAN &&
915 GET_CODE (op0) != MEM
916 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
917 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
919 /* Extracting a full-word or multi-word value
920 from a structure in a register or aligned memory.
921 This can be done with just SUBREG.
922 So too extracting a subword value in
923 the least significant part of the register. */
925 if ((GET_CODE (op0) == REG
926 || (GET_CODE (op0) == MEM
927 && (! SLOW_UNALIGNED_ACCESS
928 || (offset * BITS_PER_UNIT % bitsize == 0
929 && align * BITS_PER_UNIT % bitsize == 0))))
930 && ((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
931 && bitpos % BITS_PER_WORD == 0)
932 || (mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) != BLKmode
933 && (BYTES_BIG_ENDIAN
934 ? bitpos + bitsize == BITS_PER_WORD
935 : bitpos == 0))))
937 enum machine_mode mode1
938 = mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0);
940 if (mode1 != GET_MODE (op0))
942 if (GET_CODE (op0) == REG)
943 op0 = gen_rtx (SUBREG, mode1, op0, offset);
944 else
945 op0 = change_address (op0, mode1,
946 plus_constant (XEXP (op0, 0), offset));
948 if (mode1 != mode)
949 return convert_to_mode (tmode, op0, unsignedp);
950 return op0;
953 /* Handle fields bigger than a word. */
955 if (bitsize > BITS_PER_WORD)
957 /* Here we transfer the words of the field
958 in the order least significant first.
959 This is because the most significant word is the one which may
960 be less than full. */
962 int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
963 int i;
965 if (target == 0 || GET_CODE (target) != REG)
966 target = gen_reg_rtx (mode);
968 /* Indicate for flow that the entire target reg is being set. */
969 emit_insn (gen_rtx (CLOBBER, VOIDmode, target));
971 for (i = 0; i < nwords; i++)
973 /* If I is 0, use the low-order word in both field and target;
974 if I is 1, use the next to lowest word; and so on. */
975 /* Word number in TARGET to use. */
976 int wordnum = (WORDS_BIG_ENDIAN
977 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
978 : i);
979 /* Offset from start of field in OP0. */
980 int bit_offset = (WORDS_BIG_ENDIAN
981 ? MAX (0, bitsize - (i + 1) * BITS_PER_WORD)
982 : i * BITS_PER_WORD);
983 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
984 rtx result_part
985 = extract_bit_field (op0, MIN (BITS_PER_WORD,
986 bitsize - i * BITS_PER_WORD),
987 bitnum + bit_offset,
988 1, target_part, mode, word_mode,
989 align, total_size);
991 if (target_part == 0)
992 abort ();
994 if (result_part != target_part)
995 emit_move_insn (target_part, result_part);
998 if (unsignedp)
1000 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1001 need to be zero'd out. */
1002 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1004 int i,total_words;
1006 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1007 for (i = nwords; i < total_words; i++)
1009 int wordnum = WORDS_BIG_ENDIAN ? total_words - i - 1 : i;
1010 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1011 emit_move_insn (target_part, const0_rtx);
1014 return target;
1017 /* Signed bit field: sign-extend with two arithmetic shifts. */
1018 target = expand_shift (LSHIFT_EXPR, mode, target,
1019 build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1020 NULL_RTX, 0);
1021 return expand_shift (RSHIFT_EXPR, mode, target,
1022 build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1023 NULL_RTX, 0);
1026 /* From here on we know the desired field is smaller than a word
1027 so we can assume it is an integer. So we can safely extract it as one
1028 size of integer, if necessary, and then truncate or extend
1029 to the size that is wanted. */
1031 /* OFFSET is the number of words or bytes (UNIT says which)
1032 from STR_RTX to the first word or byte containing part of the field. */
1034 if (GET_CODE (op0) == REG)
1036 if (offset != 0
1037 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1038 op0 = gen_rtx (SUBREG, TYPE_MODE (type_for_size (BITS_PER_WORD, 0)),
1039 op0, offset);
1040 offset = 0;
1042 else
1044 op0 = protect_from_queue (str_rtx, 1);
1047 /* Now OFFSET is nonzero only for memory operands. */
1049 if (unsignedp)
1051 #ifdef HAVE_extzv
1052 if (HAVE_extzv
1053 && (GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_extzv][0])
1054 >= bitsize)
1055 && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1056 && (bitsize + bitpos
1057 > GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_extzv][0]))))
1059 int xbitpos = bitpos, xoffset = offset;
1060 rtx bitsize_rtx, bitpos_rtx;
1061 rtx last = get_last_insn();
1062 rtx xop0 = op0;
1063 rtx xtarget = target;
1064 rtx xspec_target = spec_target;
1065 rtx xspec_target_subreg = spec_target_subreg;
1066 rtx pat;
1067 enum machine_mode maxmode
1068 = insn_operand_mode[(int) CODE_FOR_extzv][0];
1070 if (GET_CODE (xop0) == MEM)
1072 int save_volatile_ok = volatile_ok;
1073 volatile_ok = 1;
1075 /* Is the memory operand acceptable? */
1076 if (flag_force_mem
1077 || ! ((*insn_operand_predicate[(int) CODE_FOR_extzv][1])
1078 (xop0, GET_MODE (xop0))))
1080 /* No, load into a reg and extract from there. */
1081 enum machine_mode bestmode;
1083 /* Get the mode to use for inserting into this field. If
1084 OP0 is BLKmode, get the smallest mode consistent with the
1085 alignment. If OP0 is a non-BLKmode object that is no
1086 wider than MAXMODE, use its mode. Otherwise, use the
1087 smallest mode containing the field. */
1089 if (GET_MODE (xop0) == BLKmode
1090 || (GET_MODE_SIZE (GET_MODE (op0))
1091 > GET_MODE_SIZE (maxmode)))
1092 bestmode = get_best_mode (bitsize, bitnum,
1093 align * BITS_PER_UNIT, maxmode,
1094 MEM_VOLATILE_P (xop0));
1095 else
1096 bestmode = GET_MODE (xop0);
1098 if (bestmode == VOIDmode
1099 || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align))
1100 goto extzv_loses;
1102 /* Compute offset as multiple of this unit,
1103 counting in bytes. */
1104 unit = GET_MODE_BITSIZE (bestmode);
1105 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1106 xbitpos = bitnum % unit;
1107 xop0 = change_address (xop0, bestmode,
1108 plus_constant (XEXP (xop0, 0),
1109 xoffset));
1110 /* Fetch it to a register in that size. */
1111 xop0 = force_reg (bestmode, xop0);
1113 /* XBITPOS counts within UNIT, which is what is expected. */
1115 else
1116 /* Get ref to first byte containing part of the field. */
1117 xop0 = change_address (xop0, byte_mode,
1118 plus_constant (XEXP (xop0, 0), xoffset));
1120 volatile_ok = save_volatile_ok;
1123 /* If op0 is a register, we need it in MAXMODE (which is usually
1124 SImode). to make it acceptable to the format of extzv. */
1125 if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1126 abort ();
1127 if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1128 xop0 = gen_rtx (SUBREG, maxmode, xop0, 0);
1130 /* On big-endian machines, we count bits from the most significant.
1131 If the bit field insn does not, we must invert. */
1132 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1133 xbitpos = unit - bitsize - xbitpos;
1135 /* Now convert from counting within UNIT to counting in MAXMODE. */
1136 if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1137 xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1139 unit = GET_MODE_BITSIZE (maxmode);
1141 if (xtarget == 0
1142 || (flag_force_mem && GET_CODE (xtarget) == MEM))
1143 xtarget = xspec_target = gen_reg_rtx (tmode);
1145 if (GET_MODE (xtarget) != maxmode)
1147 if (GET_CODE (xtarget) == REG)
1149 int wider = (GET_MODE_SIZE (maxmode)
1150 > GET_MODE_SIZE (GET_MODE (xtarget)));
1151 xtarget = gen_lowpart (maxmode, xtarget);
1152 if (wider)
1153 xspec_target_subreg = xtarget;
1155 else
1156 xtarget = gen_reg_rtx (maxmode);
1159 /* If this machine's extzv insists on a register target,
1160 make sure we have one. */
1161 if (! ((*insn_operand_predicate[(int) CODE_FOR_extzv][0])
1162 (xtarget, maxmode)))
1163 xtarget = gen_reg_rtx (maxmode);
1165 bitsize_rtx = GEN_INT (bitsize);
1166 bitpos_rtx = GEN_INT (xbitpos);
1168 pat = gen_extzv (protect_from_queue (xtarget, 1),
1169 xop0, bitsize_rtx, bitpos_rtx);
1170 if (pat)
1172 emit_insn (pat);
1173 target = xtarget;
1174 spec_target = xspec_target;
1175 spec_target_subreg = xspec_target_subreg;
1177 else
1179 delete_insns_since (last);
1180 target = extract_fixed_bit_field (tmode, op0, offset, bitsize,
1181 bitpos, target, 1, align);
1184 else
1185 extzv_loses:
1186 #endif
1187 target = extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
1188 target, 1, align);
1190 else
1192 #ifdef HAVE_extv
1193 if (HAVE_extv
1194 && (GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_extv][0])
1195 >= bitsize)
1196 && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1197 && (bitsize + bitpos
1198 > GET_MODE_BITSIZE (insn_operand_mode[(int) CODE_FOR_extv][0]))))
1200 int xbitpos = bitpos, xoffset = offset;
1201 rtx bitsize_rtx, bitpos_rtx;
1202 rtx last = get_last_insn();
1203 rtx xop0 = op0, xtarget = target;
1204 rtx xspec_target = spec_target;
1205 rtx xspec_target_subreg = spec_target_subreg;
1206 rtx pat;
1207 enum machine_mode maxmode
1208 = insn_operand_mode[(int) CODE_FOR_extv][0];
1210 if (GET_CODE (xop0) == MEM)
1212 /* Is the memory operand acceptable? */
1213 if (! ((*insn_operand_predicate[(int) CODE_FOR_extv][1])
1214 (xop0, GET_MODE (xop0))))
1216 /* No, load into a reg and extract from there. */
1217 enum machine_mode bestmode;
1219 /* Get the mode to use for inserting into this field. If
1220 OP0 is BLKmode, get the smallest mode consistent with the
1221 alignment. If OP0 is a non-BLKmode object that is no
1222 wider than MAXMODE, use its mode. Otherwise, use the
1223 smallest mode containing the field. */
1225 if (GET_MODE (xop0) == BLKmode
1226 || (GET_MODE_SIZE (GET_MODE (op0))
1227 > GET_MODE_SIZE (maxmode)))
1228 bestmode = get_best_mode (bitsize, bitnum,
1229 align * BITS_PER_UNIT, maxmode,
1230 MEM_VOLATILE_P (xop0));
1231 else
1232 bestmode = GET_MODE (xop0);
1234 if (bestmode == VOIDmode
1235 || (SLOW_UNALIGNED_ACCESS && GET_MODE_SIZE (bestmode) > align))
1236 goto extv_loses;
1238 /* Compute offset as multiple of this unit,
1239 counting in bytes. */
1240 unit = GET_MODE_BITSIZE (bestmode);
1241 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1242 xbitpos = bitnum % unit;
1243 xop0 = change_address (xop0, bestmode,
1244 plus_constant (XEXP (xop0, 0),
1245 xoffset));
1246 /* Fetch it to a register in that size. */
1247 xop0 = force_reg (bestmode, xop0);
1249 /* XBITPOS counts within UNIT, which is what is expected. */
1251 else
1252 /* Get ref to first byte containing part of the field. */
1253 xop0 = change_address (xop0, byte_mode,
1254 plus_constant (XEXP (xop0, 0), xoffset));
1257 /* If op0 is a register, we need it in MAXMODE (which is usually
1258 SImode) to make it acceptable to the format of extv. */
1259 if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1260 abort ();
1261 if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1262 xop0 = gen_rtx (SUBREG, maxmode, xop0, 0);
1264 /* On big-endian machines, we count bits from the most significant.
1265 If the bit field insn does not, we must invert. */
1266 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1267 xbitpos = unit - bitsize - xbitpos;
1269 /* XBITPOS counts within a size of UNIT.
1270 Adjust to count within a size of MAXMODE. */
1271 if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1272 xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1274 unit = GET_MODE_BITSIZE (maxmode);
1276 if (xtarget == 0
1277 || (flag_force_mem && GET_CODE (xtarget) == MEM))
1278 xtarget = xspec_target = gen_reg_rtx (tmode);
1280 if (GET_MODE (xtarget) != maxmode)
1282 if (GET_CODE (xtarget) == REG)
1284 int wider = (GET_MODE_SIZE (maxmode)
1285 > GET_MODE_SIZE (GET_MODE (xtarget)));
1286 xtarget = gen_lowpart (maxmode, xtarget);
1287 if (wider)
1288 xspec_target_subreg = xtarget;
1290 else
1291 xtarget = gen_reg_rtx (maxmode);
1294 /* If this machine's extv insists on a register target,
1295 make sure we have one. */
1296 if (! ((*insn_operand_predicate[(int) CODE_FOR_extv][0])
1297 (xtarget, maxmode)))
1298 xtarget = gen_reg_rtx (maxmode);
1300 bitsize_rtx = GEN_INT (bitsize);
1301 bitpos_rtx = GEN_INT (xbitpos);
1303 pat = gen_extv (protect_from_queue (xtarget, 1),
1304 xop0, bitsize_rtx, bitpos_rtx);
1305 if (pat)
1307 emit_insn (pat);
1308 target = xtarget;
1309 spec_target = xspec_target;
1310 spec_target_subreg = xspec_target_subreg;
1312 else
1314 delete_insns_since (last);
1315 target = extract_fixed_bit_field (tmode, op0, offset, bitsize,
1316 bitpos, target, 0, align);
1319 else
1320 extv_loses:
1321 #endif
1322 target = extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
1323 target, 0, align);
1325 if (target == spec_target)
1326 return target;
1327 if (target == spec_target_subreg)
1328 return spec_target;
1329 if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1331 /* If the target mode is floating-point, first convert to the
1332 integer mode of that size and then access it as a floating-point
1333 value via a SUBREG. */
1334 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1336 target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
1337 MODE_INT, 0),
1338 target, unsignedp);
1339 if (GET_CODE (target) != REG)
1340 target = copy_to_reg (target);
1341 return gen_rtx (SUBREG, tmode, target, 0);
1343 else
1344 return convert_to_mode (tmode, target, unsignedp);
1346 return target;
/* Extract a bit field using shifts and boolean operations
   Returns an rtx to represent the value.
   OP0 addresses a register (word) or memory (byte).
   BITPOS says which bit within the word or byte the bit field starts in.
   OFFSET says how many bytes farther the bit field starts;
     it is 0 if OP0 is a register.
   BITSIZE says how many bits long the bit field is.
     (If OP0 is a register, it may be narrower than a full word,
     but BITPOS still counts within a full word,
     which is significant on bigendian machines.)

   UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
   If TARGET is nonzero, attempts to store the value there
   and return TARGET, but this is not guaranteed.
   If TARGET is not used, create a pseudo-reg of mode TMODE for the value.

   ALIGN is the alignment that STR_RTX is known to have, measured in bytes.  */

static rtx
extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
			 target, unsignedp, align)
     enum machine_mode tmode;
     register rtx op0, target;
     register int offset, bitsize, bitpos;
     int unsignedp;
     int align;
{
  int total_bits = BITS_PER_WORD;
  enum machine_mode mode;

  if (GET_CODE (op0) == SUBREG || GET_CODE (op0) == REG)
    {
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
	return extract_split_bit_field (op0, bitsize, bitpos,
					unsignedp, align);
    }
  else
    {
      /* Get the proper mode to use for this field.  We want a mode that
	 includes the entire field.  If such a mode would be larger than
	 a word, we won't be doing the extraction the normal way.  */

      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
			    align * BITS_PER_UNIT, word_mode,
			    GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
	/* The only way this should occur is if the field spans word
	   boundaries.  */
	return extract_split_bit_field (op0, bitsize,
					bitpos + offset * BITS_PER_UNIT,
					unsignedp, align);

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
	 be in the range 0 to total_bits-1, and put any excess bytes in
	 OFFSET.  */
      if (bitpos >= total_bits)
	{
	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
		     * BITS_PER_UNIT);
	}

      /* Get ref to an aligned byte, halfword, or word containing the field.
	 Adjust BITPOS to be position within a word,
	 and OFFSET to be the offset of that word.
	 Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = change_address (op0, mode,
			    plus_constant (XEXP (op0, 0), offset));
    }

  mode = GET_MODE (op0);

  if (BYTES_BIG_ENDIAN)
    {
      /* BITPOS is the distance between our msb and that of OP0.
	 Convert it to the distance from the lsb.  */

      bitpos = total_bits - bitsize - bitpos;
    }

  /* Now BITPOS is always the distance between the field's lsb and that of OP0.
     We have reduced the big-endian case to the little-endian case.  */

  if (unsignedp)
    {
      if (bitpos)
	{
	  /* If the field does not already start at the lsb,
	     shift it so it does.  */
	  tree amount = build_int_2 (bitpos, 0);
	  /* Maybe propagate the target for the shift.  */
	  /* But not if we will return it--could confuse integrate.c.  */
	  rtx subtarget = (target != 0 && GET_CODE (target) == REG
			   && !REG_FUNCTION_VALUE_P (target)
			   ? target : 0);
	  /* The shift target must have the mode we shift in, not TMODE.  */
	  if (tmode != mode) subtarget = 0;
	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
	}
      /* Convert the value to the desired mode.  */
      if (mode != tmode)
	op0 = convert_to_mode (tmode, op0, 1);

      /* Unless the msb of the field used to be the msb when we shifted,
	 mask out the upper bits.  */

      if (GET_MODE_BITSIZE (mode) != bitpos + bitsize
#if 0
#ifdef SLOW_ZERO_EXTEND
	  /* Always generate an `and' if
	     we just zero-extended op0 and SLOW_ZERO_EXTEND, since it
	     will combine fruitfully with the zero-extend.  */
	  || tmode != mode
#endif
#endif
	  )
	return expand_binop (GET_MODE (op0), and_optab, op0,
			     mask_rtx (GET_MODE (op0), 0, bitsize, 0),
			     target, 1, OPTAB_LIB_WIDEN);
      return op0;
    }

  /* To extract a signed bit-field, first shift its msb to the msb of the word,
     then arithmetic-shift its lsb to the lsb of the word.  */
  op0 = force_reg (mode, op0);
  /* TARGET was meant for TMODE; do not propagate it into another mode.  */
  if (mode != tmode)
    target = 0;

  /* Find the narrowest integer mode that contains the field.  */

  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
       mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
      {
	op0 = convert_to_mode (mode, op0, 0);
	break;
      }

  if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
    {
      tree amount = build_int_2 (GET_MODE_BITSIZE (mode) - (bitsize + bitpos), 0);
      /* Maybe propagate the target for the shift.  */
      /* But not if we will return the result--could confuse integrate.c.  */
      rtx subtarget = (target != 0 && GET_CODE (target) == REG
		       && ! REG_FUNCTION_VALUE_P (target)
		       ? target : 0);
      op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
    }

  /* Arithmetic right shift brings the field's lsb to bit 0 while
     replicating the sign bit, completing the sign extension.  */
  return expand_shift (RSHIFT_EXPR, mode, op0,
		       build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
		       target, 0);
}
/* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
   of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
   complement of that if COMPLEMENT.  The mask is truncated if
   necessary to the width of mode MODE.  The mask is zero-extended if
   BITSIZE+BITPOS is too small for MODE.

   The mask is built as a pair of host words (MASKLOW holds the
   low-order HOST_BITS_PER_WIDE_INT bits, MASKHIGH the rest) so that
   masks wider than one host word can be represented.  */

static rtx
mask_rtx (mode, bitpos, bitsize, complement)
     enum machine_mode mode;
     int bitpos, bitsize, complement;
{
  HOST_WIDE_INT masklow, maskhigh;

  /* Start with all ones from bit BITPOS upward in the low word; if the
     zeros fill the entire low word, it is simply 0.  */
  if (bitpos < HOST_BITS_PER_WIDE_INT)
    masklow = (HOST_WIDE_INT) -1 << bitpos;
  else
    masklow = 0;

  /* Clear any bits above BITPOS+BITSIZE that fall in the low word.  */
  if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
    masklow &= ((unsigned HOST_WIDE_INT) -1
		>> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));

  /* Same construction for the high word: ones from the bit position
     (relative to the high word) upward...  */
  if (bitpos <= HOST_BITS_PER_WIDE_INT)
    maskhigh = -1;
  else
    maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);

  /* ...then clear everything above BITPOS+BITSIZE.  If the field ends
     within the low word, the high word is entirely zero.  */
  if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
    maskhigh &= ((unsigned HOST_WIDE_INT) -1
		 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
  else
    maskhigh = 0;

  if (complement)
    {
      maskhigh = ~maskhigh;
      masklow = ~masklow;
    }

  return immed_double_const (masklow, maskhigh, mode);
}
/* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
   VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.
   VALUE is expected to be a CONST_INT (INTVAL reads its integer payload).
   The result is built as a low/high pair of host words so that shifts
   crossing the host word boundary are handled.  */

static rtx
lshift_value (mode, value, bitpos, bitsize)
     enum machine_mode mode;
     rtx value;
     int bitpos, bitsize;
{
  unsigned HOST_WIDE_INT v = INTVAL (value);
  HOST_WIDE_INT low, high;

  /* Truncate V to its low BITSIZE bits.  */
  if (bitsize < HOST_BITS_PER_WIDE_INT)
    v &= ~((HOST_WIDE_INT) -1 << bitsize);

  if (bitpos < HOST_BITS_PER_WIDE_INT)
    {
      /* The shift straddles (or stays below) the host word boundary:
	 bits shifted out of the low word land in the high word.  */
      low = v << bitpos;
      high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
    }
  else
    {
      /* The whole value lands in the high word.  */
      low = 0;
      high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
    }

  return immed_double_const (low, high, mode);
}
/* Extract a bit field that is split across two words
   and return an RTX for the result.

   OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
   BITSIZE is the field width; BITPOS, position of its first bit, in the word.
   UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.

   ALIGN is the known alignment of OP0, measured in bytes.
   This is also the size of the memory objects to be used.

   The field is extracted UNIT bits at a time, each piece fetched
   unsigned via extract_fixed_bit_field and OR'd into place in RESULT;
   the sign extension, if any, is done once at the end.  */

static rtx
extract_split_bit_field (op0, bitsize, bitpos, unsignedp, align)
     rtx op0;
     int bitsize, bitpos, unsignedp, align;
{
  int unit;
  int bitsdone = 0;		/* Number of bits of the field extracted so far.  */
  rtx result;
  int first = 1;		/* Nonzero until RESULT holds a partial value.  */

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (align * BITS_PER_UNIT, BITS_PER_WORD);

  while (bitsdone < bitsize)
    {
      int thissize;		/* Width of the piece extracted this iteration.  */
      rtx part, word;
      int thispos;		/* Bit position of the piece within its unit.  */
      int offset;		/* Which unit the piece starts in.  */

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 extract_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      /* If OP0 is a register, then handle OFFSET here.

	 When handling multiword bitfields, extract_bit_field may pass
	 down a word_mode SUBREG of a larger REG for a bitfield that actually
	 crosses a word boundary.  Thus, for a SUBREG, we must find
	 the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
	{
	  word = operand_subword_force (SUBREG_REG (op0),
					SUBREG_WORD (op0) + offset,
					GET_MODE (SUBREG_REG (op0)));
	  offset = 0;
	}
      else if (GET_CODE (op0) == REG)
	{
	  word = operand_subword_force (op0, offset, GET_MODE (op0));
	  offset = 0;
	}
      else
	word = op0;

      /* Extract the parts in bit-counting order,
	 whose meaning is determined by BYTES_PER_UNIT.
	 OFFSET is in UNITs, and UNIT is in bits.
	 extract_fixed_bit_field wants offset in bytes.  */
      part = extract_fixed_bit_field (word_mode, word,
				      offset * unit / BITS_PER_UNIT,
				      thissize, thispos, 0, 1, align);
      bitsdone += thissize;

      /* Shift this part into place for the result.  */
      if (BYTES_BIG_ENDIAN)
	{
	  if (bitsize != bitsdone)
	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
				 build_int_2 (bitsize - bitsdone, 0), 0, 1);
	}
      else
	{
	  if (bitsdone != thissize)
	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
				 build_int_2 (bitsdone - thissize, 0), 0, 1);
	}

      if (first)
	result = part;
      else
	/* Combine the parts with bitwise or.  This works
	   because we extracted each part as an unsigned bit field.  */
	result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
			       OPTAB_LIB_WIDEN);

      first = 0;
    }

  /* Unsigned bit field: we are done.  */
  if (unsignedp)
    return result;
  /* Signed bit field: sign-extend with two arithmetic shifts.  */
  result = expand_shift (LSHIFT_EXPR, word_mode, result,
			 build_int_2 (BITS_PER_WORD - bitsize, 0),
			 NULL_RTX, 0);
  return expand_shift (RSHIFT_EXPR, word_mode, result,
		       build_int_2 (BITS_PER_WORD - bitsize, 0), NULL_RTX, 0);
}
1688 /* Add INC into TARGET. */
1690 void
1691 expand_inc (target, inc)
1692 rtx target, inc;
1694 rtx value = expand_binop (GET_MODE (target), add_optab,
1695 target, inc,
1696 target, 0, OPTAB_LIB_WIDEN);
1697 if (value != target)
1698 emit_move_insn (target, value);
1701 /* Subtract DEC from TARGET. */
1703 void
1704 expand_dec (target, dec)
1705 rtx target, dec;
1707 rtx value = expand_binop (GET_MODE (target), sub_optab,
1708 target, dec,
1709 target, 0, OPTAB_LIB_WIDEN);
1710 if (value != target)
1711 emit_move_insn (target, value);
/* Output a shift instruction for expression code CODE,
   with SHIFTED being the rtx for the value to shift,
   and AMOUNT the tree for the amount to shift by.
   Store the result in the rtx TARGET, if that is convenient.
   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
   Return the rtx for where the value is.

   Three strategies are tried in order: a direct insn, a widened insn,
   and finally a library call (OPTAB_LIB_WIDEN).  */

rtx
expand_shift (code, mode, shifted, amount, target, unsignedp)
     enum tree_code code;
     register enum machine_mode mode;
     rtx shifted;
     tree amount;
     register rtx target;
     int unsignedp;
{
  register rtx op1, temp = 0;
  register int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
  register int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
  int try;

  /* Previously detected shift-counts computed by NEGATE_EXPR
     and shifted in the other direction; but that does not work
     on all machines.  */

  op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);

#ifdef SHIFT_COUNT_TRUNCATED
  /* On machines that truncate shift counts, reduce a constant count
     modulo the mode width up front.  */
  if (SHIFT_COUNT_TRUNCATED
      && GET_CODE (op1) == CONST_INT
      && (unsigned HOST_WIDE_INT) INTVAL (op1) >= GET_MODE_BITSIZE (mode))
    op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
		   % GET_MODE_BITSIZE (mode));
#endif

  /* Shifting by zero is a no-op.  */
  if (op1 == const0_rtx)
    return shifted;

  for (try = 0; temp == 0 && try < 3; try++)
    {
      enum optab_methods methods;

      if (try == 0)
	methods = OPTAB_DIRECT;
      else if (try == 1)
	methods = OPTAB_WIDEN;
      else
	methods = OPTAB_LIB_WIDEN;

      if (rotate)
	{
	  /* Widening does not work for rotation.  */
	  if (methods == OPTAB_WIDEN)
	    continue;
	  else if (methods == OPTAB_LIB_WIDEN)
	    {
	      /* If we have been unable to open-code this by a rotation,
		 do it as the IOR of two shifts.  I.e., to rotate A
		 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
		 where C is the bitsize of A.

		 It is theoretically possible that the target machine might
		 not be able to perform either shift and hence we would
		 be making two libcalls rather than just the one for the
		 shift (similarly if IOR could not be done).  We will allow
		 this extremely unlikely lossage to avoid complicating the
		 code below.  */

	      rtx subtarget = target == shifted ? 0 : target;
	      rtx temp1;
	      tree type = TREE_TYPE (amount);
	      tree new_amount = make_tree (type, op1);
	      tree other_amount
		= fold (build (MINUS_EXPR, type,
			       convert (type,
					build_int_2 (GET_MODE_BITSIZE (mode),
						     0)),
			       amount));

	      shifted = force_reg (mode, shifted);

	      temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
				   mode, shifted, new_amount, subtarget, 1);
	      temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
				    mode, shifted, other_amount, 0, 1);
	      return expand_binop (mode, ior_optab, temp, temp1, target,
				   unsignedp, methods);
	    }

	  temp = expand_binop (mode,
			       left ? rotl_optab : rotr_optab,
			       shifted, op1, target, unsignedp, methods);

	  /* If we don't have the rotate, but we are rotating by a constant
	     that is in range, try a rotate in the opposite direction.  */

	  if (temp == 0 && GET_CODE (op1) == CONST_INT
	      && INTVAL (op1) > 0 && INTVAL (op1) < GET_MODE_BITSIZE (mode))
	    temp = expand_binop (mode,
				 left ? rotr_optab : rotl_optab,
				 shifted,
				 GEN_INT (GET_MODE_BITSIZE (mode)
					  - INTVAL (op1)),
				 target, unsignedp, methods);
	}
      else if (unsignedp)
	temp = expand_binop (mode,
			     left ? ashl_optab : lshr_optab,
			     shifted, op1, target, unsignedp, methods);

      /* Do arithmetic shifts.
	 Also, if we are going to widen the operand, we can just as well
	 use an arithmetic right-shift instead of a logical one.  */
      if (temp == 0 && ! rotate
	  && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
	{
	  enum optab_methods methods1 = methods;

	  /* If trying to widen a log shift to an arithmetic shift,
	     don't accept an arithmetic shift of the same size.  */
	  if (unsignedp)
	    methods1 = OPTAB_MUST_WIDEN;

	  /* Arithmetic shift */

	  temp = expand_binop (mode,
			       left ? ashl_optab : ashr_optab,
			       shifted, op1, target, unsignedp, methods1);
	}

      /* We used to try extzv here for logical right shifts, but that was
	 only useful for one machine, the VAX, and caused poor code
	 generation there for lshrdi3, so the code was deleted and a
	 define_expand for lshrsi3 was added to vax.md.  */
    }

  if (temp == 0)
    abort ();
  return temp;
}
/* Operation codes recorded by synth_mult; see the comment on struct
   algorithm below for the meaning of each code.  */
enum alg_code { alg_zero, alg_m, alg_shift,
		alg_add_t_m2, alg_sub_t_m2,
		alg_add_factor, alg_sub_factor,
		alg_add_t2_m, alg_sub_t2_m,
		alg_add, alg_subtract, alg_factor, alg_shiftop };
/* This structure records a sequence of operations.
   `ops' is the number of operations recorded.
   `cost' is their total cost.
   The operations are stored in `op' and the corresponding
   logarithms of the integer coefficients in `log'.

   These are the operations:
   alg_zero		total := 0;
   alg_m		total := multiplicand;
   alg_shift		total := total * coeff
   alg_add_t_m2		total := total + multiplicand * coeff;
   alg_sub_t_m2		total := total - multiplicand * coeff;
   alg_add_factor	total := total * coeff + total;
   alg_sub_factor	total := total * coeff - total;
   alg_add_t2_m		total := total * coeff + multiplicand;
   alg_sub_t2_m		total := total * coeff - multiplicand;

   The first operand must be either alg_zero or alg_m.  */

struct algorithm
{
  short cost;		/* Total cost of the recorded sequence.  */
  short ops;		/* Number of operations recorded.  */
  /* The size of the OP and LOG fields are not directly related to the
     word size, but the worst-case algorithms will be if we have few
     consecutive ones or zeros, i.e., a multiplicand like 10101010101...
     In that case we will generate shift-by-2, add, shift-by-2, add,...,
     in total wordsize operations.  */
  enum alg_code op[MAX_BITS_PER_WORD];
  char log[MAX_BITS_PER_WORD];
};
/* Compute and return the best algorithm for multiplying by T.
   The algorithm must cost less than COST_LIMIT.
   If retval.cost >= COST_LIMIT, no algorithm was found and all
   other fields of the returned struct are undefined.  */
1898 static void
1899 synth_mult (alg_out, t, cost_limit)
1900 struct algorithm *alg_out;
1901 unsigned HOST_WIDE_INT t;
1902 int cost_limit;
1904 int m;
1905 struct algorithm *alg_in, *best_alg;
1906 unsigned int cost;
1907 unsigned HOST_WIDE_INT q;
1909 /* Indicate that no algorithm is yet found. If no algorithm
1910 is found, this value will be returned and indicate failure. */
1911 alg_out->cost = cost_limit;
1913 if (cost_limit <= 0)
1914 return;
1916 /* t == 1 can be done in zero cost. */
1917 if (t == 1)
1919 alg_out->ops = 1;
1920 alg_out->cost = 0;
1921 alg_out->op[0] = alg_m;
1922 return;
1925 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
1926 fail now. */
1927 if (t == 0)
1929 if (zero_cost >= cost_limit)
1930 return;
1931 else
1933 alg_out->ops = 1;
1934 alg_out->cost = zero_cost;
1935 alg_out->op[0] = alg_zero;
1936 return;
1940 /* We'll be needing a couple extra algorithm structures now. */
1942 alg_in = (struct algorithm *)alloca (sizeof (struct algorithm));
1943 best_alg = (struct algorithm *)alloca (sizeof (struct algorithm));
1945 /* If we have a group of zero bits at the low-order part of T, try
1946 multiplying by the remaining bits and then doing a shift. */
1948 if ((t & 1) == 0)
1950 m = floor_log2 (t & -t); /* m = number of low zero bits */
1951 q = t >> m;
1952 cost = shift_cost[m];
1953 synth_mult (alg_in, q, cost_limit - cost);
1955 cost += alg_in->cost;
1956 if (cost < cost_limit)
1958 struct algorithm *x;
1959 x = alg_in, alg_in = best_alg, best_alg = x;
1960 best_alg->log[best_alg->ops] = m;
1961 best_alg->op[best_alg->ops] = alg_shift;
1962 cost_limit = cost;
1966 /* If we have an odd number, add or subtract one. */
1967 if ((t & 1) != 0)
1969 unsigned HOST_WIDE_INT w;
1971 for (w = 1; (w & t) != 0; w <<= 1)
1973 if (w > 2
1974 /* Reject the case where t is 3.
1975 Thus we prefer addition in that case. */
1976 && t != 3)
1978 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
1980 cost = add_cost;
1981 synth_mult (alg_in, t + 1, cost_limit - cost);
1983 cost += alg_in->cost;
1984 if (cost < cost_limit)
1986 struct algorithm *x;
1987 x = alg_in, alg_in = best_alg, best_alg = x;
1988 best_alg->log[best_alg->ops] = 0;
1989 best_alg->op[best_alg->ops] = alg_sub_t_m2;
1990 cost_limit = cost;
1993 else
1995 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
1997 cost = add_cost;
1998 synth_mult (alg_in, t - 1, cost_limit - cost);
2000 cost += alg_in->cost;
2001 if (cost < cost_limit)
2003 struct algorithm *x;
2004 x = alg_in, alg_in = best_alg, best_alg = x;
2005 best_alg->log[best_alg->ops] = 0;
2006 best_alg->op[best_alg->ops] = alg_add_t_m2;
2007 cost_limit = cost;
2012 /* Look for factors of t of the form
2013 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2014 If we find such a factor, we can multiply by t using an algorithm that
2015 multiplies by q, shift the result by m and add/subtract it to itself.
2017 We search for large factors first and loop down, even if large factors
2018 are less probable than small; if we find a large factor we will find a
2019 good sequence quickly, and therefore be able to prune (by decreasing
2020 COST_LIMIT) the search. */
2022 for (m = floor_log2 (t - 1); m >= 2; m--)
2024 unsigned HOST_WIDE_INT d;
2026 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2027 if (t % d == 0 && t > d)
2029 cost = MIN (shiftadd_cost[m], add_cost + shift_cost[m]);
2030 synth_mult (alg_in, t / d, cost_limit - cost);
2032 cost += alg_in->cost;
2033 if (cost < cost_limit)
2035 struct algorithm *x;
2036 x = alg_in, alg_in = best_alg, best_alg = x;
2037 best_alg->log[best_alg->ops] = m;
2038 best_alg->op[best_alg->ops] = alg_add_factor;
2039 cost_limit = cost;
2041 /* Other factors will have been taken care of in the recursion. */
2042 break;
2045 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2046 if (t % d == 0 && t > d)
2048 cost = MIN (shiftsub_cost[m], add_cost + shift_cost[m]);
2049 synth_mult (alg_in, t / d, cost_limit - cost);
2051 cost += alg_in->cost;
2052 if (cost < cost_limit)
2054 struct algorithm *x;
2055 x = alg_in, alg_in = best_alg, best_alg = x;
2056 best_alg->log[best_alg->ops] = m;
2057 best_alg->op[best_alg->ops] = alg_sub_factor;
2058 cost_limit = cost;
2060 break;
2064 /* Try shift-and-add (load effective address) instructions,
2065 i.e. do a*3, a*5, a*9. */
2066 if ((t & 1) != 0)
2068 q = t - 1;
2069 q = q & -q;
2070 m = exact_log2 (q);
2071 if (m >= 0)
2073 cost = shiftadd_cost[m];
2074 synth_mult (alg_in, (t - 1) >> m, cost_limit - cost);
2076 cost += alg_in->cost;
2077 if (cost < cost_limit)
2079 struct algorithm *x;
2080 x = alg_in, alg_in = best_alg, best_alg = x;
2081 best_alg->log[best_alg->ops] = m;
2082 best_alg->op[best_alg->ops] = alg_add_t2_m;
2083 cost_limit = cost;
2087 q = t + 1;
2088 q = q & -q;
2089 m = exact_log2 (q);
2090 if (m >= 0)
2092 cost = shiftsub_cost[m];
2093 synth_mult (alg_in, (t + 1) >> m, cost_limit - cost);
2095 cost += alg_in->cost;
2096 if (cost < cost_limit)
2098 struct algorithm *x;
2099 x = alg_in, alg_in = best_alg, best_alg = x;
2100 best_alg->log[best_alg->ops] = m;
2101 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2102 cost_limit = cost;
2107 /* If cost_limit has not decreased since we stored it in alg_out->cost,
2108 we have not found any algorithm. */
2109 if (cost_limit == alg_out->cost)
2110 return;
2112 /* If we are getting a too long sequence for `struct algorithm'
2113 to record, make this search fail. */
2114 if (best_alg->ops == MAX_BITS_PER_WORD)
2115 return;
2117 /* Copy the algorithm from temporary space to the space at alg_out.
2118 We avoid using structure assignment because the majority of
2119 best_alg is normally undefined, and this is a critical function. */
2120 alg_out->ops = best_alg->ops + 1;
2121 alg_out->cost = cost_limit;
2122 bcopy ((char *) best_alg->op, (char *) alg_out->op,
2123 alg_out->ops * sizeof *alg_out->op);
2124 bcopy ((char *) best_alg->log, (char *) alg_out->log,
2125 alg_out->ops * sizeof *alg_out->log);
2128 /* Perform a multiplication and return an rtx for the result.
2129 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2130 TARGET is a suggestion for where to store the result (an rtx).
2132 We check specially for a constant integer as OP1.
2133 If you want this check for OP0 as well, then before calling
2134 you should swap the two operands if OP0 would be constant. */
/* NOTE(review): this rendering has dropped short lines ({, }, the bare
   `rtx' return-type line); the embedded numbers are the original blob's
   line numbers.  Do not treat indentation here as structure. */
2137 expand_mult (mode, op0, op1, target, unsignedp)
2138 enum machine_mode mode;
2139 register rtx op0, op1, target;
2140 int unsignedp;
2142 rtx const_op1 = op1;
2144 /* synth_mult does an `unsigned int' multiply. As long as the mode is
2145 less than or equal in size to `unsigned int' this doesn't matter.
2146 If the mode is larger than `unsigned int', then synth_mult works only
2147 if the constant value exactly fits in an `unsigned int' without any
2148 truncation. This means that multiplying by negative values does
2149 not work; results are off by 2^32 on a 32 bit machine. */
2151 /* If we are multiplying in DImode, it may still be a win
2152 to try to work with shifts and adds. */
2153 if (GET_CODE (op1) == CONST_DOUBLE
2154 && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2155 && HOST_BITS_PER_INT >= BITS_PER_WORD
2156 && CONST_DOUBLE_HIGH (op1) == 0)
2157 const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2158 else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2159 && GET_CODE (op1) == CONST_INT
2160 && INTVAL (op1) < 0)
2161 const_op1 = 0;
2163 /* We used to test optimize here, on the grounds that it's better to
2164 produce a smaller program when -O is not used.
2165 But this causes such a terrible slowdown sometimes
2166 that it seems better to use synth_mult always. */
/* NOTE(review): VAL below is read from OP1, not CONST_OP1.  On the
   CONST_DOUBLE path above those are different rtxes; verify that
   INTVAL (op1) really yields the intended low word in that case. */
2168 if (const_op1 && GET_CODE (const_op1) == CONST_INT)
2170 struct algorithm alg;
2171 struct algorithm alg2;
2172 HOST_WIDE_INT val = INTVAL (op1);
2173 HOST_WIDE_INT val_so_far;
2174 rtx insn;
2175 int mult_cost;
2176 enum {basic_variant, negate_variant, add_variant} variant = basic_variant;
2178 /* Try to do the computation three ways: multiply by the negative of OP1
2179 and then negate, do the multiplication directly, or do multiplication
2180 by OP1 - 1. */
2182 mult_cost = rtx_cost (gen_rtx (MULT, mode, op0, op1), SET);
2183 mult_cost = MIN (12 * add_cost, mult_cost);
2185 synth_mult (&alg, val, mult_cost);
2187 /* This works only if the inverted value actually fits in an
2188 `unsigned int' */
2189 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2191 synth_mult (&alg2, - val,
2192 (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
2193 if (alg2.cost + negate_cost < alg.cost)
2194 alg = alg2, variant = negate_variant;
2197 /* This proves very useful for division-by-constant. */
2198 synth_mult (&alg2, val - 1,
2199 (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost);
2200 if (alg2.cost + add_cost < alg.cost)
2201 alg = alg2, variant = add_variant;
2203 if (alg.cost < mult_cost)
2205 /* We found something cheaper than a multiply insn. */
2206 int opno;
2207 rtx accum, tem;
2209 op0 = protect_from_queue (op0, 0);
2211 /* Avoid referencing memory over and over.
2212 For speed, but also for correctness when mem is volatile. */
2213 if (GET_CODE (op0) == MEM)
2214 op0 = force_reg (mode, op0);
2216 /* ACCUM starts out either as OP0 or as a zero, depending on
2217 the first operation. */
2219 if (alg.op[0] == alg_zero)
2221 accum = copy_to_mode_reg (mode, const0_rtx);
2222 val_so_far = 0;
2224 else if (alg.op[0] == alg_m)
2226 accum = copy_to_mode_reg (mode, op0);
2227 val_so_far = 1;
2229 else
2230 abort ();
/* Replay the remaining steps of the chosen algorithm, emitting RTL for
   each one.  VAL_SO_FAR mirrors the constant multiplied in so far and is
   cross-checked against VAL after the loop. */
2232 for (opno = 1; opno < alg.ops; opno++)
2234 int log = alg.log[opno];
2235 int preserve = preserve_subexpressions_p ();
2236 rtx shift_subtarget = preserve ? 0 : accum;
2237 rtx add_target
2238 = (opno == alg.ops - 1 && target != 0 && variant != add_variant
2239 ? target : 0);
2240 rtx accum_target = preserve ? 0 : accum;
2242 switch (alg.op[opno])
2244 case alg_shift:
2245 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2246 build_int_2 (log, 0), NULL_RTX, 0);
2247 val_so_far <<= log;
2248 break;
2250 case alg_add_t_m2:
2251 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2252 build_int_2 (log, 0), NULL_RTX, 0);
2253 accum = force_operand (gen_rtx (PLUS, mode, accum, tem),
2254 add_target ? add_target : accum_target);
2255 val_so_far += (HOST_WIDE_INT) 1 << log;
2256 break;
2258 case alg_sub_t_m2:
2259 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2260 build_int_2 (log, 0), NULL_RTX, 0);
2261 accum = force_operand (gen_rtx (MINUS, mode, accum, tem),
2262 add_target ? add_target : accum_target);
2263 val_so_far -= (HOST_WIDE_INT) 1 << log;
2264 break;
2266 case alg_add_t2_m:
2267 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2268 build_int_2 (log, 0), shift_subtarget,
2270 accum = force_operand (gen_rtx (PLUS, mode, accum, op0),
2271 add_target ? add_target : accum_target);
2272 val_so_far = (val_so_far << log) + 1;
2273 break;
2275 case alg_sub_t2_m:
2276 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2277 build_int_2 (log, 0), shift_subtarget,
2279 accum = force_operand (gen_rtx (MINUS, mode, accum, op0),
2280 add_target ? add_target : accum_target);
2281 val_so_far = (val_so_far << log) - 1;
2282 break;
2284 case alg_add_factor:
2285 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2286 build_int_2 (log, 0), NULL_RTX, 0);
2287 accum = force_operand (gen_rtx (PLUS, mode, accum, tem),
2288 add_target ? add_target : accum_target);
2289 val_so_far += val_so_far << log;
2290 break;
2292 case alg_sub_factor:
2293 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2294 build_int_2 (log, 0), NULL_RTX, 0);
2295 accum = force_operand (gen_rtx (MINUS, mode, tem, accum),
2296 (add_target ? add_target
2297 : preserve ? 0 : tem));
2298 val_so_far = (val_so_far << log) - val_so_far;
2299 break;
2301 default:
/* NOTE(review): stray second `;' after this abort () — harmless, but
   worth cleaning up when the file is next touched. */
2302 abort ();;
2305 /* Write a REG_EQUAL note on the last insn so that we can cse
2306 multiplication sequences. */
2308 insn = get_last_insn ();
2309 REG_NOTES (insn)
2310 = gen_rtx (EXPR_LIST, REG_EQUAL,
2311 gen_rtx (MULT, mode, op0, GEN_INT (val_so_far)),
2312 REG_NOTES (insn));
2315 if (variant == negate_variant)
2317 val_so_far = - val_so_far;
2318 accum = expand_unop (mode, neg_optab, accum, target, 0);
2320 else if (variant == add_variant)
2322 val_so_far = val_so_far + 1;
2323 accum = force_operand (gen_rtx (PLUS, mode, accum, op0), target);
/* Sanity check: the constant we actually multiplied in must equal VAL. */
2326 if (val != val_so_far)
2327 abort ();
2329 return accum;
2333 /* This used to use umul_optab if unsigned, but for non-widening multiply
2334 there is no difference between signed and unsigned. */
2335 op0 = expand_binop (mode, smul_optab,
2336 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2337 if (op0 == 0)
2338 abort ();
2339 return op0;
2342 /* Return the smallest n such that 2**n >= X. */
2345 ceil_log2 (x)
2346 unsigned HOST_WIDE_INT x;
2348 return floor_log2 (x - 1) + 1;
2351 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2352 replace division by D, and put the least significant N bits of the result
2353 in *MULTIPLIER_PTR and return the most significant bit.
2355 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
2356 needed precision is in PRECISION (should be <= N).
2358 PRECISION should be as small as possible so this function can choose
2359 multiplier more freely.
2361 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
2362 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2364 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2365 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
/* NOTE(review): the mlow/mhigh bounds computed below match the
   multiplier-selection scheme of Granlund & Montgomery, "Division by
   Invariant Integers using Multiplication" — confirm against the paper
   before altering any of the bounds or assertions. */
2367 static
2368 unsigned HOST_WIDE_INT
2369 choose_multiplier (d, n, precision, multiplier_ptr, post_shift_ptr, lgup_ptr)
2370 unsigned HOST_WIDE_INT d;
2371 int n;
2372 int precision;
2373 unsigned HOST_WIDE_INT *multiplier_ptr;
2374 int *post_shift_ptr;
2375 int *lgup_ptr;
/* mlow/mhigh are kept as double-word values (hi:lo pairs) since they can
   need N+1 bits. */
2377 unsigned HOST_WIDE_INT mhigh_hi, mhigh_lo;
2378 unsigned HOST_WIDE_INT mlow_hi, mlow_lo;
2379 int lgup, post_shift;
2380 int pow, pow2;
2381 unsigned HOST_WIDE_INT nh, nl, dummy1, dummy2;
2383 /* lgup = ceil(log2(divisor)); */
2384 lgup = ceil_log2 (d);
2386 if (lgup > n)
2387 abort ();
2389 pow = n + lgup;
2390 pow2 = n + lgup - precision;
2392 if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2394 /* We could handle this with some effort, but this case is much better
2395 handled directly with a scc insn, so rely on caller using that. */
2396 abort ();
2399 /* mlow = 2^(N + lgup)/d */
2400 if (pow >= HOST_BITS_PER_WIDE_INT)
2402 nh = (unsigned HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2403 nl = 0;
2405 else
2407 nh = 0;
2408 nl = (unsigned HOST_WIDE_INT) 1 << pow;
2410 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2411 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2413 /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
2414 if (pow2 >= HOST_BITS_PER_WIDE_INT)
2415 nh |= (unsigned HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2416 else
2417 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2418 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2419 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
/* Internal consistency checks on the computed bounds. */
2421 if (mhigh_hi && nh - d >= d)
2422 abort ();
2423 if (mhigh_hi > 1 || mlow_hi > 1)
2424 abort ();
2425 /* assert that mlow < mhigh. */
2426 if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2427 abort();
2429 /* If precision == N, then mlow, mhigh exceed 2^N
2430 (but they do not exceed 2^(N+1)). */
2432 /* Reduce to lowest terms */
/* Shift mlow/mhigh right while they still bracket distinct values; each
   bit dropped here saves one bit of POST_SHIFT at runtime. */
2433 for (post_shift = lgup; post_shift > 0; post_shift--)
2435 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2436 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2437 if (ml_lo >= mh_lo)
2438 break;
2440 mlow_hi = 0;
2441 mlow_lo = ml_lo;
2442 mhigh_hi = 0;
2443 mhigh_lo = mh_lo;
2446 *post_shift_ptr = post_shift;
2447 *lgup_ptr = lgup;
/* Hand back the low N bits of mhigh as the multiplier; the function's
   value reports whether the multiplier needs the extra (N+1st) bit.
   NOTE(review): the test is `>= mask', not `> mask' — also true when
   mhigh_lo == 2^N - 1 exactly; verify that edge case is intended. */
2448 if (n < HOST_BITS_PER_WIDE_INT)
2450 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2451 *multiplier_ptr = mhigh_lo & mask;
2452 return mhigh_lo >= mask;
2454 else
2456 *multiplier_ptr = mhigh_lo;
2457 return mhigh_hi;
2461 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
2462 congruent to 1 (mod 2**N). */
2464 static unsigned HOST_WIDE_INT
2465 invert_mod2n (x, n)
2466 unsigned HOST_WIDE_INT x;
2467 int n;
2469 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
2471 /* The algorithm notes that the choice y = x satisfies
2472 x*y == 1 mod 2^3, since x is assumed odd.
2473 Each iteration doubles the number of bits of significance in y. */
2475 unsigned HOST_WIDE_INT mask;
2476 unsigned HOST_WIDE_INT y = x;
2477 int nbit = 3;
2479 mask = (n == HOST_BITS_PER_WIDE_INT
2480 ? ~(unsigned HOST_WIDE_INT) 0
2481 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2483 while (nbit < n)
2485 y = y * (2 - x*y) & mask; /* Modulo 2^N */
2486 nbit *= 2;
2488 return y;
2491 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
2492 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
2493 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
2494 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
2495 become signed.
2497 The result is put in TARGET if that is convenient.
2499 MODE is the mode of operation. */
2502 expand_mult_highpart_adjust (mode, adj_operand, op0, op1, target, unsignedp)
2503 enum machine_mode mode;
2504 register rtx adj_operand, op0, op1, target;
2505 int unsignedp;
2507 rtx tem;
2508 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
2510 tem = expand_shift (RSHIFT_EXPR, mode, op0,
2511 build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2512 NULL_RTX, 0);
2513 tem = expand_and (tem, op1, NULL_RTX);
2514 adj_operand = force_operand (gen_rtx (adj_code, mode, adj_operand, tem),
2515 adj_operand);
2517 tem = expand_shift (RSHIFT_EXPR, mode, op1,
2518 build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2519 NULL_RTX, 0);
2520 tem = expand_and (tem, op0, NULL_RTX);
2521 target = force_operand (gen_rtx (adj_code, mode, adj_operand, tem), target);
2523 return target;
2526 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
2527 in TARGET if that is convenient, and return where the result is. If the
2528 operation can not be performed, 0 is returned.
2530 MODE is the mode of operation and result.
2532 UNSIGNEDP nonzero means unsigned multiply.
2534 MAX_COST is the total allowed cost for the expanded RTL. */
2537 expand_mult_highpart (mode, op0, cnst1, target, unsignedp, max_cost)
2538 enum machine_mode mode;
2539 register rtx op0, target;
2540 unsigned HOST_WIDE_INT cnst1;
2541 int unsignedp;
2542 int max_cost;
2544 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
2545 optab mul_highpart_optab;
2546 optab moptab;
2547 rtx tem;
2548 int size = GET_MODE_BITSIZE (mode);
2549 rtx op1, wide_op1;
/* NOTE(review): the comment below says HOST_BITS_PER_INT but the check
   uses HOST_BITS_PER_WIDE_INT, and the comparison a few lines further
   down uses HOST_BITS_PER_INT again — verify these agree on hosts where
   the two widths differ. */
2551 /* We can't support modes wider than HOST_BITS_PER_INT. */
2552 if (size > HOST_BITS_PER_WIDE_INT)
2553 abort ();
2555 op1 = GEN_INT (cnst1);
2557 if (GET_MODE_BITSIZE (wider_mode) <= HOST_BITS_PER_INT)
2558 wide_op1 = op1;
2559 else
2560 wide_op1
2561 = immed_double_const (cnst1,
2562 (unsignedp
2563 ? (HOST_WIDE_INT) 0
2564 : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))),
2565 wider_mode);
2567 /* expand_mult handles constant multiplication of word_mode
2568 or narrower. It does a poor job for large modes. */
2569 if (size < BITS_PER_WORD
2570 && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2572 /* We have to do this, since expand_binop doesn't do conversion for
2573 multiply. Maybe change expand_binop to handle widening multiply? */
2574 op0 = convert_to_mode (wider_mode, op0, unsignedp);
2576 tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, unsignedp);
2577 tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2578 build_int_2 (size, 0), NULL_RTX, 1);
2579 return convert_modes (mode, wider_mode, tem, unsignedp);
2582 if (target == 0)
2583 target = gen_reg_rtx (mode);
2585 /* Firstly, try using a multiplication insn that only generates the needed
2586 high part of the product, and in the sign flavor of unsignedp. */
2587 if (mul_highpart_cost[(int) mode] < max_cost)
2589 mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
2590 target = expand_binop (mode, mul_highpart_optab,
2591 op0, wide_op1, target, unsignedp, OPTAB_DIRECT);
2592 if (target)
2593 return target;
2596 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
2597 Need to adjust the result after the multiplication. */
2598 if (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost < max_cost)
2600 mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
2601 target = expand_binop (mode, mul_highpart_optab,
2602 op0, wide_op1, target, unsignedp, OPTAB_DIRECT);
2603 if (target)
2604 /* We used the wrong signedness. Adjust the result. */
2605 return expand_mult_highpart_adjust (mode, target, op0,
2606 op1, target, unsignedp);
2609 /* Try widening multiplication. */
2610 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
2611 if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2612 && mul_widen_cost[(int) wider_mode] < max_cost)
2614 op1 = force_reg (mode, op1);
2615 goto try;
2618 /* Try widening the mode and perform a non-widening multiplication. */
2619 moptab = smul_optab;
2620 if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2621 && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2623 op1 = wide_op1;
2624 goto try;
2627 /* Try widening multiplication of opposite signedness, and adjust. */
2628 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
2629 if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2630 && (mul_widen_cost[(int) wider_mode]
2631 + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
2633 rtx regop1 = force_reg (mode, op1);
2634 tem = expand_binop (wider_mode, moptab, op0, regop1,
2635 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
2636 if (tem != 0)
2638 /* Extract the high half of the just generated product. */
2639 tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2640 build_int_2 (size, 0), NULL_RTX, 1);
2641 tem = convert_modes (mode, wider_mode, tem, unsignedp);
2642 /* We used the wrong signedness. Adjust the result. */
2643 return expand_mult_highpart_adjust (mode, tem, op0, op1,
2644 target, unsignedp);
2648 return 0;
/* Shared tail for the two widening strategies above; the jumps here
   arrive with MOPTAB and OP1 already set up appropriately. */
2650 try:
2651 /* Pass NULL_RTX as target since TARGET has wrong mode. */
2652 tem = expand_binop (wider_mode, moptab, op0, op1,
2653 NULL_RTX, unsignedp, OPTAB_WIDEN);
2654 if (tem == 0)
2655 return 0;
2657 /* Extract the high half of the just generated product. */
2658 if (mode == word_mode)
2660 return gen_highpart (mode, tem);
2662 else
2664 tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2665 build_int_2 (size, 0), NULL_RTX, 1);
2666 return convert_modes (mode, wider_mode, tem, unsignedp);
2670 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
2671 if that is convenient, and returning where the result is.
2672 You may request either the quotient or the remainder as the result;
2673 specify REM_FLAG nonzero to get the remainder.
2675 CODE is the expression code for which kind of division this is;
2676 it controls how rounding is done. MODE is the machine mode to use.
2677 UNSIGNEDP nonzero means do unsigned division. */
2679 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
2680 and then correct it by or'ing in missing high bits
2681 if result of ANDI is nonzero.
2682 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
2683 This could optimize to a bfexts instruction.
2684 But C doesn't use these operations, so their optimizations are
2685 left for later. */
2687 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
2690 expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
2691 int rem_flag;
2692 enum tree_code code;
2693 enum machine_mode mode;
2694 register rtx op0, op1, target;
2695 int unsignedp;
2697 enum machine_mode compute_mode;
2698 register rtx tquotient;
2699 rtx quotient = 0, remainder = 0;
2700 rtx last;
2701 int size;
2702 rtx insn, set;
2703 optab optab1, optab2;
2704 int op1_is_constant, op1_is_pow2;
2705 int max_cost, extra_cost;
2707 op1_is_constant = GET_CODE (op1) == CONST_INT;
2708 op1_is_pow2 = (op1_is_constant
2709 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
2710 || EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))));
2713 This is the structure of expand_divmod:
2715 First comes code to fix up the operands so we can perform the operations
2716 correctly and efficiently.
2718 Second comes a switch statement with code specific for each rounding mode.
2719 For some special operands this code emits all RTL for the desired
2720 operation, for other cases, it generates only a quotient and stores it in
2721 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
2722 to indicate that it has not done anything.
2724 Last comes code that finishes the operation. If QUOTIENT is set and
2725 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
2726 QUOTIENT is not set, it is computed using trunc rounding.
2728 We try to generate special code for division and remainder when OP1 is a
2729 constant. If |OP1| = 2**n we can use shifts and some other fast
2730 operations. For other values of OP1, we compute a carefully selected
2731 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
2732 by m.
2734 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
2735 half of the product. Different strategies for generating the product are
2736 implemented in expand_mult_highpart.
2738 If what we actually want is the remainder, we generate that by another
2739 by-constant multiplication and a subtraction. */
2741 /* We shouldn't be called with OP1 == const1_rtx, but some of the
2742 code below will malfunction if we are, so check here and handle
2743 the special case if so. */
2744 if (op1 == const1_rtx)
2745 return rem_flag ? const0_rtx : op0;
2747 if (target
2748 /* Don't use the function value register as a target
2749 since we have to read it as well as write it,
2750 and function-inlining gets confused by this. */
2751 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
2752 /* Don't clobber an operand while doing a multi-step calculation. */
2753 || ((rem_flag || op1_is_constant)
2754 && (reg_mentioned_p (target, op0)
2755 || (GET_CODE (op0) == MEM && GET_CODE (target) == MEM)))
2756 || reg_mentioned_p (target, op1)
2757 || (GET_CODE (op1) == MEM && GET_CODE (target) == MEM)))
2758 target = 0;
2760 /* Get the mode in which to perform this computation. Normally it will
2761 be MODE, but sometimes we can't do the desired operation in MODE.
2762 If so, pick a wider mode in which we can do the operation. Convert
2763 to that mode at the start to avoid repeated conversions.
2765 First see what operations we need. These depend on the expression
2766 we are evaluating. (We assume that divxx3 insns exist under the
2767 same conditions that modxx3 insns and that these insns don't normally
2768 fail. If these assumptions are not correct, we may generate less
2769 efficient code in some cases.)
2771 Then see if we find a mode in which we can open-code that operation
2772 (either a division, modulus, or shift). Finally, check for the smallest
2773 mode for which we can do the operation with a library call. */
2775 /* We might want to refine this now that we have division-by-constant
2776 optimization. Since expand_mult_highpart tries so many variants, it is
2777 not straightforward to generalize this. Maybe we should make an array
2778 of possible modes in init_expmed? Save this for GCC 2.7. */
2780 optab1 = (op1_is_pow2 ? (unsignedp ? lshr_optab : ashr_optab)
2781 : (unsignedp ? udiv_optab : sdiv_optab));
2782 optab2 = (op1_is_pow2 ? optab1 : (unsignedp ? udivmod_optab : sdivmod_optab));
2784 for (compute_mode = mode; compute_mode != VOIDmode;
2785 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
2786 if (optab1->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing
2787 || optab2->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing)
2788 break;
2790 if (compute_mode == VOIDmode)
2791 for (compute_mode = mode; compute_mode != VOIDmode;
2792 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
2793 if (optab1->handlers[(int) compute_mode].libfunc
2794 || optab2->handlers[(int) compute_mode].libfunc)
2795 break;
2797 /* If we still couldn't find a mode, use MODE, but we'll probably abort
2798 in expand_binop. */
2799 if (compute_mode == VOIDmode)
2800 compute_mode = mode;
2802 if (target && GET_MODE (target) == compute_mode)
2803 tquotient = target;
2804 else
2805 tquotient = gen_reg_rtx (compute_mode);
2807 size = GET_MODE_BITSIZE (compute_mode);
2808 #if 0
2809 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
2810 (mode), and thereby get better code when OP1 is a constant. Do that
2811 later. It will require going over all usages of SIZE below. */
2812 size = GET_MODE_BITSIZE (mode);
2813 #endif
2815 max_cost = div_cost[(int) compute_mode]
2816 - (rem_flag ? mul_cost[(int) compute_mode] + add_cost : 0);
2818 /* Now convert to the best mode to use. */
2819 if (compute_mode != mode)
2821 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
2822 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
2825 /* If one of the operands is a volatile MEM, copy it into a register. */
2827 if (GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0))
2828 op0 = force_reg (compute_mode, op0);
2829 if (GET_CODE (op1) == MEM && MEM_VOLATILE_P (op1))
2830 op1 = force_reg (compute_mode, op1);
2832 /* If we need the remainder or if OP1 is constant, we need to
2833 put OP0 in a register in case it has any queued subexpressions. */
2834 if (rem_flag || op1_is_constant)
2835 op0 = force_reg (compute_mode, op0);
2837 last = get_last_insn ();
2839 /* Promote floor rounding to trunc rounding for unsigned operations. */
2840 if (unsignedp)
2842 if (code == FLOOR_DIV_EXPR)
2843 code = TRUNC_DIV_EXPR;
2844 if (code == FLOOR_MOD_EXPR)
2845 code = TRUNC_MOD_EXPR;
2848 if (op1 != const0_rtx)
2849 switch (code)
2851 case TRUNC_MOD_EXPR:
2852 case TRUNC_DIV_EXPR:
2853 if (op1_is_constant)
2855 if (unsignedp)
2857 unsigned HOST_WIDE_INT mh, ml;
2858 int pre_shift, post_shift;
2859 int dummy;
2860 unsigned HOST_WIDE_INT d = INTVAL (op1);
2862 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
2864 pre_shift = floor_log2 (d);
2865 if (rem_flag)
2867 remainder = expand_binop (compute_mode, and_optab, op0,
2868 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
2869 remainder, 1,
2870 OPTAB_LIB_WIDEN);
2871 if (remainder)
2872 return gen_lowpart (mode, remainder);
2874 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
2875 build_int_2 (pre_shift, 0),
2876 tquotient, 1);
2878 else if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
2880 /* Most significant bit of divisor is set, emit a scc insn.
2881 emit_store_flag needs to be passed a place for the
2882 result. */
2883 quotient = emit_store_flag (tquotient, GEU, op0, op1,
2884 compute_mode, 1, 1);
2885 if (quotient == 0)
2886 goto fail1;
2888 else if (size <= HOST_BITS_PER_WIDE_INT)
2890 /* Find a suitable multiplier and right shift count instead
2891 of multiplying with D. */
2893 mh = choose_multiplier (d, size, size,
2894 &ml, &post_shift, &dummy);
2896 /* If the suggested multiplier is more than SIZE bits, we
2897 can do better for even divisors, using an initial right
2898 shift. */
2899 if (mh != 0 && (d & 1) == 0)
2901 pre_shift = floor_log2 (d & -d);
2902 mh = choose_multiplier (d >> pre_shift, size,
2903 size - pre_shift,
2904 &ml, &post_shift, &dummy);
2905 if (mh)
2906 abort ();
2908 else
2909 pre_shift = 0;
2911 if (mh != 0)
2913 rtx t1, t2, t3, t4;
2915 extra_cost = (shift_cost[post_shift - 1]
2916 + shift_cost[1] + 2 * add_cost);
2917 t1 = expand_mult_highpart (compute_mode, op0, ml,
2918 NULL_RTX, 1,
2919 max_cost - extra_cost);
2920 if (t1 == 0)
2921 goto fail1;
2922 t2 = force_operand (gen_rtx (MINUS, compute_mode,
2923 op0, t1),
2924 NULL_RTX);
2925 t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
2926 build_int_2 (1, 0), NULL_RTX, 1);
2927 t4 = force_operand (gen_rtx (PLUS, compute_mode,
2928 t1, t3),
2929 NULL_RTX);
2930 quotient = expand_shift (RSHIFT_EXPR, compute_mode, t4,
2931 build_int_2 (post_shift - 1,
2933 tquotient, 1);
2935 else
2937 rtx t1, t2;
2939 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
2940 build_int_2 (pre_shift, 0),
2941 NULL_RTX, 1);
2942 extra_cost = (shift_cost[pre_shift]
2943 + shift_cost[post_shift]);
2944 t2 = expand_mult_highpart (compute_mode, t1, ml,
2945 NULL_RTX, 1,
2946 max_cost - extra_cost);
2947 if (t2 == 0)
2948 goto fail1;
2949 quotient = expand_shift (RSHIFT_EXPR, compute_mode, t2,
2950 build_int_2 (post_shift, 0),
2951 tquotient, 1);
2954 else /* Too wide mode to use tricky code */
2955 break;
2957 insn = get_last_insn ();
2958 if (insn != last
2959 && (set = single_set (insn)) != 0
2960 && SET_DEST (set) == quotient)
2961 REG_NOTES (insn)
2962 = gen_rtx (EXPR_LIST, REG_EQUAL,
2963 gen_rtx (UDIV, compute_mode, op0, op1),
2964 REG_NOTES (insn));
2966 else /* TRUNC_DIV, signed */
2968 unsigned HOST_WIDE_INT ml;
2969 int lgup, post_shift;
2970 HOST_WIDE_INT d = INTVAL (op1);
2971 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
2973 /* n rem d = n rem -d */
2974 if (rem_flag && d < 0)
2976 d = abs_d;
2977 op1 = GEN_INT (abs_d);
2980 if (d == 1)
2981 quotient = op0;
2982 else if (d == -1)
2983 quotient = expand_unop (compute_mode, neg_optab, op0,
2984 tquotient, 0);
2985 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
2987 /* This case is not handled correctly below. */
2988 quotient = emit_store_flag (tquotient, EQ, op0, op1,
2989 compute_mode, 1, 1);
2990 if (quotient == 0)
2991 goto fail1;
2993 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
2994 && (rem_flag ? smod_pow2_cheap : sdiv_pow2_cheap))
2996 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
2998 lgup = floor_log2 (abs_d);
2999 if (abs_d != 2 && BRANCH_COST < 3)
3001 rtx label = gen_label_rtx ();
3002 rtx t1;
3004 t1 = copy_to_mode_reg (compute_mode, op0);
3005 emit_cmp_insn (t1, const0_rtx, GE,
3006 NULL_RTX, compute_mode, 0, 0);
3007 emit_jump_insn (gen_bge (label));
3008 expand_inc (t1, GEN_INT (abs_d - 1));
3009 emit_label (label);
3010 quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3011 build_int_2 (lgup, 0),
3012 tquotient, 0);
3014 else
3016 rtx t1, t2, t3;
3017 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3018 build_int_2 (size - 1, 0),
3019 NULL_RTX, 0);
3020 t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3021 build_int_2 (size - lgup, 0),
3022 NULL_RTX, 1);
3023 t3 = force_operand (gen_rtx (PLUS, compute_mode,
3024 op0, t2),
3025 NULL_RTX);
3026 quotient = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3027 build_int_2 (lgup, 0),
3028 tquotient, 0);
3031 /* We have computed OP0 / abs(OP1). If OP1 is negative, negate
3032 the quotient. */
3033 if (d < 0)
3035 insn = get_last_insn ();
3036 if (insn != last
3037 && (set = single_set (insn)) != 0
3038 && SET_DEST (set) == quotient)
3039 REG_NOTES (insn)
3040 = gen_rtx (EXPR_LIST, REG_EQUAL,
3041 gen_rtx (DIV, compute_mode, op0,
3042 GEN_INT (abs_d)),
3043 REG_NOTES (insn));
3045 quotient = expand_unop (compute_mode, neg_optab,
3046 quotient, quotient, 0);
3049 else if (size <= HOST_BITS_PER_WIDE_INT)
3051 choose_multiplier (abs_d, size, size - 1,
3052 &ml, &post_shift, &lgup);
3053 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3055 rtx t1, t2, t3;
3057 extra_cost = (shift_cost[post_shift]
3058 + shift_cost[size - 1] + add_cost);
3059 t1 = expand_mult_highpart (compute_mode, op0, ml,
3060 NULL_RTX, 0,
3061 max_cost - extra_cost);
3062 if (t1 == 0)
3063 goto fail1;
3064 t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3065 build_int_2 (post_shift, 0), NULL_RTX, 0);
3066 t3 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3067 build_int_2 (size - 1, 0), NULL_RTX, 0);
3068 if (d < 0)
3069 quotient = force_operand (gen_rtx (MINUS, compute_mode, t3, t2),
3070 tquotient);
3071 else
3072 quotient = force_operand (gen_rtx (MINUS, compute_mode, t2, t3),
3073 tquotient);
3075 else
3077 rtx t1, t2, t3, t4;
3079 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3080 extra_cost = (shift_cost[post_shift]
3081 + shift_cost[size - 1] + 2 * add_cost);
3082 t1 = expand_mult_highpart (compute_mode, op0, ml,
3083 NULL_RTX, 0,
3084 max_cost - extra_cost);
3085 if (t1 == 0)
3086 goto fail1;
3087 t2 = force_operand (gen_rtx (PLUS, compute_mode, t1, op0),
3088 NULL_RTX);
3089 t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3090 build_int_2 (post_shift, 0), NULL_RTX, 0);
3091 t4 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3092 build_int_2 (size - 1, 0), NULL_RTX, 0);
3093 if (d < 0)
3094 quotient = force_operand (gen_rtx (MINUS, compute_mode, t4, t3),
3095 tquotient);
3096 else
3097 quotient = force_operand (gen_rtx (MINUS, compute_mode, t3, t4),
3098 tquotient);
3101 else /* Too wide mode to use tricky code */
3102 break;
3104 insn = get_last_insn ();
3105 if (insn != last
3106 && (set = single_set (insn)) != 0
3107 && SET_DEST (set) == quotient)
3108 REG_NOTES (insn)
3109 = gen_rtx (EXPR_LIST, REG_EQUAL,
3110 gen_rtx (DIV, compute_mode, op0, op1),
3111 REG_NOTES (insn));
3113 break;
3115 fail1:
3116 delete_insns_since (last);
3117 break;
3119 case FLOOR_DIV_EXPR:
3120 case FLOOR_MOD_EXPR:
3121 /* We will come here only for signed operations. */
3122 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3124 unsigned HOST_WIDE_INT mh, ml;
3125 int pre_shift, lgup, post_shift;
3126 HOST_WIDE_INT d = INTVAL (op1);
3128 if (d > 0)
3130 /* We could just as easily deal with negative constants here,
3131 but it does not seem worth the trouble for GCC 2.6. */
3132 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3134 pre_shift = floor_log2 (d);
3135 if (rem_flag)
3137 remainder = expand_binop (compute_mode, and_optab, op0,
3138 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3139 remainder, 0, OPTAB_LIB_WIDEN);
3140 if (remainder)
3141 return gen_lowpart (mode, remainder);
3143 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3144 build_int_2 (pre_shift, 0),
3145 tquotient, 0);
3147 else
3149 rtx t1, t2, t3, t4;
3151 mh = choose_multiplier (d, size, size - 1,
3152 &ml, &post_shift, &lgup);
3153 if (mh)
3154 abort ();
3156 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3157 build_int_2 (size - 1, 0), NULL_RTX, 0);
3158 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3159 NULL_RTX, 0, OPTAB_WIDEN);
3160 extra_cost = (shift_cost[post_shift]
3161 + shift_cost[size - 1] + 2 * add_cost);
3162 t3 = expand_mult_highpart (compute_mode, t2, ml,
3163 NULL_RTX, 1,
3164 max_cost - extra_cost);
3165 if (t3 != 0)
3167 t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3168 build_int_2 (post_shift, 0),
3169 NULL_RTX, 1);
3170 quotient = expand_binop (compute_mode, xor_optab,
3171 t4, t1, tquotient, 0,
3172 OPTAB_WIDEN);
3176 else
3178 rtx nsign, t1, t2, t3, t4;
3179 t1 = force_operand (gen_rtx (PLUS, compute_mode,
3180 op0, constm1_rtx), NULL_RTX);
3181 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3182 0, OPTAB_WIDEN);
3183 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3184 build_int_2 (size - 1, 0), NULL_RTX, 0);
3185 t3 = force_operand (gen_rtx (MINUS, compute_mode, t1, nsign),
3186 NULL_RTX);
3187 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3188 NULL_RTX, 0);
3189 if (t4)
3191 rtx t5;
3192 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3193 NULL_RTX, 0);
3194 quotient = force_operand (gen_rtx (PLUS, compute_mode,
3195 t4, t5),
3196 tquotient);
3201 if (quotient != 0)
3202 break;
3203 delete_insns_since (last);
3205 /* Try using an instruction that produces both the quotient and
3206 remainder, using truncation. We can easily compensate the quotient
3207 or remainder to get floor rounding, once we have the remainder.
3208 Notice that we compute also the final remainder value here,
3209 and return the result right away. */
3210 if (target == 0 || GET_MODE (target) != compute_mode)
3211 target = gen_reg_rtx (compute_mode);
3213 if (rem_flag)
3215 remainder
3216 = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3217 quotient = gen_reg_rtx (compute_mode);
3219 else
3221 quotient
3222 = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3223 remainder = gen_reg_rtx (compute_mode);
3226 if (expand_twoval_binop (sdivmod_optab, op0, op1,
3227 quotient, remainder, 0))
3229 /* This could be computed with a branch-less sequence.
3230 Save that for later. */
3231 rtx tem;
3232 rtx label = gen_label_rtx ();
3233 emit_cmp_insn (remainder, const0_rtx, EQ, NULL_RTX,
3234 compute_mode, 0, 0);
3235 emit_jump_insn (gen_beq (label));
3236 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3237 NULL_RTX, 0, OPTAB_WIDEN);
3238 emit_cmp_insn (tem, const0_rtx, GE, NULL_RTX, compute_mode, 0, 0);
3239 emit_jump_insn (gen_bge (label));
3240 expand_dec (quotient, const1_rtx);
3241 expand_inc (remainder, op1);
3242 emit_label (label);
3243 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3246 /* No luck with division elimination or divmod. Have to do it
3247 by conditionally adjusting op0 *and* the result. */
3249 rtx label1, label2, label3, label4, label5;
3250 rtx adjusted_op0;
3251 rtx tem;
3253 quotient = gen_reg_rtx (compute_mode);
3254 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3255 label1 = gen_label_rtx ();
3256 label2 = gen_label_rtx ();
3257 label3 = gen_label_rtx ();
3258 label4 = gen_label_rtx ();
3259 label5 = gen_label_rtx ();
3260 emit_cmp_insn (op1, const0_rtx, LT, NULL_RTX, compute_mode, 0, 0);
3261 emit_jump_insn (gen_blt (label2));
3262 emit_cmp_insn (adjusted_op0, const0_rtx, LT, NULL_RTX,
3263 compute_mode, 0, 0);
3264 emit_jump_insn (gen_blt (label1));
3265 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3266 quotient, 0, OPTAB_LIB_WIDEN);
3267 if (tem != quotient)
3268 emit_move_insn (quotient, tem);
3269 emit_jump_insn (gen_jump (label5));
3270 emit_barrier ();
3271 emit_label (label1);
3272 expand_inc (adjusted_op0, const1_rtx);
3273 emit_jump_insn (gen_jump (label4));
3274 emit_barrier ();
3275 emit_label (label2);
3276 emit_cmp_insn (adjusted_op0, const0_rtx, GT, NULL_RTX,
3277 compute_mode, 0, 0);
3278 emit_jump_insn (gen_bgt (label3));
3279 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3280 quotient, 0, OPTAB_LIB_WIDEN);
3281 if (tem != quotient)
3282 emit_move_insn (quotient, tem);
3283 emit_jump_insn (gen_jump (label5));
3284 emit_barrier ();
3285 emit_label (label3);
3286 expand_dec (adjusted_op0, const1_rtx);
3287 emit_label (label4);
3288 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3289 quotient, 0, OPTAB_LIB_WIDEN);
3290 if (tem != quotient)
3291 emit_move_insn (quotient, tem);
3292 expand_dec (quotient, const1_rtx);
3293 emit_label (label5);
3295 break;
3297 case CEIL_DIV_EXPR:
3298 case CEIL_MOD_EXPR:
3299 if (unsignedp)
3301 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
3303 rtx t1, t2, t3;
3304 unsigned HOST_WIDE_INT d = INTVAL (op1);
3305 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3306 build_int_2 (floor_log2 (d), 0),
3307 tquotient, 1);
3308 t2 = expand_binop (compute_mode, and_optab, op0,
3309 GEN_INT (d - 1),
3310 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3311 t3 = gen_reg_rtx (compute_mode);
3312 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3313 compute_mode, 1, 1);
3314 if (t3 == 0)
3316 rtx lab;
3317 lab = gen_label_rtx ();
3318 emit_cmp_insn (t2, const0_rtx, EQ, NULL_RTX,
3319 compute_mode, 0, 0);
3320 emit_jump_insn (gen_beq (lab));
3321 expand_inc (t1, const1_rtx);
3322 emit_label (lab);
3323 quotient = t1;
3325 else
3326 quotient = force_operand (gen_rtx (PLUS, compute_mode,
3327 t1, t3),
3328 tquotient);
3329 break;
3332 /* Try using an instruction that produces both the quotient and
3333 remainder, using truncation. We can easily compensate the
3334 quotient or remainder to get ceiling rounding, once we have the
3335 remainder. Notice that we compute also the final remainder
3336 value here, and return the result right away. */
3337 if (target == 0 || GET_MODE (target) != compute_mode)
3338 target = gen_reg_rtx (compute_mode);
3340 if (rem_flag)
3342 remainder = (GET_CODE (target) == REG
3343 ? target : gen_reg_rtx (compute_mode));
3344 quotient = gen_reg_rtx (compute_mode);
3346 else
3348 quotient = (GET_CODE (target) == REG
3349 ? target : gen_reg_rtx (compute_mode));
3350 remainder = gen_reg_rtx (compute_mode);
3353 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
3354 remainder, 1))
3356 /* This could be computed with a branch-less sequence.
3357 Save that for later. */
3358 rtx label = gen_label_rtx ();
3359 emit_cmp_insn (remainder, const0_rtx, EQ, NULL_RTX,
3360 compute_mode, 0, 0);
3361 emit_jump_insn (gen_beq (label));
3362 expand_inc (quotient, const1_rtx);
3363 expand_dec (remainder, op1);
3364 emit_label (label);
3365 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3368 /* No luck with division elimination or divmod. Have to do it
3369 by conditionally adjusting op0 *and* the result. */
3371 rtx label1, label2;
3372 rtx adjusted_op0, tem;
3374 quotient = gen_reg_rtx (compute_mode);
3375 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3376 label1 = gen_label_rtx ();
3377 label2 = gen_label_rtx ();
3378 emit_cmp_insn (adjusted_op0, const0_rtx, NE, NULL_RTX,
3379 compute_mode, 0, 0);
3380 emit_jump_insn (gen_bne (label1));
3381 emit_move_insn (quotient, const0_rtx);
3382 emit_jump_insn (gen_jump (label2));
3383 emit_barrier ();
3384 emit_label (label1);
3385 expand_dec (adjusted_op0, const1_rtx);
3386 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
3387 quotient, 1, OPTAB_LIB_WIDEN);
3388 if (tem != quotient)
3389 emit_move_insn (quotient, tem);
3390 expand_inc (quotient, const1_rtx);
3391 emit_label (label2);
3394 else /* signed */
3396 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3397 && INTVAL (op1) >= 0)
3399 /* This is extremely similar to the code for the unsigned case
3400 above. For 2.7 we should merge these variants, but for
3401 2.6.1 I don't want to touch the code for unsigned since that
3402 get used in C. The signed case will only be used by other
3403 languages (Ada). */
3405 rtx t1, t2, t3;
3406 unsigned HOST_WIDE_INT d = INTVAL (op1);
3407 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3408 build_int_2 (floor_log2 (d), 0),
3409 tquotient, 0);
3410 t2 = expand_binop (compute_mode, and_optab, op0,
3411 GEN_INT (d - 1),
3412 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3413 t3 = gen_reg_rtx (compute_mode);
3414 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3415 compute_mode, 1, 1);
3416 if (t3 == 0)
3418 rtx lab;
3419 lab = gen_label_rtx ();
3420 emit_cmp_insn (t2, const0_rtx, EQ, NULL_RTX,
3421 compute_mode, 0, 0);
3422 emit_jump_insn (gen_beq (lab));
3423 expand_inc (t1, const1_rtx);
3424 emit_label (lab);
3425 quotient = t1;
3427 else
3428 quotient = force_operand (gen_rtx (PLUS, compute_mode,
3429 t1, t3),
3430 tquotient);
3431 break;
3434 /* Try using an instruction that produces both the quotient and
3435 remainder, using truncation. We can easily compensate the
3436 quotient or remainder to get ceiling rounding, once we have the
3437 remainder. Notice that we compute also the final remainder
3438 value here, and return the result right away. */
3439 if (target == 0 || GET_MODE (target) != compute_mode)
3440 target = gen_reg_rtx (compute_mode);
3441 if (rem_flag)
3443 remainder= (GET_CODE (target) == REG
3444 ? target : gen_reg_rtx (compute_mode));
3445 quotient = gen_reg_rtx (compute_mode);
3447 else
3449 quotient = (GET_CODE (target) == REG
3450 ? target : gen_reg_rtx (compute_mode));
3451 remainder = gen_reg_rtx (compute_mode);
3454 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
3455 remainder, 0))
3457 /* This could be computed with a branch-less sequence.
3458 Save that for later. */
3459 rtx tem;
3460 rtx label = gen_label_rtx ();
3461 emit_cmp_insn (remainder, const0_rtx, EQ, NULL_RTX,
3462 compute_mode, 0, 0);
3463 emit_jump_insn (gen_beq (label));
3464 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3465 NULL_RTX, 0, OPTAB_WIDEN);
3466 emit_cmp_insn (tem, const0_rtx, LT, NULL_RTX,
3467 compute_mode, 0, 0);
3468 emit_jump_insn (gen_blt (label));
3469 expand_inc (quotient, const1_rtx);
3470 expand_dec (remainder, op1);
3471 emit_label (label);
3472 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3475 /* No luck with division elimination or divmod. Have to do it
3476 by conditionally adjusting op0 *and* the result. */
3478 rtx label1, label2, label3, label4, label5;
3479 rtx adjusted_op0;
3480 rtx tem;
3482 quotient = gen_reg_rtx (compute_mode);
3483 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3484 label1 = gen_label_rtx ();
3485 label2 = gen_label_rtx ();
3486 label3 = gen_label_rtx ();
3487 label4 = gen_label_rtx ();
3488 label5 = gen_label_rtx ();
3489 emit_cmp_insn (op1, const0_rtx, LT, NULL_RTX,
3490 compute_mode, 0, 0);
3491 emit_jump_insn (gen_blt (label2));
3492 emit_cmp_insn (adjusted_op0, const0_rtx, GT, NULL_RTX,
3493 compute_mode, 0, 0);
3494 emit_jump_insn (gen_bgt (label1));
3495 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3496 quotient, 0, OPTAB_LIB_WIDEN);
3497 if (tem != quotient)
3498 emit_move_insn (quotient, tem);
3499 emit_jump_insn (gen_jump (label5));
3500 emit_barrier ();
3501 emit_label (label1);
3502 expand_dec (adjusted_op0, const1_rtx);
3503 emit_jump_insn (gen_jump (label4));
3504 emit_barrier ();
3505 emit_label (label2);
3506 emit_cmp_insn (adjusted_op0, const0_rtx, LT, NULL_RTX,
3507 compute_mode, 0, 0);
3508 emit_jump_insn (gen_blt (label3));
3509 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3510 quotient, 0, OPTAB_LIB_WIDEN);
3511 if (tem != quotient)
3512 emit_move_insn (quotient, tem);
3513 emit_jump_insn (gen_jump (label5));
3514 emit_barrier ();
3515 emit_label (label3);
3516 expand_inc (adjusted_op0, const1_rtx);
3517 emit_label (label4);
3518 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3519 quotient, 0, OPTAB_LIB_WIDEN);
3520 if (tem != quotient)
3521 emit_move_insn (quotient, tem);
3522 expand_inc (quotient, const1_rtx);
3523 emit_label (label5);
3526 break;
3528 case EXACT_DIV_EXPR:
3529 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3531 HOST_WIDE_INT d = INTVAL (op1);
3532 unsigned HOST_WIDE_INT ml;
3533 int post_shift;
3534 rtx t1;
3536 post_shift = floor_log2 (d & -d);
3537 ml = invert_mod2n (d >> post_shift, size);
3538 t1 = expand_mult (compute_mode, op0, GEN_INT (ml), NULL_RTX,
3539 unsignedp);
3540 quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3541 build_int_2 (post_shift, 0),
3542 NULL_RTX, unsignedp);
3544 insn = get_last_insn ();
3545 REG_NOTES (insn)
3546 = gen_rtx (EXPR_LIST, REG_EQUAL,
3547 gen_rtx (unsignedp ? UDIV : DIV, compute_mode,
3548 op0, op1),
3549 REG_NOTES (insn));
3551 break;
3553 case ROUND_DIV_EXPR:
3554 case ROUND_MOD_EXPR:
3555 if (unsignedp)
3557 rtx tem;
3558 rtx label;
3559 label = gen_label_rtx ();
3560 quotient = gen_reg_rtx (compute_mode);
3561 remainder = gen_reg_rtx (compute_mode);
3562 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
3564 rtx tem;
3565 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
3566 quotient, 1, OPTAB_LIB_WIDEN);
3567 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
3568 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3569 remainder, 1, OPTAB_LIB_WIDEN);
3571 tem = plus_constant (op1, -1);
3572 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3573 build_int_2 (1, 0), NULL_RTX, 1);
3574 emit_cmp_insn (remainder, tem, LEU, NULL_RTX, compute_mode, 0, 0);
3575 emit_jump_insn (gen_bleu (label));
3576 expand_inc (quotient, const1_rtx);
3577 expand_dec (remainder, op1);
3578 emit_label (label);
3580 else
3582 rtx abs_rem, abs_op1, tem, mask;
3583 rtx label;
3584 label = gen_label_rtx ();
3585 quotient = gen_reg_rtx (compute_mode);
3586 remainder = gen_reg_rtx (compute_mode);
3587 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
3589 rtx tem;
3590 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
3591 quotient, 0, OPTAB_LIB_WIDEN);
3592 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
3593 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3594 remainder, 0, OPTAB_LIB_WIDEN);
3596 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 0, 0);
3597 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 0, 0);
3598 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
3599 build_int_2 (1, 0), NULL_RTX, 1);
3600 emit_cmp_insn (tem, abs_op1, LTU, NULL_RTX, compute_mode, 0, 0);
3601 emit_jump_insn (gen_bltu (label));
3602 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3603 NULL_RTX, 0, OPTAB_WIDEN);
3604 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3605 build_int_2 (size - 1, 0), NULL_RTX, 0);
3606 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
3607 NULL_RTX, 0, OPTAB_WIDEN);
3608 tem = expand_binop (compute_mode, sub_optab, tem, mask,
3609 NULL_RTX, 0, OPTAB_WIDEN);
3610 expand_inc (quotient, tem);
3611 tem = expand_binop (compute_mode, xor_optab, mask, op1,
3612 NULL_RTX, 0, OPTAB_WIDEN);
3613 tem = expand_binop (compute_mode, sub_optab, tem, mask,
3614 NULL_RTX, 0, OPTAB_WIDEN);
3615 expand_dec (remainder, tem);
3616 emit_label (label);
3618 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3621 if (quotient == 0)
3623 if (target && GET_MODE (target) != compute_mode)
3624 target = 0;
3626 if (rem_flag)
3628 /* Try to produce the remainder directly without a library call. */
3629 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3630 op0, op1, target,
3631 unsignedp, OPTAB_WIDEN);
3632 if (remainder == 0)
3634 /* No luck there. Can we do remainder and divide at once
3635 without a library call? */
3636 remainder = gen_reg_rtx (compute_mode);
3637 if (! expand_twoval_binop ((unsignedp
3638 ? udivmod_optab
3639 : sdivmod_optab),
3640 op0, op1,
3641 NULL_RTX, remainder, unsignedp))
3642 remainder = 0;
3645 if (remainder)
3646 return gen_lowpart (mode, remainder);
3649 /* Produce the quotient. */
3650 /* Try a quotient insn, but not a library call. */
3651 quotient = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
3652 op0, op1, rem_flag ? NULL_RTX : target,
3653 unsignedp, OPTAB_WIDEN);
3654 if (quotient == 0)
3656 /* No luck there. Try a quotient-and-remainder insn,
3657 keeping the quotient alone. */
3658 quotient = gen_reg_rtx (compute_mode);
3659 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
3660 op0, op1,
3661 quotient, NULL_RTX, unsignedp))
3663 quotient = 0;
3664 if (! rem_flag)
3665 /* Still no luck. If we are not computing the remainder,
3666 use a library call for the quotient. */
3667 quotient = sign_expand_binop (compute_mode,
3668 udiv_optab, sdiv_optab,
3669 op0, op1, target,
3670 unsignedp, OPTAB_LIB_WIDEN);
3675 if (rem_flag)
3677 if (target && GET_MODE (target) != compute_mode)
3678 target = 0;
3680 if (quotient == 0)
3681 /* No divide instruction either. Use library for remainder. */
3682 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3683 op0, op1, target,
3684 unsignedp, OPTAB_LIB_WIDEN);
3685 else
3687 /* We divided. Now finish doing X - Y * (X / Y). */
3688 remainder = expand_mult (compute_mode, quotient, op1,
3689 NULL_RTX, unsignedp);
3690 remainder = expand_binop (compute_mode, sub_optab, op0,
3691 remainder, target, unsignedp,
3692 OPTAB_LIB_WIDEN);
3696 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3699 /* Return a tree node with data type TYPE, describing the value of X.
3700 Usually this is an RTL_EXPR, if there is no obvious better choice.
3701 X may be an expression, however we only support those expressions
3702 generated by loop.c. */
3704 tree
3705 make_tree (type, x)
3706 tree type;
3707 rtx x;
3709 tree t;
3711 switch (GET_CODE (x))
3713 case CONST_INT:
3714 t = build_int_2 (INTVAL (x),
3715 TREE_UNSIGNED (type) || INTVAL (x) >= 0 ? 0 : -1);
3716 TREE_TYPE (t) = type;
3717 return t;
3719 case CONST_DOUBLE:
3720 if (GET_MODE (x) == VOIDmode)
3722 t = build_int_2 (CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
3723 TREE_TYPE (t) = type;
3725 else
3727 REAL_VALUE_TYPE d;
3729 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
3730 t = build_real (type, d);
3733 return t;
3735 case PLUS:
3736 return fold (build (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
3737 make_tree (type, XEXP (x, 1))));
3739 case MINUS:
3740 return fold (build (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
3741 make_tree (type, XEXP (x, 1))));
3743 case NEG:
3744 return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
3746 case MULT:
3747 return fold (build (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
3748 make_tree (type, XEXP (x, 1))));
3750 case ASHIFT:
3751 return fold (build (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
3752 make_tree (type, XEXP (x, 1))));
3754 case LSHIFTRT:
3755 return fold (convert (type,
3756 build (RSHIFT_EXPR, unsigned_type (type),
3757 make_tree (unsigned_type (type),
3758 XEXP (x, 0)),
3759 make_tree (type, XEXP (x, 1)))));
3761 case ASHIFTRT:
3762 return fold (convert (type,
3763 build (RSHIFT_EXPR, signed_type (type),
3764 make_tree (signed_type (type), XEXP (x, 0)),
3765 make_tree (type, XEXP (x, 1)))));
3767 case DIV:
3768 if (TREE_CODE (type) != REAL_TYPE)
3769 t = signed_type (type);
3770 else
3771 t = type;
3773 return fold (convert (type,
3774 build (TRUNC_DIV_EXPR, t,
3775 make_tree (t, XEXP (x, 0)),
3776 make_tree (t, XEXP (x, 1)))));
3777 case UDIV:
3778 t = unsigned_type (type);
3779 return fold (convert (type,
3780 build (TRUNC_DIV_EXPR, t,
3781 make_tree (t, XEXP (x, 0)),
3782 make_tree (t, XEXP (x, 1)))));
3783 default:
3784 t = make_node (RTL_EXPR);
3785 TREE_TYPE (t) = type;
3786 RTL_EXPR_RTL (t) = x;
3787 /* There are no insns to be output
3788 when this rtl_expr is used. */
3789 RTL_EXPR_SEQUENCE (t) = 0;
3790 return t;
3794 /* Return an rtx representing the value of X * MULT + ADD.
3795 TARGET is a suggestion for where to store the result (an rtx).
3796 MODE is the machine mode for the computation.
3797 X and MULT must have mode MODE. ADD may have a different mode.
3798 So can X (defaults to same as MODE).
3799 UNSIGNEDP is non-zero to do unsigned multiplication.
3800 This may emit insns. */
3803 expand_mult_add (x, target, mult, add, mode, unsignedp)
3804 rtx x, target, mult, add;
3805 enum machine_mode mode;
3806 int unsignedp;
3808 tree type = type_for_mode (mode, unsignedp);
3809 tree add_type = (GET_MODE (add) == VOIDmode
3810 ? type : type_for_mode (GET_MODE (add), unsignedp));
3811 tree result = fold (build (PLUS_EXPR, type,
3812 fold (build (MULT_EXPR, type,
3813 make_tree (type, x),
3814 make_tree (type, mult))),
3815 make_tree (add_type, add)));
3817 return expand_expr (result, target, VOIDmode, 0);
3820 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
3821 and returning TARGET.
3823 If TARGET is 0, a pseudo-register or constant is returned. */
3826 expand_and (op0, op1, target)
3827 rtx op0, op1, target;
3829 enum machine_mode mode = VOIDmode;
3830 rtx tem;
3832 if (GET_MODE (op0) != VOIDmode)
3833 mode = GET_MODE (op0);
3834 else if (GET_MODE (op1) != VOIDmode)
3835 mode = GET_MODE (op1);
3837 if (mode != VOIDmode)
3838 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
3839 else if (GET_CODE (op0) == CONST_INT && GET_CODE (op1) == CONST_INT)
3840 tem = GEN_INT (INTVAL (op0) & INTVAL (op1));
3841 else
3842 abort ();
3844 if (target == 0)
3845 target = tem;
3846 else if (tem != target)
3847 emit_move_insn (target, tem);
3848 return target;
3851 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
3852 and storing in TARGET. Normally return TARGET.
3853 Return 0 if that cannot be done.
3855 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
3856 it is VOIDmode, they cannot both be CONST_INT.
3858 UNSIGNEDP is for the case where we have to widen the operands
3859 to perform the operation. It says to use zero-extension.
3861 NORMALIZEP is 1 if we should convert the result to be either zero
3862 or one one. Normalize is -1 if we should convert the result to be
3863 either zero or -1. If NORMALIZEP is zero, the result will be left
3864 "raw" out of the scc insn. */
3867 emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep)
3868 rtx target;
3869 enum rtx_code code;
3870 rtx op0, op1;
3871 enum machine_mode mode;
3872 int unsignedp;
3873 int normalizep;
3875 rtx subtarget;
3876 enum insn_code icode;
3877 enum machine_mode compare_mode;
3878 enum machine_mode target_mode = GET_MODE (target);
3879 rtx tem;
3880 rtx last = get_last_insn ();
3881 rtx pattern, comparison;
3883 /* If one operand is constant, make it the second one. Only do this
3884 if the other operand is not constant as well. */
3886 if ((CONSTANT_P (op0) && ! CONSTANT_P (op1))
3887 || (GET_CODE (op0) == CONST_INT && GET_CODE (op1) != CONST_INT))
3889 tem = op0;
3890 op0 = op1;
3891 op1 = tem;
3892 code = swap_condition (code);
3895 if (mode == VOIDmode)
3896 mode = GET_MODE (op0);
3898 /* For some comparisons with 1 and -1, we can convert this to
3899 comparisons with zero. This will often produce more opportunities for
3900 store-flag insns. */
3902 switch (code)
3904 case LT:
3905 if (op1 == const1_rtx)
3906 op1 = const0_rtx, code = LE;
3907 break;
3908 case LE:
3909 if (op1 == constm1_rtx)
3910 op1 = const0_rtx, code = LT;
3911 break;
3912 case GE:
3913 if (op1 == const1_rtx)
3914 op1 = const0_rtx, code = GT;
3915 break;
3916 case GT:
3917 if (op1 == constm1_rtx)
3918 op1 = const0_rtx, code = GE;
3919 break;
3920 case GEU:
3921 if (op1 == const1_rtx)
3922 op1 = const0_rtx, code = NE;
3923 break;
3924 case LTU:
3925 if (op1 == const1_rtx)
3926 op1 = const0_rtx, code = EQ;
3927 break;
3930 /* From now on, we won't change CODE, so set ICODE now. */
3931 icode = setcc_gen_code[(int) code];
3933 /* If this is A < 0 or A >= 0, we can do this by taking the ones
3934 complement of A (for GE) and shifting the sign bit to the low bit. */
3935 if (op1 == const0_rtx && (code == LT || code == GE)
3936 && GET_MODE_CLASS (mode) == MODE_INT
3937 && (normalizep || STORE_FLAG_VALUE == 1
3938 || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
3939 && (STORE_FLAG_VALUE
3940 == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
3942 subtarget = target;
3944 /* If the result is to be wider than OP0, it is best to convert it
3945 first. If it is to be narrower, it is *incorrect* to convert it
3946 first. */
3947 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
3949 op0 = protect_from_queue (op0, 0);
3950 op0 = convert_modes (target_mode, mode, op0, 0);
3951 mode = target_mode;
3954 if (target_mode != mode)
3955 subtarget = 0;
3957 if (code == GE)
3958 op0 = expand_unop (mode, one_cmpl_optab, op0, subtarget, 0);
3960 if (normalizep || STORE_FLAG_VALUE == 1)
3961 /* If we are supposed to produce a 0/1 value, we want to do
3962 a logical shift from the sign bit to the low-order bit; for
3963 a -1/0 value, we do an arithmetic shift. */
3964 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
3965 size_int (GET_MODE_BITSIZE (mode) - 1),
3966 subtarget, normalizep != -1);
3968 if (mode != target_mode)
3969 op0 = convert_modes (target_mode, mode, op0, 0);
3971 return op0;
3974 if (icode != CODE_FOR_nothing)
3976 /* We think we may be able to do this with a scc insn. Emit the
3977 comparison and then the scc insn.
3979 compare_from_rtx may call emit_queue, which would be deleted below
3980 if the scc insn fails. So call it ourselves before setting LAST. */
3982 emit_queue ();
3983 last = get_last_insn ();
3985 comparison
3986 = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX, 0);
3987 if (GET_CODE (comparison) == CONST_INT)
3988 return (comparison == const0_rtx ? const0_rtx
3989 : normalizep == 1 ? const1_rtx
3990 : normalizep == -1 ? constm1_rtx
3991 : const_true_rtx);
3993 /* If the code of COMPARISON doesn't match CODE, something is
3994 wrong; we can no longer be sure that we have the operation.
3995 We could handle this case, but it should not happen. */
3997 if (GET_CODE (comparison) != code)
3998 abort ();
4000 /* Get a reference to the target in the proper mode for this insn. */
4001 compare_mode = insn_operand_mode[(int) icode][0];
4002 subtarget = target;
4003 if (preserve_subexpressions_p ()
4004 || ! (*insn_operand_predicate[(int) icode][0]) (subtarget, compare_mode))
4005 subtarget = gen_reg_rtx (compare_mode);
4007 pattern = GEN_FCN (icode) (subtarget);
4008 if (pattern)
4010 emit_insn (pattern);
4012 /* If we are converting to a wider mode, first convert to
4013 TARGET_MODE, then normalize. This produces better combining
4014 opportunities on machines that have a SIGN_EXTRACT when we are
4015 testing a single bit. This mostly benefits the 68k.
4017 If STORE_FLAG_VALUE does not have the sign bit set when
4018 interpreted in COMPARE_MODE, we can do this conversion as
4019 unsigned, which is usually more efficient. */
4020 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4022 convert_move (target, subtarget,
4023 (GET_MODE_BITSIZE (compare_mode)
4024 <= HOST_BITS_PER_WIDE_INT)
4025 && 0 == (STORE_FLAG_VALUE
4026 & ((HOST_WIDE_INT) 1
4027 << (GET_MODE_BITSIZE (compare_mode) -1))));
4028 op0 = target;
4029 compare_mode = target_mode;
4031 else
4032 op0 = subtarget;
4034 /* If we want to keep subexpressions around, don't reuse our
4035 last target. */
4037 if (preserve_subexpressions_p ())
4038 subtarget = 0;
4040 /* Now normalize to the proper value in COMPARE_MODE. Sometimes
4041 we don't have to do anything. */
4042 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4044 else if (normalizep == - STORE_FLAG_VALUE)
4045 op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
4047 /* We don't want to use STORE_FLAG_VALUE < 0 below since this
4048 makes it hard to use a value of just the sign bit due to
4049 ANSI integer constant typing rules. */
4050 else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
4051 && (STORE_FLAG_VALUE
4052 & ((HOST_WIDE_INT) 1
4053 << (GET_MODE_BITSIZE (compare_mode) - 1))))
4054 op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
4055 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
4056 subtarget, normalizep == 1);
4057 else if (STORE_FLAG_VALUE & 1)
4059 op0 = expand_and (op0, const1_rtx, subtarget);
4060 if (normalizep == -1)
4061 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
4063 else
4064 abort ();
4066 /* If we were converting to a smaller mode, do the
4067 conversion now. */
4068 if (target_mode != compare_mode)
4070 convert_move (target, op0, 0);
4071 return target;
4073 else
4074 return op0;
4078 delete_insns_since (last);
4080 /* If expensive optimizations, use different pseudo registers for each
4081 insn, instead of reusing the same pseudo. This leads to better CSE,
4082 but slows down the compiler, since there are more pseudos */
4083 subtarget = (!flag_expensive_optimizations
4084 && (target_mode == mode)) ? target : NULL_RTX;
4086 /* If we reached here, we can't do this with a scc insn. However, there
4087 are some comparisons that can be done directly. For example, if
4088 this is an equality comparison of integers, we can try to exclusive-or
4089 (or subtract) the two operands and use a recursive call to try the
4090 comparison with zero. Don't do any of these cases if branches are
4091 very cheap. */
4093 if (BRANCH_COST > 0
4094 && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
4095 && op1 != const0_rtx)
4097 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
4098 OPTAB_WIDEN);
4100 if (tem == 0)
4101 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
4102 OPTAB_WIDEN);
4103 if (tem != 0)
4104 tem = emit_store_flag (target, code, tem, const0_rtx,
4105 mode, unsignedp, normalizep);
4106 if (tem == 0)
4107 delete_insns_since (last);
4108 return tem;
4111 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
4112 the constant zero. Reject all other comparisons at this point. Only
4113 do LE and GT if branches are expensive since they are expensive on
4114 2-operand machines. */
4116 if (BRANCH_COST == 0
4117 || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
4118 || (code != EQ && code != NE
4119 && (BRANCH_COST <= 1 || (code != LE && code != GT))))
4120 return 0;
4122 /* See what we need to return. We can only return a 1, -1, or the
4123 sign bit. */
4125 if (normalizep == 0)
4127 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
4128 normalizep = STORE_FLAG_VALUE;
4130 else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4131 && (STORE_FLAG_VALUE
4132 == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
4134 else
4135 return 0;
4138 /* Try to put the result of the comparison in the sign bit. Assume we can't
4139 do the necessary operation below. */
4141 tem = 0;
4143 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
4144 the sign bit set. */
4146 if (code == LE)
4148 /* This is destructive, so SUBTARGET can't be OP0. */
4149 if (rtx_equal_p (subtarget, op0))
4150 subtarget = 0;
4152 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
4153 OPTAB_WIDEN);
4154 if (tem)
4155 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
4156 OPTAB_WIDEN);
4159 /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
4160 number of bits in the mode of OP0, minus one. */
4162 if (code == GT)
4164 if (rtx_equal_p (subtarget, op0))
4165 subtarget = 0;
4167 tem = expand_shift (RSHIFT_EXPR, mode, op0,
4168 size_int (GET_MODE_BITSIZE (mode) - 1),
4169 subtarget, 0);
4170 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
4171 OPTAB_WIDEN);
4174 if (code == EQ || code == NE)
4176 /* For EQ or NE, one way to do the comparison is to apply an operation
4177 that converts the operand into a positive number if it is non-zero
4178 or zero if it was originally zero. Then, for EQ, we subtract 1 and
4179 for NE we negate. This puts the result in the sign bit. Then we
4180 normalize with a shift, if needed.
4182 Two operations that can do the above actions are ABS and FFS, so try
4183 them. If that doesn't work, and MODE is smaller than a full word,
4184 we can use zero-extension to the wider mode (an unsigned conversion)
4185 as the operation. */
4187 if (abs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4188 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
4189 else if (ffs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4190 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
4191 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
4193 op0 = protect_from_queue (op0, 0);
4194 tem = convert_modes (word_mode, mode, op0, 1);
4195 mode = word_mode;
4198 if (tem != 0)
4200 if (code == EQ)
4201 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
4202 0, OPTAB_WIDEN);
4203 else
4204 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
4207 /* If we couldn't do it that way, for NE we can "or" the two's complement
4208 of the value with itself. For EQ, we take the one's complement of
4209 that "or", which is an extra insn, so we only handle EQ if branches
4210 are expensive. */
4212 if (tem == 0 && (code == NE || BRANCH_COST > 1))
4214 if (rtx_equal_p (subtarget, op0))
4215 subtarget = 0;
4217 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
4218 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
4219 OPTAB_WIDEN);
4221 if (tem && code == EQ)
4222 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
4226 if (tem && normalizep)
4227 tem = expand_shift (RSHIFT_EXPR, mode, tem,
4228 size_int (GET_MODE_BITSIZE (mode) - 1),
4229 subtarget, normalizep == 1);
4231 if (tem)
4233 if (GET_MODE (tem) != target_mode)
4235 convert_move (target, tem, 0);
4236 tem = target;
4238 else if (!subtarget)
4240 emit_move_insn (target, tem);
4241 tem = target;
4244 else
4245 delete_insns_since (last);
4247 return tem;
4249 emit_jump_insn ((*bcc_gen_fctn[(int) code]) (label));
4250 emit_move_insn (target, const1_rtx);
4251 emit_label (label);
4253 return target;