gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 2, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING.  If not, write to the Free
  20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  21 02111-1307, USA.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "tm.h"
  28 #include "toplev.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "tm_p.h"
  32 #include "flags.h"
  33 #include "insn-config.h"
  34 #include "expr.h"
  35 #include "optabs.h"
  36 #include "real.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39
  40 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  41                                    unsigned HOST_WIDE_INT,
  42                                    unsigned HOST_WIDE_INT, rtx);
  43 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  44                                    unsigned HOST_WIDE_INT, rtx);
  45 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  46                                     unsigned HOST_WIDE_INT,
  47                                     unsigned HOST_WIDE_INT,
  48                                     unsigned HOST_WIDE_INT, rtx, int);
  49 static rtx mask_rtx (enum machine_mode, int, int, int);
  50 static rtx lshift_value (enum machine_mode, rtx, int, int);
  51 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  52                                     unsigned HOST_WIDE_INT, int);
  53 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  54
  55 /* Nonzero means divides or modulus operations are relatively cheap for
  56    powers of two, so don't use branches; emit the operation instead.
  57    Usually, this will mean that the MD file will emit non-branch
  58    sequences.  */
  59
  60 static int sdiv_pow2_cheap, smod_pow2_cheap;
  61
  62 #ifndef SLOW_UNALIGNED_ACCESS
  63 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  64 #endif
  65
  66 /* For compilers that support multiple targets with different word sizes,
  67    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  68    is the H8/300(H) compiler.  */
  69
  70 #ifndef MAX_BITS_PER_WORD
  71 #define MAX_BITS_PER_WORD BITS_PER_WORD
  72 #endif
  73
  74 /* Reduce conditional compilation elsewhere.  */
  75 #ifndef HAVE_insv
  76 #define HAVE_insv       0
  77 #define CODE_FOR_insv   CODE_FOR_nothing
  78 #define gen_insv(a,b,c,d) NULL_RTX
  79 #endif
  80 #ifndef HAVE_extv
  81 #define HAVE_extv       0
  82 #define CODE_FOR_extv   CODE_FOR_nothing
  83 #define gen_extv(a,b,c,d) NULL_RTX
  84 #endif
  85 #ifndef HAVE_extzv
  86 #define HAVE_extzv      0
  87 #define CODE_FOR_extzv  CODE_FOR_nothing
  88 #define gen_extzv(a,b,c,d) NULL_RTX
  89 #endif
  90
  91 /* Cost of various pieces of RTL.  Note that some of these are indexed by
  92    shift count and some by mode.  */
  93 static int add_cost, negate_cost, zero_cost;
  94 static int shift_cost[MAX_BITS_PER_WORD];
  95 static int shiftadd_cost[MAX_BITS_PER_WORD];
  96 static int shiftsub_cost[MAX_BITS_PER_WORD];
  97 static int mul_cost[NUM_MACHINE_MODES];
  98 static int div_cost[NUM_MACHINE_MODES];
  99 static int mul_widen_cost[NUM_MACHINE_MODES];
 100 static int mul_highpart_cost[NUM_MACHINE_MODES];
 101
 102 void
 103 init_expmed (void)
 104 {
 105   rtx reg, shift_insn, shiftadd_insn, shiftsub_insn;
 106   int dummy;
 107   int m;
 108   enum machine_mode mode, wider_mode;
 109
 110   start_sequence ();
 111
 112   /* This is "some random pseudo register" for purposes of calling recog
 113      to see what insns exist.  */
 114   reg = gen_rtx_REG (word_mode, 10000);
 115
 116   zero_cost = rtx_cost (const0_rtx, 0);
 117   add_cost = rtx_cost (gen_rtx_PLUS (word_mode, reg, reg), SET);
 118
 119   shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg,
 120                                        gen_rtx_ASHIFT (word_mode, reg,
 121                                                        const0_rtx)));
 122
 123   shiftadd_insn
 124     = emit_insn (gen_rtx_SET (VOIDmode, reg,
 125                               gen_rtx_PLUS (word_mode,
 126                                             gen_rtx_MULT (word_mode,
 127                                                           reg, const0_rtx),
 128                                             reg)));
 129
 130   shiftsub_insn
 131     = emit_insn (gen_rtx_SET (VOIDmode, reg,
 132                               gen_rtx_MINUS (word_mode,
 133                                              gen_rtx_MULT (word_mode,
 134                                                            reg, const0_rtx),
 135                                              reg)));
 136
 137   init_recog ();
 138
 139   shift_cost[0] = 0;
 140   shiftadd_cost[0] = shiftsub_cost[0] = add_cost;
 141
 142   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 143     {
 144       rtx c_int = GEN_INT ((HOST_WIDE_INT) 1 << m);
 145       shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;
 146
 147       XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);
 148       if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)
 149         shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);
 150
 151       XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1) = c_int;
 152       if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)
 153         shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);
 154
 155       XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1) = c_int;
 156       if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)
 157         shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);
 158     }
 159
 160   negate_cost = rtx_cost (gen_rtx_NEG (word_mode, reg), SET);
 161
 162   sdiv_pow2_cheap
 163     = (rtx_cost (gen_rtx_DIV (word_mode, reg, GEN_INT (32)), SET)
 164        <= 2 * add_cost);
 165   smod_pow2_cheap
 166     = (rtx_cost (gen_rtx_MOD (word_mode, reg, GEN_INT (32)), SET)
 167        <= 2 * add_cost);
 168
 169   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 170        mode != VOIDmode;
 171        mode = GET_MODE_WIDER_MODE (mode))
 172     {
 173       reg = gen_rtx_REG (mode, 10000);
 174       div_cost[(int) mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET);
 175       mul_cost[(int) mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET);
 176       wider_mode = GET_MODE_WIDER_MODE (mode);
 177       if (wider_mode != VOIDmode)
 178         {
 179           mul_widen_cost[(int) wider_mode]
 180             = rtx_cost (gen_rtx_MULT (wider_mode,
 181                                       gen_rtx_ZERO_EXTEND (wider_mode, reg),
 182                                       gen_rtx_ZERO_EXTEND (wider_mode, reg)),
 183                         SET);
 184           mul_highpart_cost[(int) mode]
 185             = rtx_cost (gen_rtx_TRUNCATE
 186                         (mode,
 187                          gen_rtx_LSHIFTRT (wider_mode,
 188                                            gen_rtx_MULT (wider_mode,
 189                                                          gen_rtx_ZERO_EXTEND
 190                                                          (wider_mode, reg),
 191                                                          gen_rtx_ZERO_EXTEND
 192                                                          (wider_mode, reg)),
 193                                            GEN_INT (GET_MODE_BITSIZE (mode)))),
 194                         SET);
 195         }
 196     }
 197
 198   end_sequence ();
 199 }
 200
 201 /* Return an rtx representing minus the value of X.
 202    MODE is the intended mode of the result,
 203    useful if X is a CONST_INT.  */
 204
 205 rtx
 206 negate_rtx (enum machine_mode mode, rtx x)
 207 {
 208   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 209
 210   if (result == 0)
 211     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 212
 213   return result;
 214 }
 215
 216 /* Report on the availability of insv/extv/extzv and the desired mode
 217    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 218    is false; else the mode of the specified operand.  If OPNO is -1,
 219    all the caller cares about is whether the insn is available.  */
 220 enum machine_mode
 221 mode_for_extraction (enum extraction_pattern pattern, int opno)
 222 {
 223   const struct insn_data *data;
 224
 225   switch (pattern)
 226     {
 227     case EP_insv:
 228       if (HAVE_insv)
 229         {
 230           data = &insn_data[CODE_FOR_insv];
 231           break;
 232         }
 233       return MAX_MACHINE_MODE;
 234
 235     case EP_extv:
 236       if (HAVE_extv)
 237         {
 238           data = &insn_data[CODE_FOR_extv];
 239           break;
 240         }
 241       return MAX_MACHINE_MODE;
 242
 243     case EP_extzv:
 244       if (HAVE_extzv)
 245         {
 246           data = &insn_data[CODE_FOR_extzv];
 247           break;
 248         }
 249       return MAX_MACHINE_MODE;
 250
 251     default:
 252       abort ();
 253     }
 254
 255   if (opno == -1)
 256     return VOIDmode;
 257
 258   /* Everyone who uses this function used to follow it with
 259      if (result == VOIDmode) result = word_mode; */
 260   if (data->operand[opno].mode == VOIDmode)
 261     return word_mode;
 262   return data->operand[opno].mode;
 263 }
 264
 265 \f
 266 /* Generate code to store value from rtx VALUE
 267    into a bit-field within structure STR_RTX
 268    containing BITSIZE bits starting at bit BITNUM.
 269    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
 270    ALIGN is the alignment that STR_RTX is known to have.
 271    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
 272
 273 /* ??? Note that there are two different ideas here for how
 274    to determine the size to count bits within, for a register.
 275    One is BITS_PER_WORD, and the other is the size of operand 3
 276    of the insv pattern.
 277
 278    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
 279    else, we use the mode of operand 3.  */
 280
 281 rtx
 282 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 283                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 284                  rtx value, HOST_WIDE_INT total_size)
 285 {
 286   unsigned int unit
 287     = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
 288   unsigned HOST_WIDE_INT offset = bitnum / unit;
 289   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
 290   rtx op0 = str_rtx;
 291   int byte_offset;
 292
 293   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 294
 295   /* Discount the part of the structure before the desired byte.
 296      We need to know how many bytes are safe to reference after it.  */
 297   if (total_size >= 0)
 298     total_size -= (bitpos / BIGGEST_ALIGNMENT
 299                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
 300
 301   while (GET_CODE (op0) == SUBREG)
 302     {
 303       /* The following line once was done only if WORDS_BIG_ENDIAN,
 304          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 305          meaningful at a much higher level; when structures are copied
 306          between memory and regs, the higher-numbered regs
 307          always get higher addresses.  */
 308       offset += (SUBREG_BYTE (op0) / UNITS_PER_WORD);
 309       /* We used to adjust BITPOS here, but now we do the whole adjustment
 310          right after the loop.  */
 311       op0 = SUBREG_REG (op0);
 312     }
 313
 314   value = protect_from_queue (value, 0);
 315
 316   if (flag_force_mem)
 317     {
 318       int old_generating_concat_p = generating_concat_p;
 319       generating_concat_p = 0;
 320       value = force_not_mem (value);
 321       generating_concat_p = old_generating_concat_p;
 322     }
 323
 324   /* If the target is a register, overwriting the entire object, or storing
 325      a full-word or multi-word field can be done with just a SUBREG.
 326
 327      If the target is memory, storing any naturally aligned field can be
 328      done with a simple store.  For targets that support fast unaligned
 329      memory, any naturally sized, unit aligned field can be done directly.  */
 330
 331   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 332                 + (offset * UNITS_PER_WORD);
 333
 334   if (bitpos == 0
 335       && bitsize == GET_MODE_BITSIZE (fieldmode)
 336       && (GET_CODE (op0) != MEM
 337           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 338              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 339              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 340           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 341              || (offset * BITS_PER_UNIT % bitsize == 0
 342                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 343     {
 344       if (GET_MODE (op0) != fieldmode)
 345         {
 346           if (GET_CODE (op0) == SUBREG)
 347             {
 348               if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 349                   || GET_MODE_CLASS (fieldmode) == MODE_INT
 350                   || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 351                 op0 = SUBREG_REG (op0);
 352               else
 353                 /* Else we've got some float mode source being extracted into
 354                    a different float mode destination -- this combination of
 355                    subregs results in Severe Tire Damage.  */
 356                 abort ();
 357             }
 358           if (GET_CODE (op0) == REG)
 359             op0 = gen_rtx_SUBREG (fieldmode, op0, byte_offset);
 360           else
 361             op0 = adjust_address (op0, fieldmode, offset);
 362         }
 363       emit_move_insn (op0, value);
 364       return value;
 365     }
 366
 367   /* Make sure we are playing with integral modes.  Pun with subregs
 368      if we aren't.  This must come after the entire register case above,
 369      since that case is valid for any mode.  The following cases are only
 370      valid for integral modes.  */
 371   {
 372     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 373     if (imode != GET_MODE (op0))
 374       {
 375         if (GET_CODE (op0) == MEM)
 376           op0 = adjust_address (op0, imode, 0);
 377         else if (imode != BLKmode)
 378           op0 = gen_lowpart (imode, op0);
 379         else
 380           abort ();
 381       }
 382   }
 383
 384   /* We may be accessing data outside the field, which means
 385      we can alias adjacent data.  */
 386   if (GET_CODE (op0) == MEM)
 387     {
 388       op0 = shallow_copy_rtx (op0);
 389       set_mem_alias_set (op0, 0);
 390       set_mem_expr (op0, 0);
 391     }
 392
 393   /* If OP0 is a register, BITPOS must count within a word.
 394      But as we have it, it counts within whatever size OP0 now has.
 395      On a bigendian machine, these are not the same, so convert.  */
 396   if (BYTES_BIG_ENDIAN
 397       && GET_CODE (op0) != MEM
 398       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 399     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 400
 401   /* Storing an lsb-aligned field in a register
 402      can be done with a movestrict instruction.  */
 403
 404   if (GET_CODE (op0) != MEM
 405       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 406       && bitsize == GET_MODE_BITSIZE (fieldmode)
 407       && (movstrict_optab->handlers[(int) fieldmode].insn_code
 408           != CODE_FOR_nothing))
 409     {
 410       int icode = movstrict_optab->handlers[(int) fieldmode].insn_code;
 411
 412       /* Get appropriate low part of the value being stored.  */
 413       if (GET_CODE (value) == CONST_INT || GET_CODE (value) == REG)
 414         value = gen_lowpart (fieldmode, value);
 415       else if (!(GET_CODE (value) == SYMBOL_REF
 416                  || GET_CODE (value) == LABEL_REF
 417                  || GET_CODE (value) == CONST))
 418         value = convert_to_mode (fieldmode, value, 0);
 419
 420       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 421         value = copy_to_mode_reg (fieldmode, value);
 422
 423       if (GET_CODE (op0) == SUBREG)
 424         {
 425           if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 426               || GET_MODE_CLASS (fieldmode) == MODE_INT
 427               || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 428             op0 = SUBREG_REG (op0);
 429           else
 430             /* Else we've got some float mode source being extracted into
 431                a different float mode destination -- this combination of
 432                subregs results in Severe Tire Damage.  */
 433             abort ();
 434         }
 435
 436       emit_insn (GEN_FCN (icode)
 437                  (gen_rtx_SUBREG (fieldmode, op0,
 438                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 439                                   + (offset * UNITS_PER_WORD)),
 440                                   value));
 441
 442       return value;
 443     }
 444
 445   /* Handle fields bigger than a word.  */
 446
 447   if (bitsize > BITS_PER_WORD)
 448     {
 449       /* Here we transfer the words of the field
 450          in the order least significant first.
 451          This is because the most significant word is the one which may
 452          be less than full.
 453          However, only do that if the value is not BLKmode.  */
 454
 455       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 456       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 457       unsigned int i;
 458
 459       /* This is the mode we must force value to, so that there will be enough
 460          subwords to extract.  Note that fieldmode will often (always?) be
 461          VOIDmode, because that is what store_field uses to indicate that this
 462          is a bit field, but passing VOIDmode to operand_subword_force will
 463          result in an abort.  */
 464       fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 465
 466       for (i = 0; i < nwords; i++)
 467         {
 468           /* If I is 0, use the low-order word in both field and target;
 469              if I is 1, use the next to lowest word; and so on.  */
 470           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 471           unsigned int bit_offset = (backwards
 472                                      ? MAX ((int) bitsize - ((int) i + 1)
 473                                             * BITS_PER_WORD,
 474                                             0)
 475                                      : (int) i * BITS_PER_WORD);
 476
 477           store_bit_field (op0, MIN (BITS_PER_WORD,
 478                                      bitsize - i * BITS_PER_WORD),
 479                            bitnum + bit_offset, word_mode,
 480                            operand_subword_force (value, wordnum,
 481                                                   (GET_MODE (value) == VOIDmode
 482                                                    ? fieldmode
 483                                                    : GET_MODE (value))),
 484                            total_size);
 485         }
 486       return value;
 487     }
 488
 489   /* From here on we can assume that the field to be stored in is
 490      a full-word (whatever type that is), since it is shorter than a word.  */
 491
 492   /* OFFSET is the number of words or bytes (UNIT says which)
 493      from STR_RTX to the first word or byte containing part of the field.  */
 494
 495   if (GET_CODE (op0) != MEM)
 496     {
 497       if (offset != 0
 498           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 499         {
 500           if (GET_CODE (op0) != REG)
 501             {
 502               /* Since this is a destination (lvalue), we can't copy it to a
 503                  pseudo.  We can trivially remove a SUBREG that does not
 504                  change the size of the operand.  Such a SUBREG may have been
 505                  added above.  Otherwise, abort.  */
 506               if (GET_CODE (op0) == SUBREG
 507                   && (GET_MODE_SIZE (GET_MODE (op0))
 508                       == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))))
 509                 op0 = SUBREG_REG (op0);
 510               else
 511                 abort ();
 512             }
 513           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 514                                 op0, (offset * UNITS_PER_WORD));
 515         }
 516       offset = 0;
 517     }
 518   else
 519     op0 = protect_from_queue (op0, 1);
 520
 521   /* If VALUE is a floating-point mode, access it as an integer of the
 522      corresponding size.  This can occur on a machine with 64 bit registers
 523      that uses SFmode for float.  This can also occur for unaligned float
 524      structure fields.  */
 525   if (GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 526       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 527     value = gen_lowpart ((GET_MODE (value) == VOIDmode
 528                           ? word_mode : int_mode_for_mode (GET_MODE (value))),
 529                          value);
 530
 531   /* Now OFFSET is nonzero only if OP0 is memory
 532      and is therefore always measured in bytes.  */
 533
 534   if (HAVE_insv
 535       && GET_MODE (value) != BLKmode
 536       && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
 537       /* Ensure insv's size is wide enough for this field.  */
 538       && (GET_MODE_BITSIZE (op_mode) >= bitsize)
 539       && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 540             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
 541     {
 542       int xbitpos = bitpos;
 543       rtx value1;
 544       rtx xop0 = op0;
 545       rtx last = get_last_insn ();
 546       rtx pat;
 547       enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
 548       int save_volatile_ok = volatile_ok;
 549
 550       volatile_ok = 1;
 551
 552       /* If this machine's insv can only insert into a register, copy OP0
 553          into a register and save it back later.  */
 554       /* This used to check flag_force_mem, but that was a serious
 555          de-optimization now that flag_force_mem is enabled by -O2.  */
 556       if (GET_CODE (op0) == MEM
 557           && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
 558                 (op0, VOIDmode)))
 559         {
 560           rtx tempreg;
 561           enum machine_mode bestmode;
 562
 563           /* Get the mode to use for inserting into this field.  If OP0 is
 564              BLKmode, get the smallest mode consistent with the alignment. If
 565              OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
 566              mode. Otherwise, use the smallest mode containing the field.  */
 567
 568           if (GET_MODE (op0) == BLKmode
 569               || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
 570             bestmode
 571               = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
 572                                MEM_VOLATILE_P (op0));
 573           else
 574             bestmode = GET_MODE (op0);
 575
 576           if (bestmode == VOIDmode
 577               || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 578                   && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 579             goto insv_loses;
 580
 581           /* Adjust address to point to the containing unit of that mode.
 582              Compute offset as multiple of this unit, counting in bytes.  */
 583           unit = GET_MODE_BITSIZE (bestmode);
 584           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 585           bitpos = bitnum % unit;
 586           op0 = adjust_address (op0, bestmode,  offset);
 587
 588           /* Fetch that unit, store the bitfield in it, then store
 589              the unit.  */
 590           tempreg = copy_to_reg (op0);
 591           store_bit_field (tempreg, bitsize, bitpos, fieldmode, value,
 592                            total_size);
 593           emit_move_insn (op0, tempreg);
 594           return value;
 595         }
 596       volatile_ok = save_volatile_ok;
 597
 598       /* Add OFFSET into OP0's address.  */
 599       if (GET_CODE (xop0) == MEM)
 600         xop0 = adjust_address (xop0, byte_mode, offset);
 601
 602       /* If xop0 is a register, we need it in MAXMODE
 603          to make it acceptable to the format of insv.  */
 604       if (GET_CODE (xop0) == SUBREG)
 605         /* We can't just change the mode, because this might clobber op0,
 606            and we will need the original value of op0 if insv fails.  */
 607         xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 608       if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
 609         xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
 610
 611       /* On big-endian machines, we count bits from the most significant.
 612          If the bit field insn does not, we must invert.  */
 613
 614       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 615         xbitpos = unit - bitsize - xbitpos;
 616
 617       /* We have been counting XBITPOS within UNIT.
 618          Count instead within the size of the register.  */
 619       if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
 620         xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
 621
 622       unit = GET_MODE_BITSIZE (maxmode);
 623
 624       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
 625       value1 = value;
 626       if (GET_MODE (value) != maxmode)
 627         {
 628           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 629             {
 630               /* Optimization: Don't bother really extending VALUE
 631                  if it has all the bits we will actually use.  However,
 632                  if we must narrow it, be sure we do it correctly.  */
 633
 634               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
 635                 {
 636                   rtx tmp;
 637
 638                   tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
 639                   if (! tmp)
 640                     tmp = simplify_gen_subreg (maxmode,
 641                                                force_reg (GET_MODE (value),
 642                                                           value1),
 643                                                GET_MODE (value), 0);
 644                   value1 = tmp;
 645                 }
 646               else
 647                 value1 = gen_lowpart (maxmode, value1);
 648             }
 649           else if (GET_CODE (value) == CONST_INT)
 650             value1 = gen_int_mode (INTVAL (value), maxmode);
 651           else if (!CONSTANT_P (value))
 652             /* Parse phase is supposed to make VALUE's data type
 653                match that of the component reference, which is a type
 654                at least as wide as the field; so VALUE should have
 655                a mode that corresponds to that type.  */
 656             abort ();
 657         }
 658
 659       /* If this machine's insv insists on a register,
 660          get VALUE1 into a register.  */
 661       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 662              (value1, maxmode)))
 663         value1 = force_reg (maxmode, value1);
 664
 665       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 666       if (pat)
 667         emit_insn (pat);
 668       else
 669         {
 670           delete_insns_since (last);
 671           store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 672         }
 673     }
 674   else
 675     insv_loses:
 676     /* Insv is not available; store using shifts and boolean ops.  */
 677     store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 678   return value;
 679 }
 680 \f
 681 /* Use shifts and boolean operations to store VALUE
 682    into a bit field of width BITSIZE
 683    in a memory location specified by OP0 except offset by OFFSET bytes.
 684      (OFFSET must be 0 if OP0 is a register.)
 685    The field starts at position BITPOS within the byte.
 686     (If OP0 is a register, it may be a full word or a narrower mode,
 687      but BITPOS still counts within a full word,
 688      which is significant on bigendian machines.)
 689
 690    Note that protect_from_queue has already been done on OP0 and VALUE.  */
 691
 692 static void
 693 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 694                        unsigned HOST_WIDE_INT bitsize,
 695                        unsigned HOST_WIDE_INT bitpos, rtx value)
 696 {
 697   enum machine_mode mode;
 698   unsigned int total_bits = BITS_PER_WORD;
 699   rtx subtarget, temp;
 700   int all_zero = 0;
 701   int all_one = 0;
 702
 703   /* There is a case not handled here:
 704      a structure with a known alignment of just a halfword
 705      and a field split across two aligned halfwords within the structure.
 706      Or likewise a structure with a known alignment of just a byte
 707      and a field split across two bytes.
 708      Such cases are not supposed to be able to occur.  */
 709
 710   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 711     {
 712       if (offset != 0)
 713         abort ();
 714       /* Special treatment for a bit field split across two registers.  */
 715       if (bitsize + bitpos > BITS_PER_WORD)
 716         {
 717           store_split_bit_field (op0, bitsize, bitpos, value);
 718           return;
 719         }
 720     }
 721   else
 722     {
 723       /* Get the proper mode to use for this field.  We want a mode that
 724          includes the entire field.  If such a mode would be larger than
 725          a word, we won't be doing the extraction the normal way.
 726          We don't want a mode bigger than the destination.  */
 727
 728       mode = GET_MODE (op0);
 729       if (GET_MODE_BITSIZE (mode) == 0
 730           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 731         mode = word_mode;
 732       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 733                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 734
 735       if (mode == VOIDmode)
 736         {
 737           /* The only way this should occur is if the field spans word
 738              boundaries.  */
 739           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 740                                  value);
 741           return;
 742         }
 743
 744       total_bits = GET_MODE_BITSIZE (mode);
 745
 746       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 747          be in the range 0 to total_bits-1, and put any excess bytes in
 748          OFFSET.  */
 749       if (bitpos >= total_bits)
 750         {
 751           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 752           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 753                      * BITS_PER_UNIT);
 754         }
 755
 756       /* Get ref to an aligned byte, halfword, or word containing the field.
 757          Adjust BITPOS to be position within a word,
 758          and OFFSET to be the offset of that word.
 759          Then alter OP0 to refer to that word.  */
 760       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 761       offset -= (offset % (total_bits / BITS_PER_UNIT));
 762       op0 = adjust_address (op0, mode, offset);
 763     }
 764
 765   mode = GET_MODE (op0);
 766
 767   /* Now MODE is either some integral mode for a MEM as OP0,
 768      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 769      The bit field is contained entirely within OP0.
 770      BITPOS is the starting bit number within OP0.
 771      (OP0's mode may actually be narrower than MODE.)  */
 772
 773   if (BYTES_BIG_ENDIAN)
 774       /* BITPOS is the distance between our msb
 775          and that of the containing datum.
 776          Convert it to the distance from the lsb.  */
 777       bitpos = total_bits - bitsize - bitpos;
 778
 779   /* Now BITPOS is always the distance between our lsb
 780      and that of OP0.  */
 781
 782   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 783      we must first convert its mode to MODE.  */
 784
 785   if (GET_CODE (value) == CONST_INT)
 786     {
 787       HOST_WIDE_INT v = INTVAL (value);
 788
 789       if (bitsize < HOST_BITS_PER_WIDE_INT)
 790         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 791
 792       if (v == 0)
 793         all_zero = 1;
 794       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 795                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 796                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 797         all_one = 1;
 798
 799       value = lshift_value (mode, value, bitpos, bitsize);
 800     }
 801   else
 802     {
 803       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 804                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 805
 806       if (GET_MODE (value) != mode)
 807         {
 808           if ((GET_CODE (value) == REG || GET_CODE (value) == SUBREG)
 809               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 810             value = gen_lowpart (mode, value);
 811           else
 812             value = convert_to_mode (mode, value, 1);
 813         }
 814
 815       if (must_and)
 816         value = expand_binop (mode, and_optab, value,
 817                               mask_rtx (mode, 0, bitsize, 0),
 818                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 819       if (bitpos > 0)
 820         value = expand_shift (LSHIFT_EXPR, mode, value,
 821                               build_int_2 (bitpos, 0), NULL_RTX, 1);
 822     }
 823
 824   /* Now clear the chosen bits in OP0,
 825      except that if VALUE is -1 we need not bother.  */
 826
 827   subtarget = (GET_CODE (op0) == REG || ! flag_force_mem) ? op0 : 0;
 828
 829   if (! all_one)
 830     {
 831       temp = expand_binop (mode, and_optab, op0,
 832                            mask_rtx (mode, bitpos, bitsize, 1),
 833                            subtarget, 1, OPTAB_LIB_WIDEN);
 834       subtarget = temp;
 835     }
 836   else
 837     temp = op0;
 838
 839   /* Now logical-or VALUE into OP0, unless it is zero.  */
 840
 841   if (! all_zero)
 842     temp = expand_binop (mode, ior_optab, temp, value,
 843                          subtarget, 1, OPTAB_LIB_WIDEN);
 844   if (op0 != temp)
 845     emit_move_insn (op0, temp);
 846 }
 847 \f
 848 /* Store a bit field that is split across multiple accessible memory objects.
 849
 850    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 851    BITSIZE is the field width; BITPOS the position of its first bit
 852    (within the word).
 853    VALUE is the value to store.
 854
 855    This does not yet handle fields wider than BITS_PER_WORD.  */
 856
 857 static void
 858 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 859                        unsigned HOST_WIDE_INT bitpos, rtx value)
 860 {
 861   unsigned int unit;
 862   unsigned int bitsdone = 0;
 863
 864   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 865      much at a time.  */
 866   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 867     unit = BITS_PER_WORD;
 868   else
 869     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
 870
 871   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 872      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 873      that VALUE might be a floating-point constant.  */
 874   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
 875     {
 876       rtx word = gen_lowpart_common (word_mode, value);
 877
 878       if (word && (value != word))
 879         value = word;
 880       else
 881         value = gen_lowpart_common (word_mode,
 882                                     force_reg (GET_MODE (value) != VOIDmode
 883                                                ? GET_MODE (value)
 884                                                : word_mode, value));
 885     }
 886   else if (GET_CODE (value) == ADDRESSOF)
 887     value = copy_to_reg (value);
 888
 889   while (bitsdone < bitsize)
 890     {
 891       unsigned HOST_WIDE_INT thissize;
 892       rtx part, word;
 893       unsigned HOST_WIDE_INT thispos;
 894       unsigned HOST_WIDE_INT offset;
 895
 896       offset = (bitpos + bitsdone) / unit;
 897       thispos = (bitpos + bitsdone) % unit;
 898
 899       /* THISSIZE must not overrun a word boundary.  Otherwise,
 900          store_fixed_bit_field will call us again, and we will mutually
 901          recurse forever.  */
 902       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
 903       thissize = MIN (thissize, unit - thispos);
 904
 905       if (BYTES_BIG_ENDIAN)
 906         {
 907           int total_bits;
 908
 909           /* We must do an endian conversion exactly the same way as it is
 910              done in extract_bit_field, so that the two calls to
 911              extract_fixed_bit_field will have comparable arguments.  */
 912           if (GET_CODE (value) != MEM || GET_MODE (value) == BLKmode)
 913             total_bits = BITS_PER_WORD;
 914           else
 915             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
 916
 917           /* Fetch successively less significant portions.  */
 918           if (GET_CODE (value) == CONST_INT)
 919             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 920                              >> (bitsize - bitsdone - thissize))
 921                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 922           else
 923             /* The args are chosen so that the last part includes the
 924                lsb.  Give extract_bit_field the value it needs (with
 925                endianness compensation) to fetch the piece we want.  */
 926             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
 927                                             total_bits - bitsize + bitsdone,
 928                                             NULL_RTX, 1);
 929         }
 930       else
 931         {
 932           /* Fetch successively more significant portions.  */
 933           if (GET_CODE (value) == CONST_INT)
 934             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 935                              >> bitsdone)
 936                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 937           else
 938             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
 939                                             bitsdone, NULL_RTX, 1);
 940         }
 941
 942       /* If OP0 is a register, then handle OFFSET here.
 943
 944          When handling multiword bitfields, extract_bit_field may pass
 945          down a word_mode SUBREG of a larger REG for a bitfield that actually
 946          crosses a word boundary.  Thus, for a SUBREG, we must find
 947          the current word starting from the base register.  */
 948       if (GET_CODE (op0) == SUBREG)
 949         {
 950           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
 951           word = operand_subword_force (SUBREG_REG (op0), word_offset,
 952                                         GET_MODE (SUBREG_REG (op0)));
 953           offset = 0;
 954         }
 955       else if (GET_CODE (op0) == REG)
 956         {
 957           word = operand_subword_force (op0, offset, GET_MODE (op0));
 958           offset = 0;
 959         }
 960       else
 961         word = op0;
 962
 963       /* OFFSET is in UNITs, and UNIT is in bits.
 964          store_fixed_bit_field wants offset in bytes.  */
 965       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
 966                              thispos, part);
 967       bitsdone += thissize;
 968     }
 969 }
 970 \f
 971 /* Generate code to extract a byte-field from STR_RTX
 972    containing BITSIZE bits, starting at BITNUM,
 973    and put it in TARGET if possible (if TARGET is nonzero).
 974    Regardless of TARGET, we return the rtx for where the value is placed.
 975    It may be a QUEUED.
 976
 977    STR_RTX is the structure containing the byte (a REG or MEM).
 978    UNSIGNEDP is nonzero if this is an unsigned bit field.
 979    MODE is the natural mode of the field value once extracted.
 980    TMODE is the mode the caller would like the value to have;
 981    but the value may be returned with type MODE instead.
 982
 983    TOTAL_SIZE is the size in bytes of the containing structure,
 984    or -1 if varying.
 985
 986    If a TARGET is specified and we can store in it at no extra cost,
 987    we do so, and return TARGET.
 988    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
 989    if they are equally easy.  */
 990
 991 rtx
 992 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 993                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
 994                    enum machine_mode mode, enum machine_mode tmode,
 995                    HOST_WIDE_INT total_size)
 996 {
 997   unsigned int unit
 998     = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
 999   unsigned HOST_WIDE_INT offset = bitnum / unit;
1000   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
1001   rtx op0 = str_rtx;
1002   rtx spec_target = target;
1003   rtx spec_target_subreg = 0;
1004   enum machine_mode int_mode;
1005   enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1006   enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1007   enum machine_mode mode1;
1008   int byte_offset;
1009
1010   /* Discount the part of the structure before the desired byte.
1011      We need to know how many bytes are safe to reference after it.  */
1012   if (total_size >= 0)
1013     total_size -= (bitpos / BIGGEST_ALIGNMENT
1014                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
1015
1016   if (tmode == VOIDmode)
1017     tmode = mode;
1018
1019   while (GET_CODE (op0) == SUBREG)
1020     {
1021       bitpos += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1022       if (bitpos > unit)
1023         {
1024           offset += (bitpos / unit);
1025           bitpos %= unit;
1026         }
1027       op0 = SUBREG_REG (op0);
1028     }
1029
1030   if (GET_CODE (op0) == REG
1031       && mode == GET_MODE (op0)
1032       && bitnum == 0
1033       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1034     {
1035       /* We're trying to extract a full register from itself.  */
1036       return op0;
1037     }
1038
1039   /* Make sure we are playing with integral modes.  Pun with subregs
1040      if we aren't.  */
1041   {
1042     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1043     if (imode != GET_MODE (op0))
1044       {
1045         if (GET_CODE (op0) == MEM)
1046           op0 = adjust_address (op0, imode, 0);
1047         else if (imode != BLKmode)
1048           op0 = gen_lowpart (imode, op0);
1049         else
1050           abort ();
1051       }
1052   }
1053
1054   /* We may be accessing data outside the field, which means
1055      we can alias adjacent data.  */
1056   if (GET_CODE (op0) == MEM)
1057     {
1058       op0 = shallow_copy_rtx (op0);
1059       set_mem_alias_set (op0, 0);
1060       set_mem_expr (op0, 0);
1061     }
1062
1063   /* Extraction of a full-word or multi-word value from a structure
1064      in a register or aligned memory can be done with just a SUBREG.
1065      A subword value in the least significant part of a register
1066      can also be extracted with a SUBREG.  For this, we need the
1067      byte offset of the value in op0.  */
1068
1069   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1070
1071   /* If OP0 is a register, BITPOS must count within a word.
1072      But as we have it, it counts within whatever size OP0 now has.
1073      On a bigendian machine, these are not the same, so convert.  */
1074   if (BYTES_BIG_ENDIAN
1075       && GET_CODE (op0) != MEM
1076       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1077     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1078
1079   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1080      If that's wrong, the solution is to test for it and set TARGET to 0
1081      if needed.  */
1082
1083   mode1  = (VECTOR_MODE_P (tmode)
1084             ? mode
1085             : mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0));
1086
1087   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1088         && bitpos % BITS_PER_WORD == 0)
1089        || (mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) != BLKmode
1090            /* ??? The big endian test here is wrong.  This is correct
1091               if the value is in a register, and if mode_for_size is not
1092               the same mode as op0.  This causes us to get unnecessarily
1093               inefficient code from the Thumb port when -mbig-endian.  */
1094            && (BYTES_BIG_ENDIAN
1095                ? bitpos + bitsize == BITS_PER_WORD
1096                : bitpos == 0)))
1097       && ((GET_CODE (op0) != MEM
1098            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1099                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1100            && GET_MODE_SIZE (mode1) != 0
1101            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1102           || (GET_CODE (op0) == MEM
1103               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1104                   || (offset * BITS_PER_UNIT % bitsize == 0
1105                       && MEM_ALIGN (op0) % bitsize == 0)))))
1106     {
1107       if (mode1 != GET_MODE (op0))
1108         {
1109           if (GET_CODE (op0) == SUBREG)
1110             {
1111               if (GET_MODE (SUBREG_REG (op0)) == mode1
1112                   || GET_MODE_CLASS (mode1) == MODE_INT
1113                   || GET_MODE_CLASS (mode1) == MODE_PARTIAL_INT)
1114                 op0 = SUBREG_REG (op0);
1115               else
1116                 /* Else we've got some float mode source being extracted into
1117                    a different float mode destination -- this combination of
1118                    subregs results in Severe Tire Damage.  */
1119                 goto no_subreg_mode_swap;
1120             }
1121           if (GET_CODE (op0) == REG)
1122             op0 = gen_rtx_SUBREG (mode1, op0, byte_offset);
1123           else
1124             op0 = adjust_address (op0, mode1, offset);
1125         }
1126       if (mode1 != mode)
1127         return convert_to_mode (tmode, op0, unsignedp);
1128       return op0;
1129     }
1130  no_subreg_mode_swap:
1131
1132   /* Handle fields bigger than a word.  */
1133
1134   if (bitsize > BITS_PER_WORD)
1135     {
1136       /* Here we transfer the words of the field
1137          in the order least significant first.
1138          This is because the most significant word is the one which may
1139          be less than full.  */
1140
1141       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1142       unsigned int i;
1143
1144       if (target == 0 || GET_CODE (target) != REG)
1145         target = gen_reg_rtx (mode);
1146
1147       /* Indicate for flow that the entire target reg is being set.  */
1148       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1149
1150       for (i = 0; i < nwords; i++)
1151         {
1152           /* If I is 0, use the low-order word in both field and target;
1153              if I is 1, use the next to lowest word; and so on.  */
1154           /* Word number in TARGET to use.  */
1155           unsigned int wordnum
1156             = (WORDS_BIG_ENDIAN
1157                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1158                : i);
1159           /* Offset from start of field in OP0.  */
1160           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1161                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1162                                                 * (int) BITS_PER_WORD))
1163                                      : (int) i * BITS_PER_WORD);
1164           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1165           rtx result_part
1166             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1167                                            bitsize - i * BITS_PER_WORD),
1168                                  bitnum + bit_offset, 1, target_part, mode,
1169                                  word_mode, total_size);
1170
1171           if (target_part == 0)
1172             abort ();
1173
1174           if (result_part != target_part)
1175             emit_move_insn (target_part, result_part);
1176         }
1177
1178       if (unsignedp)
1179         {
1180           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1181              need to be zero'd out.  */
1182           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1183             {
1184               unsigned int i, total_words;
1185
1186               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1187               for (i = nwords; i < total_words; i++)
1188                 emit_move_insn
1189                   (operand_subword (target,
1190                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1191                                     1, VOIDmode),
1192                    const0_rtx);
1193             }
1194           return target;
1195         }
1196
1197       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1198       target = expand_shift (LSHIFT_EXPR, mode, target,
1199                              build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1200                              NULL_RTX, 0);
1201       return expand_shift (RSHIFT_EXPR, mode, target,
1202                            build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1203                            NULL_RTX, 0);
1204     }
1205
1206   /* From here on we know the desired field is smaller than a word.  */
1207
1208   /* Check if there is a correspondingly-sized integer field, so we can
1209      safely extract it as one size of integer, if necessary; then
1210      truncate or extend to the size that is wanted; then use SUBREGs or
1211      convert_to_mode to get one of the modes we really wanted.  */
1212
1213   int_mode = int_mode_for_mode (tmode);
1214   if (int_mode == BLKmode)
1215     int_mode = int_mode_for_mode (mode);
1216   if (int_mode == BLKmode)
1217     abort ();    /* Should probably push op0 out to memory and then
1218                     do a load.  */
1219
1220   /* OFFSET is the number of words or bytes (UNIT says which)
1221      from STR_RTX to the first word or byte containing part of the field.  */
1222
1223   if (GET_CODE (op0) != MEM)
1224     {
1225       if (offset != 0
1226           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1227         {
1228           if (GET_CODE (op0) != REG)
1229             op0 = copy_to_reg (op0);
1230           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1231                                 op0, (offset * UNITS_PER_WORD));
1232         }
1233       offset = 0;
1234     }
1235   else
1236     op0 = protect_from_queue (str_rtx, 1);
1237
1238   /* Now OFFSET is nonzero only for memory operands.  */
1239
1240   if (unsignedp)
1241     {
1242       if (HAVE_extzv
1243           && (GET_MODE_BITSIZE (extzv_mode) >= bitsize)
1244           && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1245                 && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1246         {
1247           unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1248           rtx bitsize_rtx, bitpos_rtx;
1249           rtx last = get_last_insn ();
1250           rtx xop0 = op0;
1251           rtx xtarget = target;
1252           rtx xspec_target = spec_target;
1253           rtx xspec_target_subreg = spec_target_subreg;
1254           rtx pat;
1255           enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1256
1257           if (GET_CODE (xop0) == MEM)
1258             {
1259               int save_volatile_ok = volatile_ok;
1260               volatile_ok = 1;
1261
1262               /* Is the memory operand acceptable?  */
1263               if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1264                      (xop0, GET_MODE (xop0))))
1265                 {
1266                   /* No, load into a reg and extract from there.  */
1267                   enum machine_mode bestmode;
1268
1269                   /* Get the mode to use for inserting into this field.  If
1270                      OP0 is BLKmode, get the smallest mode consistent with the
1271                      alignment. If OP0 is a non-BLKmode object that is no
1272                      wider than MAXMODE, use its mode. Otherwise, use the
1273                      smallest mode containing the field.  */
1274
1275                   if (GET_MODE (xop0) == BLKmode
1276                       || (GET_MODE_SIZE (GET_MODE (op0))
1277                           > GET_MODE_SIZE (maxmode)))
1278                     bestmode = get_best_mode (bitsize, bitnum,
1279                                               MEM_ALIGN (xop0), maxmode,
1280                                               MEM_VOLATILE_P (xop0));
1281                   else
1282                     bestmode = GET_MODE (xop0);
1283
1284                   if (bestmode == VOIDmode
1285                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1286                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1287                     goto extzv_loses;
1288
1289                   /* Compute offset as multiple of this unit,
1290                      counting in bytes.  */
1291                   unit = GET_MODE_BITSIZE (bestmode);
1292                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1293                   xbitpos = bitnum % unit;
1294                   xop0 = adjust_address (xop0, bestmode, xoffset);
1295
1296                   /* Fetch it to a register in that size.  */
1297                   xop0 = force_reg (bestmode, xop0);
1298
1299                   /* XBITPOS counts within UNIT, which is what is expected.  */
1300                 }
1301               else
1302                 /* Get ref to first byte containing part of the field.  */
1303                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1304
1305               volatile_ok = save_volatile_ok;
1306             }
1307
1308           /* If op0 is a register, we need it in MAXMODE (which is usually
1309              SImode). to make it acceptable to the format of extzv.  */
1310           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1311             goto extzv_loses;
1312           if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1313             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1314
1315           /* On big-endian machines, we count bits from the most significant.
1316              If the bit field insn does not, we must invert.  */
1317           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1318             xbitpos = unit - bitsize - xbitpos;
1319
1320           /* Now convert from counting within UNIT to counting in MAXMODE.  */
1321           if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1322             xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1323
1324           unit = GET_MODE_BITSIZE (maxmode);
1325
1326           if (xtarget == 0
1327               || (flag_force_mem && GET_CODE (xtarget) == MEM))
1328             xtarget = xspec_target = gen_reg_rtx (tmode);
1329
1330           if (GET_MODE (xtarget) != maxmode)
1331             {
1332               if (GET_CODE (xtarget) == REG)
1333                 {
1334                   int wider = (GET_MODE_SIZE (maxmode)
1335                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1336                   xtarget = gen_lowpart (maxmode, xtarget);
1337                   if (wider)
1338                     xspec_target_subreg = xtarget;
1339                 }
1340               else
1341                 xtarget = gen_reg_rtx (maxmode);
1342             }
1343
1344           /* If this machine's extzv insists on a register target,
1345              make sure we have one.  */
1346           if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1347                  (xtarget, maxmode)))
1348             xtarget = gen_reg_rtx (maxmode);
1349
1350           bitsize_rtx = GEN_INT (bitsize);
1351           bitpos_rtx = GEN_INT (xbitpos);
1352
1353           pat = gen_extzv (protect_from_queue (xtarget, 1),
1354                            xop0, bitsize_rtx, bitpos_rtx);
1355           if (pat)
1356             {
1357               emit_insn (pat);
1358               target = xtarget;
1359               spec_target = xspec_target;
1360               spec_target_subreg = xspec_target_subreg;
1361             }
1362           else
1363             {
1364               delete_insns_since (last);
1365               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1366                                                 bitpos, target, 1);
1367             }
1368         }
1369       else
1370       extzv_loses:
1371         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1372                                           bitpos, target, 1);
1373     }
1374   else
1375     {
1376       if (HAVE_extv
1377           && (GET_MODE_BITSIZE (extv_mode) >= bitsize)
1378           && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1379                 && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1380         {
1381           int xbitpos = bitpos, xoffset = offset;
1382           rtx bitsize_rtx, bitpos_rtx;
1383           rtx last = get_last_insn ();
1384           rtx xop0 = op0, xtarget = target;
1385           rtx xspec_target = spec_target;
1386           rtx xspec_target_subreg = spec_target_subreg;
1387           rtx pat;
1388           enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1389
1390           if (GET_CODE (xop0) == MEM)
1391             {
1392               /* Is the memory operand acceptable?  */
1393               if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1394                      (xop0, GET_MODE (xop0))))
1395                 {
1396                   /* No, load into a reg and extract from there.  */
1397                   enum machine_mode bestmode;
1398
1399                   /* Get the mode to use for inserting into this field.  If
1400                      OP0 is BLKmode, get the smallest mode consistent with the
1401                      alignment. If OP0 is a non-BLKmode object that is no
1402                      wider than MAXMODE, use its mode. Otherwise, use the
1403                      smallest mode containing the field.  */
1404
1405                   if (GET_MODE (xop0) == BLKmode
1406                       || (GET_MODE_SIZE (GET_MODE (op0))
1407                           > GET_MODE_SIZE (maxmode)))
1408                     bestmode = get_best_mode (bitsize, bitnum,
1409                                               MEM_ALIGN (xop0), maxmode,
1410                                               MEM_VOLATILE_P (xop0));
1411                   else
1412                     bestmode = GET_MODE (xop0);
1413
1414                   if (bestmode == VOIDmode
1415                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1416                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1417                     goto extv_loses;
1418
1419                   /* Compute offset as multiple of this unit,
1420                      counting in bytes.  */
1421                   unit = GET_MODE_BITSIZE (bestmode);
1422                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1423                   xbitpos = bitnum % unit;
1424                   xop0 = adjust_address (xop0, bestmode, xoffset);
1425
1426                   /* Fetch it to a register in that size.  */
1427                   xop0 = force_reg (bestmode, xop0);
1428
1429                   /* XBITPOS counts within UNIT, which is what is expected.  */
1430                 }
1431               else
1432                 /* Get ref to first byte containing part of the field.  */
1433                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1434             }
1435
1436           /* If op0 is a register, we need it in MAXMODE (which is usually
1437              SImode) to make it acceptable to the format of extv.  */
1438           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1439             goto extv_loses;
1440           if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1441             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1442
1443           /* On big-endian machines, we count bits from the most significant.
1444              If the bit field insn does not, we must invert.  */
1445           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1446             xbitpos = unit - bitsize - xbitpos;
1447
1448           /* XBITPOS counts within a size of UNIT.
1449              Adjust to count within a size of MAXMODE.  */
1450           if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1451             xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1452
1453           unit = GET_MODE_BITSIZE (maxmode);
1454
1455           if (xtarget == 0
1456               || (flag_force_mem && GET_CODE (xtarget) == MEM))
1457             xtarget = xspec_target = gen_reg_rtx (tmode);
1458
1459           if (GET_MODE (xtarget) != maxmode)
1460             {
1461               if (GET_CODE (xtarget) == REG)
1462                 {
1463                   int wider = (GET_MODE_SIZE (maxmode)
1464                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1465                   xtarget = gen_lowpart (maxmode, xtarget);
1466                   if (wider)
1467                     xspec_target_subreg = xtarget;
1468                 }
1469               else
1470                 xtarget = gen_reg_rtx (maxmode);
1471             }
1472
1473           /* If this machine's extv insists on a register target,
1474              make sure we have one.  */
1475           if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1476                  (xtarget, maxmode)))
1477             xtarget = gen_reg_rtx (maxmode);
1478
1479           bitsize_rtx = GEN_INT (bitsize);
1480           bitpos_rtx = GEN_INT (xbitpos);
1481
1482           pat = gen_extv (protect_from_queue (xtarget, 1),
1483                           xop0, bitsize_rtx, bitpos_rtx);
1484           if (pat)
1485             {
1486               emit_insn (pat);
1487               target = xtarget;
1488               spec_target = xspec_target;
1489               spec_target_subreg = xspec_target_subreg;
1490             }
1491           else
1492             {
1493               delete_insns_since (last);
1494               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1495                                                 bitpos, target, 0);
1496             }
1497         }
1498       else
1499       extv_loses:
1500         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1501                                           bitpos, target, 0);
1502     }
1503   if (target == spec_target)
1504     return target;
1505   if (target == spec_target_subreg)
1506     return spec_target;
1507   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1508     {
1509       /* If the target mode is floating-point, first convert to the
1510          integer mode of that size and then access it as a floating-point
1511          value via a SUBREG.  */
1512       if (GET_MODE_CLASS (tmode) != MODE_INT
1513           && GET_MODE_CLASS (tmode) != MODE_PARTIAL_INT)
1514         {
1515           target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
1516                                                    MODE_INT, 0),
1517                                     target, unsignedp);
1518           return gen_lowpart (tmode, target);
1519         }
1520       else
1521         return convert_to_mode (tmode, target, unsignedp);
1522     }
1523   return target;
1524 }
1525 \f
1526 /* Extract a bit field using shifts and boolean operations
1527    Returns an rtx to represent the value.
1528    OP0 addresses a register (word) or memory (byte).
1529    BITPOS says which bit within the word or byte the bit field starts in.
1530    OFFSET says how many bytes farther the bit field starts;
1531     it is 0 if OP0 is a register.
1532    BITSIZE says how many bits long the bit field is.
1533     (If OP0 is a register, it may be narrower than a full word,
1534      but BITPOS still counts within a full word,
1535      which is significant on bigendian machines.)
1536
1537    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1538    If TARGET is nonzero, attempts to store the value there
1539    and return TARGET, but this is not guaranteed.
1540    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1541
1542 static rtx
1543 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1544                          unsigned HOST_WIDE_INT offset,
1545                          unsigned HOST_WIDE_INT bitsize,
1546                          unsigned HOST_WIDE_INT bitpos, rtx target,
1547                          int unsignedp)
1548 {
1549   unsigned int total_bits = BITS_PER_WORD;
1550   enum machine_mode mode;
1551
1552   if (GET_CODE (op0) == SUBREG || GET_CODE (op0) == REG)
1553     {
1554       /* Special treatment for a bit field split across two registers.  */
1555       if (bitsize + bitpos > BITS_PER_WORD)
1556         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1557     }
1558   else
1559     {
1560       /* Get the proper mode to use for this field.  We want a mode that
1561          includes the entire field.  If such a mode would be larger than
1562          a word, we won't be doing the extraction the normal way.  */
1563
1564       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1565                             MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1566
1567       if (mode == VOIDmode)
1568         /* The only way this should occur is if the field spans word
1569            boundaries.  */
1570         return extract_split_bit_field (op0, bitsize,
1571                                         bitpos + offset * BITS_PER_UNIT,
1572                                         unsignedp);
1573
1574       total_bits = GET_MODE_BITSIZE (mode);
1575
1576       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1577          be in the range 0 to total_bits-1, and put any excess bytes in
1578          OFFSET.  */
1579       if (bitpos >= total_bits)
1580         {
1581           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1582           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1583                      * BITS_PER_UNIT);
1584         }
1585
1586       /* Get ref to an aligned byte, halfword, or word containing the field.
1587          Adjust BITPOS to be position within a word,
1588          and OFFSET to be the offset of that word.
1589          Then alter OP0 to refer to that word.  */
1590       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1591       offset -= (offset % (total_bits / BITS_PER_UNIT));
1592       op0 = adjust_address (op0, mode, offset);
1593     }
1594
1595   mode = GET_MODE (op0);
1596
1597   if (BYTES_BIG_ENDIAN)
1598     /* BITPOS is the distance between our msb and that of OP0.
1599        Convert it to the distance from the lsb.  */
1600     bitpos = total_bits - bitsize - bitpos;
1601
1602   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1603      We have reduced the big-endian case to the little-endian case.  */
1604
1605   if (unsignedp)
1606     {
1607       if (bitpos)
1608         {
1609           /* If the field does not already start at the lsb,
1610              shift it so it does.  */
1611           tree amount = build_int_2 (bitpos, 0);
1612           /* Maybe propagate the target for the shift.  */
1613           /* But not if we will return it--could confuse integrate.c.  */
1614           rtx subtarget = (target != 0 && GET_CODE (target) == REG
1615                            && !REG_FUNCTION_VALUE_P (target)
1616                            ? target : 0);
1617           if (tmode != mode) subtarget = 0;
1618           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1619         }
1620       /* Convert the value to the desired mode.  */
1621       if (mode != tmode)
1622         op0 = convert_to_mode (tmode, op0, 1);
1623
1624       /* Unless the msb of the field used to be the msb when we shifted,
1625          mask out the upper bits.  */
1626
1627       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1628         return expand_binop (GET_MODE (op0), and_optab, op0,
1629                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1630                              target, 1, OPTAB_LIB_WIDEN);
1631       return op0;
1632     }
1633
1634   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1635      then arithmetic-shift its lsb to the lsb of the word.  */
1636   op0 = force_reg (mode, op0);
1637   if (mode != tmode)
1638     target = 0;
1639
1640   /* Find the narrowest integer mode that contains the field.  */
1641
1642   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1643        mode = GET_MODE_WIDER_MODE (mode))
1644     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1645       {
1646         op0 = convert_to_mode (mode, op0, 0);
1647         break;
1648       }
1649
1650   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1651     {
1652       tree amount
1653         = build_int_2 (GET_MODE_BITSIZE (mode) - (bitsize + bitpos), 0);
1654       /* Maybe propagate the target for the shift.  */
1655       /* But not if we will return the result--could confuse integrate.c.  */
1656       rtx subtarget = (target != 0 && GET_CODE (target) == REG
1657                        && ! REG_FUNCTION_VALUE_P (target)
1658                        ? target : 0);
1659       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1660     }
1661
1662   return expand_shift (RSHIFT_EXPR, mode, op0,
1663                        build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1664                        target, 0);
1665 }
1666 \f
1667 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1668    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1669    complement of that if COMPLEMENT.  The mask is truncated if
1670    necessary to the width of mode MODE.  The mask is zero-extended if
1671    BITSIZE+BITPOS is too small for MODE.  */
1672
1673 static rtx
1674 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1675 {
1676   HOST_WIDE_INT masklow, maskhigh;
1677
1678   if (bitsize == 0)
1679     masklow = 0;
1680   else if (bitpos < HOST_BITS_PER_WIDE_INT)
1681     masklow = (HOST_WIDE_INT) -1 << bitpos;
1682   else
1683     masklow = 0;
1684
1685   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1686     masklow &= ((unsigned HOST_WIDE_INT) -1
1687                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1688
1689   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1690     maskhigh = -1;
1691   else
1692     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1693
1694   if (bitsize == 0)
1695     maskhigh = 0;
1696   else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1697     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1698                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1699   else
1700     maskhigh = 0;
1701
1702   if (complement)
1703     {
1704       maskhigh = ~maskhigh;
1705       masklow = ~masklow;
1706     }
1707
1708   return immed_double_const (masklow, maskhigh, mode);
1709 }
1710
1711 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1712    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1713
1714 static rtx
1715 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1716 {
1717   unsigned HOST_WIDE_INT v = INTVAL (value);
1718   HOST_WIDE_INT low, high;
1719
1720   if (bitsize < HOST_BITS_PER_WIDE_INT)
1721     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1722
1723   if (bitpos < HOST_BITS_PER_WIDE_INT)
1724     {
1725       low = v << bitpos;
1726       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1727     }
1728   else
1729     {
1730       low = 0;
1731       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1732     }
1733
1734   return immed_double_const (low, high, mode);
1735 }
1736 \f
1737 /* Extract a bit field that is split across two words
1738    and return an RTX for the result.
1739
1740    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1741    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1742    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1743
1744 static rtx
1745 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1746                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1747 {
1748   unsigned int unit;
1749   unsigned int bitsdone = 0;
1750   rtx result = NULL_RTX;
1751   int first = 1;
1752
1753   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1754      much at a time.  */
1755   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1756     unit = BITS_PER_WORD;
1757   else
1758     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1759
1760   while (bitsdone < bitsize)
1761     {
1762       unsigned HOST_WIDE_INT thissize;
1763       rtx part, word;
1764       unsigned HOST_WIDE_INT thispos;
1765       unsigned HOST_WIDE_INT offset;
1766
1767       offset = (bitpos + bitsdone) / unit;
1768       thispos = (bitpos + bitsdone) % unit;
1769
1770       /* THISSIZE must not overrun a word boundary.  Otherwise,
1771          extract_fixed_bit_field will call us again, and we will mutually
1772          recurse forever.  */
1773       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1774       thissize = MIN (thissize, unit - thispos);
1775
1776       /* If OP0 is a register, then handle OFFSET here.
1777
1778          When handling multiword bitfields, extract_bit_field may pass
1779          down a word_mode SUBREG of a larger REG for a bitfield that actually
1780          crosses a word boundary.  Thus, for a SUBREG, we must find
1781          the current word starting from the base register.  */
1782       if (GET_CODE (op0) == SUBREG)
1783         {
1784           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1785           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1786                                         GET_MODE (SUBREG_REG (op0)));
1787           offset = 0;
1788         }
1789       else if (GET_CODE (op0) == REG)
1790         {
1791           word = operand_subword_force (op0, offset, GET_MODE (op0));
1792           offset = 0;
1793         }
1794       else
1795         word = op0;
1796
1797       /* Extract the parts in bit-counting order,
1798          whose meaning is determined by BYTES_PER_UNIT.
1799          OFFSET is in UNITs, and UNIT is in bits.
1800          extract_fixed_bit_field wants offset in bytes.  */
1801       part = extract_fixed_bit_field (word_mode, word,
1802                                       offset * unit / BITS_PER_UNIT,
1803                                       thissize, thispos, 0, 1);
1804       bitsdone += thissize;
1805
1806       /* Shift this part into place for the result.  */
1807       if (BYTES_BIG_ENDIAN)
1808         {
1809           if (bitsize != bitsdone)
1810             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1811                                  build_int_2 (bitsize - bitsdone, 0), 0, 1);
1812         }
1813       else
1814         {
1815           if (bitsdone != thissize)
1816             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1817                                  build_int_2 (bitsdone - thissize, 0), 0, 1);
1818         }
1819
1820       if (first)
1821         result = part;
1822       else
1823         /* Combine the parts with bitwise or.  This works
1824            because we extracted each part as an unsigned bit field.  */
1825         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1826                                OPTAB_LIB_WIDEN);
1827
1828       first = 0;
1829     }
1830
1831   /* Unsigned bit field: we are done.  */
1832   if (unsignedp)
1833     return result;
1834   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1835   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1836                          build_int_2 (BITS_PER_WORD - bitsize, 0),
1837                          NULL_RTX, 0);
1838   return expand_shift (RSHIFT_EXPR, word_mode, result,
1839                        build_int_2 (BITS_PER_WORD - bitsize, 0), NULL_RTX, 0);
1840 }
1841 \f
1842 /* Add INC into TARGET.  */
1843
1844 void
1845 expand_inc (rtx target, rtx inc)
1846 {
1847   rtx value = expand_binop (GET_MODE (target), add_optab,
1848                             target, inc,
1849                             target, 0, OPTAB_LIB_WIDEN);
1850   if (value != target)
1851     emit_move_insn (target, value);
1852 }
1853
1854 /* Subtract DEC from TARGET.  */
1855
1856 void
1857 expand_dec (rtx target, rtx dec)
1858 {
1859   rtx value = expand_binop (GET_MODE (target), sub_optab,
1860                             target, dec,
1861                             target, 0, OPTAB_LIB_WIDEN);
1862   if (value != target)
1863     emit_move_insn (target, value);
1864 }
1865 \f
1866 /* Output a shift instruction for expression code CODE,
1867    with SHIFTED being the rtx for the value to shift,
1868    and AMOUNT the tree for the amount to shift by.
1869    Store the result in the rtx TARGET, if that is convenient.
1870    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
1871    Return the rtx for where the value is.  */
1872
1873 rtx
1874 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
1875               tree amount, rtx target, int unsignedp)
1876 {
1877   rtx op1, temp = 0;
1878   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
1879   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
1880   int try;
1881
1882   /* Previously detected shift-counts computed by NEGATE_EXPR
1883      and shifted in the other direction; but that does not work
1884      on all machines.  */
1885
1886   op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
1887
1888 #ifdef SHIFT_COUNT_TRUNCATED
1889   if (SHIFT_COUNT_TRUNCATED)
1890     {
1891       if (GET_CODE (op1) == CONST_INT
1892           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
1893               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
1894         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
1895                        % GET_MODE_BITSIZE (mode));
1896       else if (GET_CODE (op1) == SUBREG
1897                && subreg_lowpart_p (op1))
1898         op1 = SUBREG_REG (op1);
1899     }
1900 #endif
1901
1902   if (op1 == const0_rtx)
1903     return shifted;
1904
1905   for (try = 0; temp == 0 && try < 3; try++)
1906     {
1907       enum optab_methods methods;
1908
1909       if (try == 0)
1910         methods = OPTAB_DIRECT;
1911       else if (try == 1)
1912         methods = OPTAB_WIDEN;
1913       else
1914         methods = OPTAB_LIB_WIDEN;
1915
1916       if (rotate)
1917         {
1918           /* Widening does not work for rotation.  */
1919           if (methods == OPTAB_WIDEN)
1920             continue;
1921           else if (methods == OPTAB_LIB_WIDEN)
1922             {
1923               /* If we have been unable to open-code this by a rotation,
1924                  do it as the IOR of two shifts.  I.e., to rotate A
1925                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
1926                  where C is the bitsize of A.
1927
1928                  It is theoretically possible that the target machine might
1929                  not be able to perform either shift and hence we would
1930                  be making two libcalls rather than just the one for the
1931                  shift (similarly if IOR could not be done).  We will allow
1932                  this extremely unlikely lossage to avoid complicating the
1933                  code below.  */
1934
1935               rtx subtarget = target == shifted ? 0 : target;
1936               rtx temp1;
1937               tree type = TREE_TYPE (amount);
1938               tree new_amount = make_tree (type, op1);
1939               tree other_amount
1940                 = fold (build (MINUS_EXPR, type,
1941                                convert (type,
1942                                         build_int_2 (GET_MODE_BITSIZE (mode),
1943                                                      0)),
1944                                amount));
1945
1946               shifted = force_reg (mode, shifted);
1947
1948               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
1949                                    mode, shifted, new_amount, subtarget, 1);
1950               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
1951                                     mode, shifted, other_amount, 0, 1);
1952               return expand_binop (mode, ior_optab, temp, temp1, target,
1953                                    unsignedp, methods);
1954             }
1955
1956           temp = expand_binop (mode,
1957                                left ? rotl_optab : rotr_optab,
1958                                shifted, op1, target, unsignedp, methods);
1959
1960           /* If we don't have the rotate, but we are rotating by a constant
1961              that is in range, try a rotate in the opposite direction.  */
1962
1963           if (temp == 0 && GET_CODE (op1) == CONST_INT
1964               && INTVAL (op1) > 0
1965               && (unsigned int) INTVAL (op1) < GET_MODE_BITSIZE (mode))
1966             temp = expand_binop (mode,
1967                                  left ? rotr_optab : rotl_optab,
1968                                  shifted,
1969                                  GEN_INT (GET_MODE_BITSIZE (mode)
1970                                           - INTVAL (op1)),
1971                                  target, unsignedp, methods);
1972         }
1973       else if (unsignedp)
1974         temp = expand_binop (mode,
1975                              left ? ashl_optab : lshr_optab,
1976                              shifted, op1, target, unsignedp, methods);
1977
1978       /* Do arithmetic shifts.
1979          Also, if we are going to widen the operand, we can just as well
1980          use an arithmetic right-shift instead of a logical one.  */
1981       if (temp == 0 && ! rotate
1982           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
1983         {
1984           enum optab_methods methods1 = methods;
1985
1986           /* If trying to widen a log shift to an arithmetic shift,
1987              don't accept an arithmetic shift of the same size.  */
1988           if (unsignedp)
1989             methods1 = OPTAB_MUST_WIDEN;
1990
1991           /* Arithmetic shift */
1992
1993           temp = expand_binop (mode,
1994                                left ? ashl_optab : ashr_optab,
1995                                shifted, op1, target, unsignedp, methods1);
1996         }
1997
1998       /* We used to try extzv here for logical right shifts, but that was
1999          only useful for one machine, the VAX, and caused poor code
2000          generation there for lshrdi3, so the code was deleted and a
2001          define_expand for lshrsi3 was added to vax.md.  */
2002     }
2003
2004   if (temp == 0)
2005     abort ();
2006   return temp;
2007 }
2008 \f
2009 enum alg_code { alg_zero, alg_m, alg_shift,
2010                   alg_add_t_m2, alg_sub_t_m2,
2011                   alg_add_factor, alg_sub_factor,
2012                   alg_add_t2_m, alg_sub_t2_m,
2013                   alg_add, alg_subtract, alg_factor, alg_shiftop };
2014
2015 /* This structure records a sequence of operations.
2016    `ops' is the number of operations recorded.
2017    `cost' is their total cost.
2018    The operations are stored in `op' and the corresponding
2019    logarithms of the integer coefficients in `log'.
2020
2021    These are the operations:
2022    alg_zero             total := 0;
2023    alg_m                total := multiplicand;
2024    alg_shift            total := total * coeff
2025    alg_add_t_m2         total := total + multiplicand * coeff;
2026    alg_sub_t_m2         total := total - multiplicand * coeff;
2027    alg_add_factor       total := total * coeff + total;
2028    alg_sub_factor       total := total * coeff - total;
2029    alg_add_t2_m         total := total * coeff + multiplicand;
2030    alg_sub_t2_m         total := total * coeff - multiplicand;
2031
2032    The first operand must be either alg_zero or alg_m.  */
2033
2034 struct algorithm
2035 {
2036   short cost;
2037   short ops;
2038   /* The size of the OP and LOG fields are not directly related to the
2039      word size, but the worst-case algorithms will be if we have few
2040      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2041      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2042      in total wordsize operations.  */
2043   enum alg_code op[MAX_BITS_PER_WORD];
2044   char log[MAX_BITS_PER_WORD];
2045 };
2046
2047 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, int);
2048 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2049                                                  int, unsigned HOST_WIDE_INT *,
2050                                                  int *, int *);
2051 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2052 /* Compute and return the best algorithm for multiplying by T.
2053    The algorithm must cost less than cost_limit
2054    If retval.cost >= COST_LIMIT, no algorithm was found and all
2055    other field of the returned struct are undefined.  */
2056
2057 static void
2058 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2059             int cost_limit)
2060 {
2061   int m;
2062   struct algorithm *alg_in, *best_alg;
2063   int cost;
2064   unsigned HOST_WIDE_INT q;
2065
2066   /* Indicate that no algorithm is yet found.  If no algorithm
2067      is found, this value will be returned and indicate failure.  */
2068   alg_out->cost = cost_limit;
2069
2070   if (cost_limit <= 0)
2071     return;
2072
2073   /* t == 1 can be done in zero cost.  */
2074   if (t == 1)
2075     {
2076       alg_out->ops = 1;
2077       alg_out->cost = 0;
2078       alg_out->op[0] = alg_m;
2079       return;
2080     }
2081
2082   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2083      fail now.  */
2084   if (t == 0)
2085     {
2086       if (zero_cost >= cost_limit)
2087         return;
2088       else
2089         {
2090           alg_out->ops = 1;
2091           alg_out->cost = zero_cost;
2092           alg_out->op[0] = alg_zero;
2093           return;
2094         }
2095     }
2096
2097   /* We'll be needing a couple extra algorithm structures now.  */
2098
2099   alg_in = alloca (sizeof (struct algorithm));
2100   best_alg = alloca (sizeof (struct algorithm));
2101
2102   /* If we have a group of zero bits at the low-order part of T, try
2103      multiplying by the remaining bits and then doing a shift.  */
2104
2105   if ((t & 1) == 0)
2106     {
2107       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2108       if (m < BITS_PER_WORD)
2109         {
2110           q = t >> m;
2111           cost = shift_cost[m];
2112           synth_mult (alg_in, q, cost_limit - cost);
2113
2114           cost += alg_in->cost;
2115           if (cost < cost_limit)
2116             {
2117               struct algorithm *x;
2118               x = alg_in, alg_in = best_alg, best_alg = x;
2119               best_alg->log[best_alg->ops] = m;
2120               best_alg->op[best_alg->ops] = alg_shift;
2121               cost_limit = cost;
2122             }
2123         }
2124     }
2125
2126   /* If we have an odd number, add or subtract one.  */
2127   if ((t & 1) != 0)
2128     {
2129       unsigned HOST_WIDE_INT w;
2130
2131       for (w = 1; (w & t) != 0; w <<= 1)
2132         ;
2133       /* If T was -1, then W will be zero after the loop.  This is another
2134          case where T ends with ...111.  Handling this with (T + 1) and
2135          subtract 1 produces slightly better code and results in algorithm
2136          selection much faster than treating it like the ...0111 case
2137          below.  */
2138       if (w == 0
2139           || (w > 2
2140               /* Reject the case where t is 3.
2141                  Thus we prefer addition in that case.  */
2142               && t != 3))
2143         {
2144           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2145
2146           cost = add_cost;
2147           synth_mult (alg_in, t + 1, cost_limit - cost);
2148
2149           cost += alg_in->cost;
2150           if (cost < cost_limit)
2151             {
2152               struct algorithm *x;
2153               x = alg_in, alg_in = best_alg, best_alg = x;
2154               best_alg->log[best_alg->ops] = 0;
2155               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2156               cost_limit = cost;
2157             }
2158         }
2159       else
2160         {
2161           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2162
2163           cost = add_cost;
2164           synth_mult (alg_in, t - 1, cost_limit - cost);
2165
2166           cost += alg_in->cost;
2167           if (cost < cost_limit)
2168             {
2169               struct algorithm *x;
2170               x = alg_in, alg_in = best_alg, best_alg = x;
2171               best_alg->log[best_alg->ops] = 0;
2172               best_alg->op[best_alg->ops] = alg_add_t_m2;
2173               cost_limit = cost;
2174             }
2175         }
2176     }
2177
2178   /* Look for factors of t of the form
2179      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2180      If we find such a factor, we can multiply by t using an algorithm that
2181      multiplies by q, shift the result by m and add/subtract it to itself.
2182
2183      We search for large factors first and loop down, even if large factors
2184      are less probable than small; if we find a large factor we will find a
2185      good sequence quickly, and therefore be able to prune (by decreasing
2186      COST_LIMIT) the search.  */
2187
2188   for (m = floor_log2 (t - 1); m >= 2; m--)
2189     {
2190       unsigned HOST_WIDE_INT d;
2191
2192       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2193       if (t % d == 0 && t > d && m < BITS_PER_WORD)
2194         {
2195           cost = MIN (shiftadd_cost[m], add_cost + shift_cost[m]);
2196           synth_mult (alg_in, t / d, cost_limit - cost);
2197
2198           cost += alg_in->cost;
2199           if (cost < cost_limit)
2200             {
2201               struct algorithm *x;
2202               x = alg_in, alg_in = best_alg, best_alg = x;
2203               best_alg->log[best_alg->ops] = m;
2204               best_alg->op[best_alg->ops] = alg_add_factor;
2205               cost_limit = cost;
2206             }
2207           /* Other factors will have been taken care of in the recursion.  */
2208           break;
2209         }
2210
2211       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2212       if (t % d == 0 && t > d && m < BITS_PER_WORD)
2213         {
2214           cost = MIN (shiftsub_cost[m], add_cost + shift_cost[m]);
2215           synth_mult (alg_in, t / d, cost_limit - cost);
2216
2217           cost += alg_in->cost;
2218           if (cost < cost_limit)
2219             {
2220               struct algorithm *x;
2221               x = alg_in, alg_in = best_alg, best_alg = x;
2222               best_alg->log[best_alg->ops] = m;
2223               best_alg->op[best_alg->ops] = alg_sub_factor;
2224               cost_limit = cost;
2225             }
2226           break;
2227         }
2228     }
2229
2230   /* Try shift-and-add (load effective address) instructions,
2231      i.e. do a*3, a*5, a*9.  */
2232   if ((t & 1) != 0)
2233     {
2234       q = t - 1;
2235       q = q & -q;
2236       m = exact_log2 (q);
2237       if (m >= 0 && m < BITS_PER_WORD)
2238         {
2239           cost = shiftadd_cost[m];
2240           synth_mult (alg_in, (t - 1) >> m, cost_limit - cost);
2241
2242           cost += alg_in->cost;
2243           if (cost < cost_limit)
2244             {
2245               struct algorithm *x;
2246               x = alg_in, alg_in = best_alg, best_alg = x;
2247               best_alg->log[best_alg->ops] = m;
2248               best_alg->op[best_alg->ops] = alg_add_t2_m;
2249               cost_limit = cost;
2250             }
2251         }
2252
2253       q = t + 1;
2254       q = q & -q;
2255       m = exact_log2 (q);
2256       if (m >= 0 && m < BITS_PER_WORD)
2257         {
2258           cost = shiftsub_cost[m];
2259           synth_mult (alg_in, (t + 1) >> m, cost_limit - cost);
2260
2261           cost += alg_in->cost;
2262           if (cost < cost_limit)
2263             {
2264               struct algorithm *x;
2265               x = alg_in, alg_in = best_alg, best_alg = x;
2266               best_alg->log[best_alg->ops] = m;
2267               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2268               cost_limit = cost;
2269             }
2270         }
2271     }
2272
2273   /* If cost_limit has not decreased since we stored it in alg_out->cost,
2274      we have not found any algorithm.  */
2275   if (cost_limit == alg_out->cost)
2276     return;
2277
2278   /* If we are getting a too long sequence for `struct algorithm'
2279      to record, make this search fail.  */
2280   if (best_alg->ops == MAX_BITS_PER_WORD)
2281     return;
2282
2283   /* Copy the algorithm from temporary space to the space at alg_out.
2284      We avoid using structure assignment because the majority of
2285      best_alg is normally undefined, and this is a critical function.  */
2286   alg_out->ops = best_alg->ops + 1;
2287   alg_out->cost = cost_limit;
2288   memcpy (alg_out->op, best_alg->op,
2289           alg_out->ops * sizeof *alg_out->op);
2290   memcpy (alg_out->log, best_alg->log,
2291           alg_out->ops * sizeof *alg_out->log);
2292 }
2293 \f
2294 /* Perform a multiplication and return an rtx for the result.
2295    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2296    TARGET is a suggestion for where to store the result (an rtx).
2297
2298    We check specially for a constant integer as OP1.
2299    If you want this check for OP0 as well, then before calling
2300    you should swap the two operands if OP0 would be constant.  */
2301
2302 rtx
2303 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
2304              int unsignedp)
2305 {
2306   rtx const_op1 = op1;
2307
2308   /* synth_mult does an `unsigned int' multiply.  As long as the mode is
2309      less than or equal in size to `unsigned int' this doesn't matter.
2310      If the mode is larger than `unsigned int', then synth_mult works only
2311      if the constant value exactly fits in an `unsigned int' without any
2312      truncation.  This means that multiplying by negative values does
2313      not work; results are off by 2^32 on a 32 bit machine.  */
2314
2315   /* If we are multiplying in DImode, it may still be a win
2316      to try to work with shifts and adds.  */
2317   if (GET_CODE (op1) == CONST_DOUBLE
2318       && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2319       && HOST_BITS_PER_INT >= BITS_PER_WORD
2320       && CONST_DOUBLE_HIGH (op1) == 0)
2321     const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2322   else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2323            && GET_CODE (op1) == CONST_INT
2324            && INTVAL (op1) < 0)
2325     const_op1 = 0;
2326
2327   /* We used to test optimize here, on the grounds that it's better to
2328      produce a smaller program when -O is not used.
2329      But this causes such a terrible slowdown sometimes
2330      that it seems better to use synth_mult always.  */
2331
2332   if (const_op1 && GET_CODE (const_op1) == CONST_INT
2333       && (unsignedp || ! flag_trapv))
2334     {
2335       struct algorithm alg;
2336       struct algorithm alg2;
2337       HOST_WIDE_INT val = INTVAL (op1);
2338       HOST_WIDE_INT val_so_far;
2339       rtx insn;
2340       int mult_cost;
2341       enum {basic_variant, negate_variant, add_variant} variant = basic_variant;
2342
2343       /* op0 must be register to make mult_cost match the precomputed
2344          shiftadd_cost array.  */
2345       op0 = force_reg (mode, op0);
2346
2347       /* Try to do the computation three ways: multiply by the negative of OP1
2348          and then negate, do the multiplication directly, or do multiplication
2349          by OP1 - 1.  */
2350
2351       mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
2352       mult_cost = MIN (12 * add_cost, mult_cost);
2353
2354       synth_mult (&alg, val, mult_cost);
2355
2356       /* This works only if the inverted value actually fits in an
2357          `unsigned int' */
2358       if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2359         {
2360           synth_mult (&alg2, - val,
2361                       (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
2362           if (alg2.cost + negate_cost < alg.cost)
2363             alg = alg2, variant = negate_variant;
2364         }
2365
2366       /* This proves very useful for division-by-constant.  */
2367       synth_mult (&alg2, val - 1,
2368                   (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost);
2369       if (alg2.cost + add_cost < alg.cost)
2370         alg = alg2, variant = add_variant;
2371
2372       if (alg.cost < mult_cost)
2373         {
2374           /* We found something cheaper than a multiply insn.  */
2375           int opno;
2376           rtx accum, tem;
2377           enum machine_mode nmode;
2378
2379           op0 = protect_from_queue (op0, 0);
2380
2381           /* Avoid referencing memory over and over.
2382              For speed, but also for correctness when mem is volatile.  */
2383           if (GET_CODE (op0) == MEM)
2384             op0 = force_reg (mode, op0);
2385
2386           /* ACCUM starts out either as OP0 or as a zero, depending on
2387              the first operation.  */
2388
2389           if (alg.op[0] == alg_zero)
2390             {
2391               accum = copy_to_mode_reg (mode, const0_rtx);
2392               val_so_far = 0;
2393             }
2394           else if (alg.op[0] == alg_m)
2395             {
2396               accum = copy_to_mode_reg (mode, op0);
2397               val_so_far = 1;
2398             }
2399           else
2400             abort ();
2401
2402           for (opno = 1; opno < alg.ops; opno++)
2403             {
2404               int log = alg.log[opno];
2405               int preserve = preserve_subexpressions_p ();
2406               rtx shift_subtarget = preserve ? 0 : accum;
2407               rtx add_target
2408                 = (opno == alg.ops - 1 && target != 0 && variant != add_variant
2409                    && ! preserve)
2410                   ? target : 0;
2411               rtx accum_target = preserve ? 0 : accum;
2412
2413               switch (alg.op[opno])
2414                 {
2415                 case alg_shift:
2416                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2417                                         build_int_2 (log, 0), NULL_RTX, 0);
2418                   val_so_far <<= log;
2419                   break;
2420
2421                 case alg_add_t_m2:
2422                   tem = expand_shift (LSHIFT_EXPR, mode, op0,
2423                                       build_int_2 (log, 0), NULL_RTX, 0);
2424                   accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2425                                          add_target
2426                                          ? add_target : accum_target);
2427                   val_so_far += (HOST_WIDE_INT) 1 << log;
2428                   break;
2429
2430                 case alg_sub_t_m2:
2431                   tem = expand_shift (LSHIFT_EXPR, mode, op0,
2432                                       build_int_2 (log, 0), NULL_RTX, 0);
2433                   accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2434                                          add_target
2435                                          ? add_target : accum_target);
2436                   val_so_far -= (HOST_WIDE_INT) 1 << log;
2437                   break;
2438
2439                 case alg_add_t2_m:
2440                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2441                                         build_int_2 (log, 0), shift_subtarget,
2442                                         0);
2443                   accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2444                                          add_target
2445                                          ? add_target : accum_target);
2446                   val_so_far = (val_so_far << log) + 1;
2447                   break;
2448
2449                 case alg_sub_t2_m:
2450                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2451                                         build_int_2 (log, 0), shift_subtarget,
2452                                         0);
2453                   accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2454                                          add_target
2455                                          ? add_target : accum_target);
2456                   val_so_far = (val_so_far << log) - 1;
2457                   break;
2458
2459                 case alg_add_factor:
2460                   tem = expand_shift (LSHIFT_EXPR, mode, accum,
2461                                       build_int_2 (log, 0), NULL_RTX, 0);
2462                   accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2463                                          add_target
2464                                          ? add_target : accum_target);
2465                   val_so_far += val_so_far << log;
2466                   break;
2467
2468                 case alg_sub_factor:
2469                   tem = expand_shift (LSHIFT_EXPR, mode, accum,
2470                                       build_int_2 (log, 0), NULL_RTX, 0);
2471                   accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2472                                          (add_target ? add_target
2473                                           : preserve ? 0 : tem));
2474                   val_so_far = (val_so_far << log) - val_so_far;
2475                   break;
2476
2477                 default:
2478                   abort ();
2479                 }
2480
2481               /* Write a REG_EQUAL note on the last insn so that we can cse
2482                  multiplication sequences.  Note that if ACCUM is a SUBREG,
2483                  we've set the inner register and must properly indicate
2484                  that.  */
2485
2486               tem = op0, nmode = mode;
2487               if (GET_CODE (accum) == SUBREG)
2488                 {
2489                   nmode = GET_MODE (SUBREG_REG (accum));
2490                   tem = gen_lowpart (nmode, op0);
2491                 }
2492
2493               insn = get_last_insn ();
2494               set_unique_reg_note (insn,
2495                                    REG_EQUAL,
2496                                    gen_rtx_MULT (nmode, tem,
2497                                                  GEN_INT (val_so_far)));
2498             }
2499
2500           if (variant == negate_variant)
2501             {
2502               val_so_far = - val_so_far;
2503               accum = expand_unop (mode, neg_optab, accum, target, 0);
2504             }
2505           else if (variant == add_variant)
2506             {
2507               val_so_far = val_so_far + 1;
2508               accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2509             }
2510
2511           if (val != val_so_far)
2512             abort ();
2513
2514           return accum;
2515         }
2516     }
2517
2518   if (GET_CODE (op0) == CONST_DOUBLE)
2519     {
2520       rtx temp = op0;
2521       op0 = op1;
2522       op1 = temp;
2523     }
2524
2525   /* Expand x*2.0 as x+x.  */
2526   if (GET_CODE (op1) == CONST_DOUBLE
2527       && GET_MODE_CLASS (mode) == MODE_FLOAT)
2528     {
2529       REAL_VALUE_TYPE d;
2530       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
2531
2532       if (REAL_VALUES_EQUAL (d, dconst2))
2533         {
2534           op0 = force_reg (GET_MODE (op0), op0);
2535           return expand_binop (mode, add_optab, op0, op0,
2536                                target, unsignedp, OPTAB_LIB_WIDEN);
2537         }
2538     }
2539
2540   /* This used to use umul_optab if unsigned, but for non-widening multiply
2541      there is no difference between signed and unsigned.  */
2542   op0 = expand_binop (mode,
2543                       ! unsignedp
2544                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
2545                       ? smulv_optab : smul_optab,
2546                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2547   if (op0 == 0)
2548     abort ();
2549   return op0;
2550 }
2551 \f
2552 /* Return the smallest n such that 2**n >= X.  */
2553
2554 int
2555 ceil_log2 (unsigned HOST_WIDE_INT x)
2556 {
2557   return floor_log2 (x - 1) + 1;
2558 }
2559
2560 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2561    replace division by D, and put the least significant N bits of the result
2562    in *MULTIPLIER_PTR and return the most significant bit.
2563
2564    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
2565    needed precision is in PRECISION (should be <= N).
2566
2567    PRECISION should be as small as possible so this function can choose
2568    multiplier more freely.
2569
2570    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
2571    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2572
2573    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2574    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
2575
2576 static
2577 unsigned HOST_WIDE_INT
2578 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
2579                    unsigned HOST_WIDE_INT *multiplier_ptr,
2580                    int *post_shift_ptr, int *lgup_ptr)
2581 {
2582   HOST_WIDE_INT mhigh_hi, mlow_hi;
2583   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
2584   int lgup, post_shift;
2585   int pow, pow2;
2586   unsigned HOST_WIDE_INT nl, dummy1;
2587   HOST_WIDE_INT nh, dummy2;
2588
2589   /* lgup = ceil(log2(divisor)); */
2590   lgup = ceil_log2 (d);
2591
2592   if (lgup > n)
2593     abort ();
2594
2595   pow = n + lgup;
2596   pow2 = n + lgup - precision;
2597
2598   if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2599     {
2600       /* We could handle this with some effort, but this case is much better
2601          handled directly with a scc insn, so rely on caller using that.  */
2602       abort ();
2603     }
2604
2605   /* mlow = 2^(N + lgup)/d */
2606  if (pow >= HOST_BITS_PER_WIDE_INT)
2607     {
2608       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2609       nl = 0;
2610     }
2611   else
2612     {
2613       nh = 0;
2614       nl = (unsigned HOST_WIDE_INT) 1 << pow;
2615     }
2616   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2617                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2618
2619   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
2620   if (pow2 >= HOST_BITS_PER_WIDE_INT)
2621     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2622   else
2623     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2624   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2625                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2626
2627   if (mhigh_hi && nh - d >= d)
2628     abort ();
2629   if (mhigh_hi > 1 || mlow_hi > 1)
2630     abort ();
2631   /* assert that mlow < mhigh.  */
2632   if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2633     abort ();
2634
2635   /* If precision == N, then mlow, mhigh exceed 2^N
2636      (but they do not exceed 2^(N+1)).  */
2637
2638   /* Reduce to lowest terms.  */
2639   for (post_shift = lgup; post_shift > 0; post_shift--)
2640     {
2641       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2642       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2643       if (ml_lo >= mh_lo)
2644         break;
2645
2646       mlow_hi = 0;
2647       mlow_lo = ml_lo;
2648       mhigh_hi = 0;
2649       mhigh_lo = mh_lo;
2650     }
2651
2652   *post_shift_ptr = post_shift;
2653   *lgup_ptr = lgup;
2654   if (n < HOST_BITS_PER_WIDE_INT)
2655     {
2656       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2657       *multiplier_ptr = mhigh_lo & mask;
2658       return mhigh_lo >= mask;
2659     }
2660   else
2661     {
2662       *multiplier_ptr = mhigh_lo;
2663       return mhigh_hi;
2664     }
2665 }
2666
2667 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
2668    congruent to 1 (mod 2**N).  */
2669
2670 static unsigned HOST_WIDE_INT
2671 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
2672 {
2673   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
2674
2675   /* The algorithm notes that the choice y = x satisfies
2676      x*y == 1 mod 2^3, since x is assumed odd.
2677      Each iteration doubles the number of bits of significance in y.  */
2678
2679   unsigned HOST_WIDE_INT mask;
2680   unsigned HOST_WIDE_INT y = x;
2681   int nbit = 3;
2682
2683   mask = (n == HOST_BITS_PER_WIDE_INT
2684           ? ~(unsigned HOST_WIDE_INT) 0
2685           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2686
2687   while (nbit < n)
2688     {
2689       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
2690       nbit *= 2;
2691     }
2692   return y;
2693 }
2694
2695 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
2696    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
2697    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
2698    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
2699    become signed.
2700
2701    The result is put in TARGET if that is convenient.
2702
2703    MODE is the mode of operation.  */
2704
2705 rtx
2706 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
2707                              rtx op1, rtx target, int unsignedp)
2708 {
2709   rtx tem;
2710   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
2711
2712   tem = expand_shift (RSHIFT_EXPR, mode, op0,
2713                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2714                       NULL_RTX, 0);
2715   tem = expand_and (mode, tem, op1, NULL_RTX);
2716   adj_operand
2717     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2718                      adj_operand);
2719
2720   tem = expand_shift (RSHIFT_EXPR, mode, op1,
2721                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2722                       NULL_RTX, 0);
2723   tem = expand_and (mode, tem, op0, NULL_RTX);
2724   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2725                           target);
2726
2727   return target;
2728 }
2729
2730 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
2731    in TARGET if that is convenient, and return where the result is.  If the
2732    operation can not be performed, 0 is returned.
2733
2734    MODE is the mode of operation and result.
2735
2736    UNSIGNEDP nonzero means unsigned multiply.
2737
2738    MAX_COST is the total allowed cost for the expanded RTL.  */
2739
2740 rtx
2741 expand_mult_highpart (enum machine_mode mode, rtx op0,
2742                       unsigned HOST_WIDE_INT cnst1, rtx target,
2743                       int unsignedp, int max_cost)
2744 {
2745   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
2746   optab mul_highpart_optab;
2747   optab moptab;
2748   rtx tem;
2749   int size = GET_MODE_BITSIZE (mode);
2750   rtx op1, wide_op1;
2751
2752   /* We can't support modes wider than HOST_BITS_PER_INT.  */
2753   if (size > HOST_BITS_PER_WIDE_INT)
2754     abort ();
2755
2756   op1 = gen_int_mode (cnst1, mode);
2757
2758   wide_op1
2759     = immed_double_const (cnst1,
2760                           (unsignedp
2761                            ? (HOST_WIDE_INT) 0
2762                            : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))),
2763                           wider_mode);
2764
2765   /* expand_mult handles constant multiplication of word_mode
2766      or narrower.  It does a poor job for large modes.  */
2767   if (size < BITS_PER_WORD
2768       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2769     {
2770       /* We have to do this, since expand_binop doesn't do conversion for
2771          multiply.  Maybe change expand_binop to handle widening multiply?  */
2772       op0 = convert_to_mode (wider_mode, op0, unsignedp);
2773
2774       /* We know that this can't have signed overflow, so pretend this is
2775          an unsigned multiply.  */
2776       tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, 0);
2777       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2778                           build_int_2 (size, 0), NULL_RTX, 1);
2779       return convert_modes (mode, wider_mode, tem, unsignedp);
2780     }
2781
2782   if (target == 0)
2783     target = gen_reg_rtx (mode);
2784
2785   /* Firstly, try using a multiplication insn that only generates the needed
2786      high part of the product, and in the sign flavor of unsignedp.  */
2787   if (mul_highpart_cost[(int) mode] < max_cost)
2788     {
2789       mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
2790       target = expand_binop (mode, mul_highpart_optab,
2791                              op0, op1, target, unsignedp, OPTAB_DIRECT);
2792       if (target)
2793         return target;
2794     }
2795
2796   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
2797      Need to adjust the result after the multiplication.  */
2798   if (size - 1 < BITS_PER_WORD
2799       && (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost
2800           < max_cost))
2801     {
2802       mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
2803       target = expand_binop (mode, mul_highpart_optab,
2804                              op0, op1, target, unsignedp, OPTAB_DIRECT);
2805       if (target)
2806         /* We used the wrong signedness.  Adjust the result.  */
2807         return expand_mult_highpart_adjust (mode, target, op0,
2808                                             op1, target, unsignedp);
2809     }
2810
2811   /* Try widening multiplication.  */
2812   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
2813   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2814       && mul_widen_cost[(int) wider_mode] < max_cost)
2815     {
2816       op1 = force_reg (mode, op1);
2817       goto try;
2818     }
2819
2820   /* Try widening the mode and perform a non-widening multiplication.  */
2821   moptab = smul_optab;
2822   if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2823       && size - 1 < BITS_PER_WORD
2824       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2825     {
2826       op1 = wide_op1;
2827       goto try;
2828     }
2829
2830   /* Try widening multiplication of opposite signedness, and adjust.  */
2831   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
2832   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2833       && size - 1 < BITS_PER_WORD
2834       && (mul_widen_cost[(int) wider_mode]
2835           + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
2836     {
2837       rtx regop1 = force_reg (mode, op1);
2838       tem = expand_binop (wider_mode, moptab, op0, regop1,
2839                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
2840       if (tem != 0)
2841         {
2842           /* Extract the high half of the just generated product.  */
2843           tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2844                               build_int_2 (size, 0), NULL_RTX, 1);
2845           tem = convert_modes (mode, wider_mode, tem, unsignedp);
2846           /* We used the wrong signedness.  Adjust the result.  */
2847           return expand_mult_highpart_adjust (mode, tem, op0, op1,
2848                                               target, unsignedp);
2849         }
2850     }
2851
2852   return 0;
2853
2854  try:
2855   /* Pass NULL_RTX as target since TARGET has wrong mode.  */
2856   tem = expand_binop (wider_mode, moptab, op0, op1,
2857                       NULL_RTX, unsignedp, OPTAB_WIDEN);
2858   if (tem == 0)
2859     return 0;
2860
2861   /* Extract the high half of the just generated product.  */
2862   if (mode == word_mode)
2863     {
2864       return gen_highpart (mode, tem);
2865     }
2866   else
2867     {
2868       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2869                           build_int_2 (size, 0), NULL_RTX, 1);
2870       return convert_modes (mode, wider_mode, tem, unsignedp);
2871     }
2872 }
2873 \f
2874 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
2875    if that is convenient, and returning where the result is.
2876    You may request either the quotient or the remainder as the result;
2877    specify REM_FLAG nonzero to get the remainder.
2878
2879    CODE is the expression code for which kind of division this is;
2880    it controls how rounding is done.  MODE is the machine mode to use.
2881    UNSIGNEDP nonzero means do unsigned division.  */
2882
2883 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
2884    and then correct it by or'ing in missing high bits
2885    if result of ANDI is nonzero.
2886    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
2887    This could optimize to a bfexts instruction.
2888    But C doesn't use these operations, so their optimizations are
2889    left for later.  */
2890 /* ??? For modulo, we don't actually need the highpart of the first product,
2891    the low part will do nicely.  And for small divisors, the second multiply
2892    can also be a low-part only multiply or even be completely left out.
2893    E.g. to calculate the remainder of a division by 3 with a 32 bit
2894    multiply, multiply with 0x55555556 and extract the upper two bits;
2895    the result is exact for inputs up to 0x1fffffff.
2896    The input range can be reduced by using cross-sum rules.
2897    For odd divisors >= 3, the following table gives right shift counts
2898    so that if a number is shifted by an integer multiple of the given
2899    amount, the remainder stays the same:
2900    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
2901    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
2902    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
2903    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
2904    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
2905
2906    Cross-sum rules for even numbers can be derived by leaving as many bits
2907    to the right alone as the divisor has zeros to the right.
2908    E.g. if x is an unsigned 32 bit number:
2909    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
2910    */
2911
2912 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
2913
2914 rtx
2915 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
2916                rtx op0, rtx op1, rtx target, int unsignedp)
2917 {
2918   enum machine_mode compute_mode;
2919   rtx tquotient;
2920   rtx quotient = 0, remainder = 0;
2921   rtx last;
2922   int size;
2923   rtx insn, set;
2924   optab optab1, optab2;
2925   int op1_is_constant, op1_is_pow2 = 0;
2926   int max_cost, extra_cost;
2927   static HOST_WIDE_INT last_div_const = 0;
2928   static HOST_WIDE_INT ext_op1;
2929
2930   op1_is_constant = GET_CODE (op1) == CONST_INT;
2931   if (op1_is_constant)
2932     {
2933       ext_op1 = INTVAL (op1);
2934       if (unsignedp)
2935         ext_op1 &= GET_MODE_MASK (mode);
2936       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
2937                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
2938     }
2939
2940   /*
2941      This is the structure of expand_divmod:
2942
2943      First comes code to fix up the operands so we can perform the operations
2944      correctly and efficiently.
2945
2946      Second comes a switch statement with code specific for each rounding mode.
2947      For some special operands this code emits all RTL for the desired
2948      operation, for other cases, it generates only a quotient and stores it in
2949      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
2950      to indicate that it has not done anything.
2951
2952      Last comes code that finishes the operation.  If QUOTIENT is set and
2953      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
2954      QUOTIENT is not set, it is computed using trunc rounding.
2955
2956      We try to generate special code for division and remainder when OP1 is a
2957      constant.  If |OP1| = 2**n we can use shifts and some other fast
2958      operations.  For other values of OP1, we compute a carefully selected
2959      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
2960      by m.
2961
2962      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
2963      half of the product.  Different strategies for generating the product are
2964      implemented in expand_mult_highpart.
2965
2966      If what we actually want is the remainder, we generate that by another
2967      by-constant multiplication and a subtraction.  */
2968
2969   /* We shouldn't be called with OP1 == const1_rtx, but some of the
2970      code below will malfunction if we are, so check here and handle
2971      the special case if so.  */
2972   if (op1 == const1_rtx)
2973     return rem_flag ? const0_rtx : op0;
2974
2975     /* When dividing by -1, we could get an overflow.
2976      negv_optab can handle overflows.  */
2977   if (! unsignedp && op1 == constm1_rtx)
2978     {
2979       if (rem_flag)
2980         return const0_rtx;
2981       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
2982                           ? negv_optab : neg_optab, op0, target, 0);
2983     }
2984
2985   if (target
2986       /* Don't use the function value register as a target
2987          since we have to read it as well as write it,
2988          and function-inlining gets confused by this.  */
2989       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
2990           /* Don't clobber an operand while doing a multi-step calculation.  */
2991           || ((rem_flag || op1_is_constant)
2992               && (reg_mentioned_p (target, op0)
2993                   || (GET_CODE (op0) == MEM && GET_CODE (target) == MEM)))
2994           || reg_mentioned_p (target, op1)
2995           || (GET_CODE (op1) == MEM && GET_CODE (target) == MEM)))
2996     target = 0;
2997
2998   /* Get the mode in which to perform this computation.  Normally it will
2999      be MODE, but sometimes we can't do the desired operation in MODE.
3000      If so, pick a wider mode in which we can do the operation.  Convert
3001      to that mode at the start to avoid repeated conversions.
3002
3003      First see what operations we need.  These depend on the expression
3004      we are evaluating.  (We assume that divxx3 insns exist under the
3005      same conditions that modxx3 insns and that these insns don't normally
3006      fail.  If these assumptions are not correct, we may generate less
3007      efficient code in some cases.)
3008
3009      Then see if we find a mode in which we can open-code that operation
3010      (either a division, modulus, or shift).  Finally, check for the smallest
3011      mode for which we can do the operation with a library call.  */
3012
3013   /* We might want to refine this now that we have division-by-constant
3014      optimization.  Since expand_mult_highpart tries so many variants, it is
3015      not straightforward to generalize this.  Maybe we should make an array
3016      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3017
3018   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3019             ? (unsignedp ? lshr_optab : ashr_optab)
3020             : (unsignedp ? udiv_optab : sdiv_optab));
3021   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3022             ? optab1
3023             : (unsignedp ? udivmod_optab : sdivmod_optab));
3024
3025   for (compute_mode = mode; compute_mode != VOIDmode;
3026        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3027     if (optab1->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing
3028         || optab2->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing)
3029       break;
3030
3031   if (compute_mode == VOIDmode)
3032     for (compute_mode = mode; compute_mode != VOIDmode;
3033          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3034       if (optab1->handlers[(int) compute_mode].libfunc
3035           || optab2->handlers[(int) compute_mode].libfunc)
3036         break;
3037
3038   /* If we still couldn't find a mode, use MODE, but we'll probably abort
3039      in expand_binop.  */
3040   if (compute_mode == VOIDmode)
3041     compute_mode = mode;
3042
3043   if (target && GET_MODE (target) == compute_mode)
3044     tquotient = target;
3045   else
3046     tquotient = gen_reg_rtx (compute_mode);
3047
3048   size = GET_MODE_BITSIZE (compute_mode);
3049 #if 0
3050   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3051      (mode), and thereby get better code when OP1 is a constant.  Do that
3052      later.  It will require going over all usages of SIZE below.  */
3053   size = GET_MODE_BITSIZE (mode);
3054 #endif
3055
3056   /* Only deduct something for a REM if the last divide done was
3057      for a different constant.   Then set the constant of the last
3058      divide.  */
3059   max_cost = div_cost[(int) compute_mode]
3060     - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3061                       && INTVAL (op1) == last_div_const)
3062        ? mul_cost[(int) compute_mode] + add_cost : 0);
3063
3064   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3065
3066   /* Now convert to the best mode to use.  */
3067   if (compute_mode != mode)
3068     {
3069       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3070       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3071
3072       /* convert_modes may have placed op1 into a register, so we
3073          must recompute the following.  */
3074       op1_is_constant = GET_CODE (op1) == CONST_INT;
3075       op1_is_pow2 = (op1_is_constant
3076                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3077                           || (! unsignedp
3078                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3079     }
3080
3081   /* If one of the operands is a volatile MEM, copy it into a register.  */
3082
3083   if (GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0))
3084     op0 = force_reg (compute_mode, op0);
3085   if (GET_CODE (op1) == MEM && MEM_VOLATILE_P (op1))
3086     op1 = force_reg (compute_mode, op1);
3087
3088   /* If we need the remainder or if OP1 is constant, we need to
3089      put OP0 in a register in case it has any queued subexpressions.  */
3090   if (rem_flag || op1_is_constant)
3091     op0 = force_reg (compute_mode, op0);
3092
3093   last = get_last_insn ();
3094
3095   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3096   if (unsignedp)
3097     {
3098       if (code == FLOOR_DIV_EXPR)
3099         code = TRUNC_DIV_EXPR;
3100       if (code == FLOOR_MOD_EXPR)
3101         code = TRUNC_MOD_EXPR;
3102       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3103         code = TRUNC_DIV_EXPR;
3104     }
3105
3106   if (op1 != const0_rtx)
3107     switch (code)
3108       {
3109       case TRUNC_MOD_EXPR:
3110       case TRUNC_DIV_EXPR:
3111         if (op1_is_constant)
3112           {
3113             if (unsignedp)
3114               {
3115                 unsigned HOST_WIDE_INT mh, ml;
3116                 int pre_shift, post_shift;
3117                 int dummy;
3118                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3119                                             & GET_MODE_MASK (compute_mode));
3120
3121                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3122                   {
3123                     pre_shift = floor_log2 (d);
3124                     if (rem_flag)
3125                       {
3126                         remainder
3127                           = expand_binop (compute_mode, and_optab, op0,
3128                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3129                                           remainder, 1,
3130                                           OPTAB_LIB_WIDEN);
3131                         if (remainder)
3132                           return gen_lowpart (mode, remainder);
3133                       }
3134                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3135                                              build_int_2 (pre_shift, 0),
3136                                              tquotient, 1);
3137                   }
3138                 else if (size <= HOST_BITS_PER_WIDE_INT)
3139                   {
3140                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3141                       {
3142                         /* Most significant bit of divisor is set; emit an scc
3143                            insn.  */
3144                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3145                                                     compute_mode, 1, 1);
3146                         if (quotient == 0)
3147                           goto fail1;
3148                       }
3149                     else
3150                       {
3151                         /* Find a suitable multiplier and right shift count
3152                            instead of multiplying with D.  */
3153
3154                         mh = choose_multiplier (d, size, size,
3155                                                 &ml, &post_shift, &dummy);
3156
3157                         /* If the suggested multiplier is more than SIZE bits,
3158                            we can do better for even divisors, using an
3159                            initial right shift.  */
3160                         if (mh != 0 && (d & 1) == 0)
3161                           {
3162                             pre_shift = floor_log2 (d & -d);
3163                             mh = choose_multiplier (d >> pre_shift, size,
3164                                                     size - pre_shift,
3165                                                     &ml, &post_shift, &dummy);
3166                             if (mh)
3167                               abort ();
3168                           }
3169                         else
3170                           pre_shift = 0;
3171
3172                         if (mh != 0)
3173                           {
3174                             rtx t1, t2, t3, t4;
3175
3176                             if (post_shift - 1 >= BITS_PER_WORD)
3177                               goto fail1;
3178
3179                             extra_cost = (shift_cost[post_shift - 1]
3180                                           + shift_cost[1] + 2 * add_cost);
3181                             t1 = expand_mult_highpart (compute_mode, op0, ml,
3182                                                        NULL_RTX, 1,
3183                                                        max_cost - extra_cost);
3184                             if (t1 == 0)
3185                               goto fail1;
3186                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
3187                                                                op0, t1),
3188                                                 NULL_RTX);
3189                             t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3190                                                build_int_2 (1, 0), NULL_RTX,1);
3191                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
3192                                                               t1, t3),
3193                                                 NULL_RTX);
3194                             quotient
3195                               = expand_shift (RSHIFT_EXPR, compute_mode, t4,
3196                                               build_int_2 (post_shift - 1, 0),
3197                                               tquotient, 1);
3198                           }
3199                         else
3200                           {
3201                             rtx t1, t2;
3202
3203                             if (pre_shift >= BITS_PER_WORD
3204                                 || post_shift >= BITS_PER_WORD)
3205                               goto fail1;
3206
3207                             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3208                                                build_int_2 (pre_shift, 0),
3209                                                NULL_RTX, 1);
3210                             extra_cost = (shift_cost[pre_shift]
3211                                           + shift_cost[post_shift]);
3212                             t2 = expand_mult_highpart (compute_mode, t1, ml,
3213                                                        NULL_RTX, 1,
3214                                                        max_cost - extra_cost);
3215                             if (t2 == 0)
3216                               goto fail1;
3217                             quotient
3218                               = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3219                                               build_int_2 (post_shift, 0),
3220                                               tquotient, 1);
3221                           }
3222                       }
3223                   }
3224                 else            /* Too wide mode to use tricky code */
3225                   break;
3226
3227                 insn = get_last_insn ();
3228                 if (insn != last
3229                     && (set = single_set (insn)) != 0
3230                     && SET_DEST (set) == quotient)
3231                   set_unique_reg_note (insn,
3232                                        REG_EQUAL,
3233                                        gen_rtx_UDIV (compute_mode, op0, op1));
3234               }
3235             else                /* TRUNC_DIV, signed */
3236               {
3237                 unsigned HOST_WIDE_INT ml;
3238                 int lgup, post_shift;
3239                 HOST_WIDE_INT d = INTVAL (op1);
3240                 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3241
3242                 /* n rem d = n rem -d */
3243                 if (rem_flag && d < 0)
3244                   {
3245                     d = abs_d;
3246                     op1 = gen_int_mode (abs_d, compute_mode);
3247                   }
3248
3249                 if (d == 1)
3250                   quotient = op0;
3251                 else if (d == -1)
3252                   quotient = expand_unop (compute_mode, neg_optab, op0,
3253                                           tquotient, 0);
3254                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3255                   {
3256                     /* This case is not handled correctly below.  */
3257                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
3258                                                 compute_mode, 1, 1);
3259                     if (quotient == 0)
3260                       goto fail1;
3261                   }
3262                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3263                          && (rem_flag ? smod_pow2_cheap : sdiv_pow2_cheap)
3264                          /* ??? The cheap metric is computed only for
3265                             word_mode.  If this operation is wider, this may
3266                             not be so.  Assume true if the optab has an
3267                             expander for this mode.  */
3268                          && (((rem_flag ? smod_optab : sdiv_optab)
3269                               ->handlers[(int) compute_mode].insn_code
3270                               != CODE_FOR_nothing)
3271                              || (sdivmod_optab->handlers[(int) compute_mode]
3272                                  .insn_code != CODE_FOR_nothing)))
3273                   ;
3274                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3275                   {
3276                     lgup = floor_log2 (abs_d);
3277                     if (BRANCH_COST < 1 || (abs_d != 2 && BRANCH_COST < 3))
3278                       {
3279                         rtx label = gen_label_rtx ();
3280                         rtx t1;
3281
3282                         t1 = copy_to_mode_reg (compute_mode, op0);
3283                         do_cmp_and_jump (t1, const0_rtx, GE,
3284                                          compute_mode, label);
3285                         expand_inc (t1, gen_int_mode (abs_d - 1,
3286                                                       compute_mode));
3287                         emit_label (label);
3288                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3289                                                  build_int_2 (lgup, 0),
3290                                                  tquotient, 0);
3291                       }
3292                     else
3293                       {
3294                         rtx t1, t2, t3;
3295                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3296                                            build_int_2 (size - 1, 0),
3297                                            NULL_RTX, 0);
3298                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3299                                            build_int_2 (size - lgup, 0),
3300                                            NULL_RTX, 1);
3301                         t3 = force_operand (gen_rtx_PLUS (compute_mode,
3302                                                           op0, t2),
3303                                             NULL_RTX);
3304                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3305                                                  build_int_2 (lgup, 0),
3306                                                  tquotient, 0);
3307                       }
3308
3309                     /* We have computed OP0 / abs(OP1).  If OP1 is negative, negate
3310                        the quotient.  */
3311                     if (d < 0)
3312                       {
3313                         insn = get_last_insn ();
3314                         if (insn != last
3315                             && (set = single_set (insn)) != 0
3316                             && SET_DEST (set) == quotient
3317                             && abs_d < ((unsigned HOST_WIDE_INT) 1
3318                                         << (HOST_BITS_PER_WIDE_INT - 1)))
3319                           set_unique_reg_note (insn,
3320                                                REG_EQUAL,
3321                                                gen_rtx_DIV (compute_mode,
3322                                                             op0,
3323                                                             GEN_INT
3324                                                             (trunc_int_for_mode
3325                                                              (abs_d,
3326                                                               compute_mode))));
3327
3328                         quotient = expand_unop (compute_mode, neg_optab,
3329                                                 quotient, quotient, 0);
3330                       }
3331                   }
3332                 else if (size <= HOST_BITS_PER_WIDE_INT)
3333                   {
3334                     choose_multiplier (abs_d, size, size - 1,
3335                                        &ml, &post_shift, &lgup);
3336                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3337                       {
3338                         rtx t1, t2, t3;
3339
3340                         if (post_shift >= BITS_PER_WORD
3341                             || size - 1 >= BITS_PER_WORD)
3342                           goto fail1;
3343
3344                         extra_cost = (shift_cost[post_shift]
3345                                       + shift_cost[size - 1] + add_cost);
3346                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3347                                                    NULL_RTX, 0,
3348                                                    max_cost - extra_cost);
3349                         if (t1 == 0)
3350                           goto fail1;
3351                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3352                                            build_int_2 (post_shift, 0), NULL_RTX, 0);
3353                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3354                                            build_int_2 (size - 1, 0), NULL_RTX, 0);
3355                         if (d < 0)
3356                           quotient
3357                             = force_operand (gen_rtx_MINUS (compute_mode,
3358                                                             t3, t2),
3359                                              tquotient);
3360                         else
3361                           quotient
3362                             = force_operand (gen_rtx_MINUS (compute_mode,
3363                                                             t2, t3),
3364                                              tquotient);
3365                       }
3366                     else
3367                       {
3368                         rtx t1, t2, t3, t4;
3369
3370                         if (post_shift >= BITS_PER_WORD
3371                             || size - 1 >= BITS_PER_WORD)
3372                           goto fail1;
3373
3374                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3375                         extra_cost = (shift_cost[post_shift]
3376                                       + shift_cost[size - 1] + 2 * add_cost);
3377                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3378                                                    NULL_RTX, 0,
3379                                                    max_cost - extra_cost);
3380                         if (t1 == 0)
3381                           goto fail1;
3382                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
3383                                                           t1, op0),
3384                                             NULL_RTX);
3385                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3386                                            build_int_2 (post_shift, 0),
3387                                            NULL_RTX, 0);
3388                         t4 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3389                                            build_int_2 (size - 1, 0),
3390                                            NULL_RTX, 0);
3391                         if (d < 0)
3392                           quotient
3393                             = force_operand (gen_rtx_MINUS (compute_mode,
3394                                                             t4, t3),
3395                                              tquotient);
3396                         else
3397                           quotient
3398                             = force_operand (gen_rtx_MINUS (compute_mode,
3399                                                             t3, t4),
3400                                              tquotient);
3401                       }
3402                   }
3403                 else            /* Too wide mode to use tricky code */
3404                   break;
3405
3406                 insn = get_last_insn ();
3407                 if (insn != last
3408                     && (set = single_set (insn)) != 0
3409                     && SET_DEST (set) == quotient)
3410                   set_unique_reg_note (insn,
3411                                        REG_EQUAL,
3412                                        gen_rtx_DIV (compute_mode, op0, op1));
3413               }
3414             break;
3415           }
3416       fail1:
3417         delete_insns_since (last);
3418         break;
3419
3420       case FLOOR_DIV_EXPR:
3421       case FLOOR_MOD_EXPR:
3422       /* We will come here only for signed operations.  */
3423         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3424           {
3425             unsigned HOST_WIDE_INT mh, ml;
3426             int pre_shift, lgup, post_shift;
3427             HOST_WIDE_INT d = INTVAL (op1);
3428
3429             if (d > 0)
3430               {
3431                 /* We could just as easily deal with negative constants here,
3432                    but it does not seem worth the trouble for GCC 2.6.  */
3433                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3434                   {
3435                     pre_shift = floor_log2 (d);
3436                     if (rem_flag)
3437                       {
3438                         remainder = expand_binop (compute_mode, and_optab, op0,
3439                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3440                                                   remainder, 0, OPTAB_LIB_WIDEN);
3441                         if (remainder)
3442                           return gen_lowpart (mode, remainder);
3443                       }
3444                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3445                                              build_int_2 (pre_shift, 0),
3446                                              tquotient, 0);
3447                   }
3448                 else
3449                   {
3450                     rtx t1, t2, t3, t4;
3451
3452                     mh = choose_multiplier (d, size, size - 1,
3453                                             &ml, &post_shift, &lgup);
3454                     if (mh)
3455                       abort ();
3456
3457                     if (post_shift < BITS_PER_WORD
3458                         && size - 1 < BITS_PER_WORD)
3459                       {
3460                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3461                                            build_int_2 (size - 1, 0),
3462                                            NULL_RTX, 0);
3463                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3464                                            NULL_RTX, 0, OPTAB_WIDEN);
3465                         extra_cost = (shift_cost[post_shift]
3466                                       + shift_cost[size - 1] + 2 * add_cost);
3467                         t3 = expand_mult_highpart (compute_mode, t2, ml,
3468                                                    NULL_RTX, 1,
3469                                                    max_cost - extra_cost);
3470                         if (t3 != 0)
3471                           {
3472                             t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3473                                                build_int_2 (post_shift, 0),
3474                                                NULL_RTX, 1);
3475                             quotient = expand_binop (compute_mode, xor_optab,
3476                                                      t4, t1, tquotient, 0,
3477                                                      OPTAB_WIDEN);
3478                           }
3479                       }
3480                   }
3481               }
3482             else
3483               {
3484                 rtx nsign, t1, t2, t3, t4;
3485                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
3486                                                   op0, constm1_rtx), NULL_RTX);
3487                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3488                                    0, OPTAB_WIDEN);
3489                 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3490                                       build_int_2 (size - 1, 0), NULL_RTX, 0);
3491                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
3492                                     NULL_RTX);
3493                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3494                                     NULL_RTX, 0);
3495                 if (t4)
3496                   {
3497                     rtx t5;
3498                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3499                                       NULL_RTX, 0);
3500                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
3501                                                             t4, t5),
3502                                               tquotient);
3503                   }
3504               }
3505           }
3506
3507         if (quotient != 0)
3508           break;
3509         delete_insns_since (last);
3510
3511         /* Try using an instruction that produces both the quotient and
3512            remainder, using truncation.  We can easily compensate the quotient
3513            or remainder to get floor rounding, once we have the remainder.
3514            Notice that we compute also the final remainder value here,
3515            and return the result right away.  */
3516         if (target == 0 || GET_MODE (target) != compute_mode)
3517           target = gen_reg_rtx (compute_mode);
3518
3519         if (rem_flag)
3520           {
3521             remainder
3522               = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3523             quotient = gen_reg_rtx (compute_mode);
3524           }
3525         else
3526           {
3527             quotient
3528               = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3529             remainder = gen_reg_rtx (compute_mode);
3530           }
3531
3532         if (expand_twoval_binop (sdivmod_optab, op0, op1,
3533                                  quotient, remainder, 0))
3534           {
3535             /* This could be computed with a branch-less sequence.
3536                Save that for later.  */
3537             rtx tem;
3538             rtx label = gen_label_rtx ();
3539             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
3540             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3541                                 NULL_RTX, 0, OPTAB_WIDEN);
3542             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
3543             expand_dec (quotient, const1_rtx);
3544             expand_inc (remainder, op1);
3545             emit_label (label);
3546             return gen_lowpart (mode, rem_flag ? remainder : quotient);
3547           }
3548
3549         /* No luck with division elimination or divmod.  Have to do it
3550            by conditionally adjusting op0 *and* the result.  */
3551         {
3552           rtx label1, label2, label3, label4, label5;
3553           rtx adjusted_op0;
3554           rtx tem;
3555
3556           quotient = gen_reg_rtx (compute_mode);
3557           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3558           label1 = gen_label_rtx ();
3559           label2 = gen_label_rtx ();
3560           label3 = gen_label_rtx ();
3561           label4 = gen_label_rtx ();
3562           label5 = gen_label_rtx ();
3563           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3564           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
3565           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3566                               quotient, 0, OPTAB_LIB_WIDEN);
3567           if (tem != quotient)
3568             emit_move_insn (quotient, tem);
3569           emit_jump_insn (gen_jump (label5));
3570           emit_barrier ();
3571           emit_label (label1);
3572           expand_inc (adjusted_op0, const1_rtx);
3573           emit_jump_insn (gen_jump (label4));
3574           emit_barrier ();
3575           emit_label (label2);
3576           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
3577           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3578                               quotient, 0, OPTAB_LIB_WIDEN);
3579           if (tem != quotient)
3580             emit_move_insn (quotient, tem);
3581           emit_jump_insn (gen_jump (label5));
3582           emit_barrier ();
3583           emit_label (label3);
3584           expand_dec (adjusted_op0, const1_rtx);
3585           emit_label (label4);
3586           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3587                               quotient, 0, OPTAB_LIB_WIDEN);
3588           if (tem != quotient)
3589             emit_move_insn (quotient, tem);
3590           expand_dec (quotient, const1_rtx);
3591           emit_label (label5);
3592         }
3593         break;
3594
3595       case CEIL_DIV_EXPR:
3596       case CEIL_MOD_EXPR:
3597         if (unsignedp)
3598           {
3599             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
3600               {
3601                 rtx t1, t2, t3;
3602                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3603                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3604                                    build_int_2 (floor_log2 (d), 0),
3605                                    tquotient, 1);
3606                 t2 = expand_binop (compute_mode, and_optab, op0,
3607                                    GEN_INT (d - 1),
3608                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3609                 t3 = gen_reg_rtx (compute_mode);
3610                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3611                                       compute_mode, 1, 1);
3612                 if (t3 == 0)
3613                   {
3614                     rtx lab;
3615                     lab = gen_label_rtx ();
3616                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3617                     expand_inc (t1, const1_rtx);
3618                     emit_label (lab);
3619                     quotient = t1;
3620                   }
3621                 else
3622                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3623                                                           t1, t3),
3624                                             tquotient);
3625                 break;
3626               }
3627
3628             /* Try using an instruction that produces both the quotient and
3629                remainder, using truncation.  We can easily compensate the
3630                quotient or remainder to get ceiling rounding, once we have the
3631                remainder.  Notice that we compute also the final remainder
3632                value here, and return the result right away.  */
3633             if (target == 0 || GET_MODE (target) != compute_mode)
3634               target = gen_reg_rtx (compute_mode);
3635
3636             if (rem_flag)
3637               {
3638                 remainder = (GET_CODE (target) == REG
3639                              ? target : gen_reg_rtx (compute_mode));
3640                 quotient = gen_reg_rtx (compute_mode);
3641               }
3642             else
3643               {
3644                 quotient = (GET_CODE (target) == REG
3645                             ? target : gen_reg_rtx (compute_mode));
3646                 remainder = gen_reg_rtx (compute_mode);
3647               }
3648
3649             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
3650                                      remainder, 1))
3651               {
3652                 /* This could be computed with a branch-less sequence.
3653                    Save that for later.  */
3654                 rtx label = gen_label_rtx ();
3655                 do_cmp_and_jump (remainder, const0_rtx, EQ,
3656                                  compute_mode, label);
3657                 expand_inc (quotient, const1_rtx);
3658                 expand_dec (remainder, op1);
3659                 emit_label (label);
3660                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3661               }
3662
3663             /* No luck with division elimination or divmod.  Have to do it
3664                by conditionally adjusting op0 *and* the result.  */
3665             {
3666               rtx label1, label2;
3667               rtx adjusted_op0, tem;
3668
3669               quotient = gen_reg_rtx (compute_mode);
3670               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3671               label1 = gen_label_rtx ();
3672               label2 = gen_label_rtx ();
3673               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
3674                                compute_mode, label1);
3675               emit_move_insn  (quotient, const0_rtx);
3676               emit_jump_insn (gen_jump (label2));
3677               emit_barrier ();
3678               emit_label (label1);
3679               expand_dec (adjusted_op0, const1_rtx);
3680               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
3681                                   quotient, 1, OPTAB_LIB_WIDEN);
3682               if (tem != quotient)
3683                 emit_move_insn (quotient, tem);
3684               expand_inc (quotient, const1_rtx);
3685               emit_label (label2);
3686             }
3687           }
3688         else /* signed */
3689           {
3690             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3691                 && INTVAL (op1) >= 0)
3692               {
3693                 /* This is extremely similar to the code for the unsigned case
3694                    above.  For 2.7 we should merge these variants, but for
3695                    2.6.1 I don't want to touch the code for unsigned since that
3696                    get used in C.  The signed case will only be used by other
3697                    languages (Ada).  */
3698
3699                 rtx t1, t2, t3;
3700                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3701                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3702                                    build_int_2 (floor_log2 (d), 0),
3703                                    tquotient, 0);
3704                 t2 = expand_binop (compute_mode, and_optab, op0,
3705                                    GEN_INT (d - 1),
3706                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3707                 t3 = gen_reg_rtx (compute_mode);
3708                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3709                                       compute_mode, 1, 1);
3710                 if (t3 == 0)
3711                   {
3712                     rtx lab;
3713                     lab = gen_label_rtx ();
3714                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3715                     expand_inc (t1, const1_rtx);
3716                     emit_label (lab);
3717                     quotient = t1;
3718                   }
3719                 else
3720                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3721                                                           t1, t3),
3722                                             tquotient);
3723                 break;
3724               }
3725
3726             /* Try using an instruction that produces both the quotient and
3727                remainder, using truncation.  We can easily compensate the
3728                quotient or remainder to get ceiling rounding, once we have the
3729                remainder.  Notice that we compute also the final remainder
3730                value here, and return the result right away.  */
3731             if (target == 0 || GET_MODE (target) != compute_mode)
3732               target = gen_reg_rtx (compute_mode);
3733             if (rem_flag)
3734               {
3735                 remainder= (GET_CODE (target) == REG
3736                             ? target : gen_reg_rtx (compute_mode));
3737                 quotient = gen_reg_rtx (compute_mode);
3738               }
3739             else
3740               {
3741                 quotient = (GET_CODE (target) == REG
3742                             ? target : gen_reg_rtx (compute_mode));
3743                 remainder = gen_reg_rtx (compute_mode);
3744               }
3745
3746             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
3747                                      remainder, 0))
3748               {
3749                 /* This could be computed with a branch-less sequence.
3750                    Save that for later.  */
3751                 rtx tem;
3752                 rtx label = gen_label_rtx ();
3753                 do_cmp_and_jump (remainder, const0_rtx, EQ,
3754                                  compute_mode, label);
3755                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3756                                     NULL_RTX, 0, OPTAB_WIDEN);
3757                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
3758                 expand_inc (quotient, const1_rtx);
3759                 expand_dec (remainder, op1);
3760                 emit_label (label);
3761                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3762               }
3763
3764             /* No luck with division elimination or divmod.  Have to do it
3765                by conditionally adjusting op0 *and* the result.  */
3766             {
3767               rtx label1, label2, label3, label4, label5;
3768               rtx adjusted_op0;
3769               rtx tem;
3770
3771               quotient = gen_reg_rtx (compute_mode);
3772               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3773               label1 = gen_label_rtx ();
3774               label2 = gen_label_rtx ();
3775               label3 = gen_label_rtx ();
3776               label4 = gen_label_rtx ();
3777               label5 = gen_label_rtx ();
3778               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3779               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
3780                                compute_mode, label1);
3781               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3782                                   quotient, 0, OPTAB_LIB_WIDEN);
3783               if (tem != quotient)
3784                 emit_move_insn (quotient, tem);
3785               emit_jump_insn (gen_jump (label5));
3786               emit_barrier ();
3787               emit_label (label1);
3788               expand_dec (adjusted_op0, const1_rtx);
3789               emit_jump_insn (gen_jump (label4));
3790               emit_barrier ();
3791               emit_label (label2);
3792               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
3793                                compute_mode, label3);
3794               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3795                                   quotient, 0, OPTAB_LIB_WIDEN);
3796               if (tem != quotient)
3797                 emit_move_insn (quotient, tem);
3798               emit_jump_insn (gen_jump (label5));
3799               emit_barrier ();
3800               emit_label (label3);
3801               expand_inc (adjusted_op0, const1_rtx);
3802               emit_label (label4);
3803               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3804                                   quotient, 0, OPTAB_LIB_WIDEN);
3805               if (tem != quotient)
3806                 emit_move_insn (quotient, tem);
3807               expand_inc (quotient, const1_rtx);
3808               emit_label (label5);
3809             }
3810           }
3811         break;
3812
3813       case EXACT_DIV_EXPR:
3814         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3815           {
3816             HOST_WIDE_INT d = INTVAL (op1);
3817             unsigned HOST_WIDE_INT ml;
3818             int pre_shift;
3819             rtx t1;
3820
3821             pre_shift = floor_log2 (d & -d);
3822             ml = invert_mod2n (d >> pre_shift, size);
3823             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3824                                build_int_2 (pre_shift, 0), NULL_RTX, unsignedp);
3825             quotient = expand_mult (compute_mode, t1,
3826                                     gen_int_mode (ml, compute_mode),
3827                                     NULL_RTX, 0);
3828
3829             insn = get_last_insn ();
3830             set_unique_reg_note (insn,
3831                                  REG_EQUAL,
3832                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
3833                                                  compute_mode,
3834                                                  op0, op1));
3835           }
3836         break;
3837
3838       case ROUND_DIV_EXPR:
3839       case ROUND_MOD_EXPR:
3840         if (unsignedp)
3841           {
3842             rtx tem;
3843             rtx label;
3844             label = gen_label_rtx ();
3845             quotient = gen_reg_rtx (compute_mode);
3846             remainder = gen_reg_rtx (compute_mode);
3847             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
3848               {
3849                 rtx tem;
3850                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
3851                                          quotient, 1, OPTAB_LIB_WIDEN);
3852                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
3853                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3854                                           remainder, 1, OPTAB_LIB_WIDEN);
3855               }
3856             tem = plus_constant (op1, -1);
3857             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3858                                 build_int_2 (1, 0), NULL_RTX, 1);
3859             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
3860             expand_inc (quotient, const1_rtx);
3861             expand_dec (remainder, op1);
3862             emit_label (label);
3863           }
3864         else
3865           {
3866             rtx abs_rem, abs_op1, tem, mask;
3867             rtx label;
3868             label = gen_label_rtx ();
3869             quotient = gen_reg_rtx (compute_mode);
3870             remainder = gen_reg_rtx (compute_mode);
3871             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
3872               {
3873                 rtx tem;
3874                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
3875                                          quotient, 0, OPTAB_LIB_WIDEN);
3876                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
3877                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3878                                           remainder, 0, OPTAB_LIB_WIDEN);
3879               }
3880             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
3881             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
3882             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
3883                                 build_int_2 (1, 0), NULL_RTX, 1);
3884             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
3885             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3886                                 NULL_RTX, 0, OPTAB_WIDEN);
3887             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3888                                 build_int_2 (size - 1, 0), NULL_RTX, 0);
3889             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
3890                                 NULL_RTX, 0, OPTAB_WIDEN);
3891             tem = expand_binop (compute_mode, sub_optab, tem, mask,
3892                                 NULL_RTX, 0, OPTAB_WIDEN);
3893             expand_inc (quotient, tem);
3894             tem = expand_binop (compute_mode, xor_optab, mask, op1,
3895                                 NULL_RTX, 0, OPTAB_WIDEN);
3896             tem = expand_binop (compute_mode, sub_optab, tem, mask,
3897                                 NULL_RTX, 0, OPTAB_WIDEN);
3898             expand_dec (remainder, tem);
3899             emit_label (label);
3900           }
3901         return gen_lowpart (mode, rem_flag ? remainder : quotient);
3902
3903       default:
3904         abort ();
3905       }
3906
3907   if (quotient == 0)
3908     {
3909       if (target && GET_MODE (target) != compute_mode)
3910         target = 0;
3911
3912       if (rem_flag)
3913         {
3914           /* Try to produce the remainder without producing the quotient.
3915              If we seem to have a divmod pattern that does not require widening,
3916              don't try widening here.  We should really have a WIDEN argument
3917              to expand_twoval_binop, since what we'd really like to do here is
3918              1) try a mod insn in compute_mode
3919              2) try a divmod insn in compute_mode
3920              3) try a div insn in compute_mode and multiply-subtract to get
3921                 remainder
3922              4) try the same things with widening allowed.  */
3923           remainder
3924             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3925                                  op0, op1, target,
3926                                  unsignedp,
3927                                  ((optab2->handlers[(int) compute_mode].insn_code
3928                                    != CODE_FOR_nothing)
3929                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
3930           if (remainder == 0)
3931             {
3932               /* No luck there.  Can we do remainder and divide at once
3933                  without a library call?  */
3934               remainder = gen_reg_rtx (compute_mode);
3935               if (! expand_twoval_binop ((unsignedp
3936                                           ? udivmod_optab
3937                                           : sdivmod_optab),
3938                                          op0, op1,
3939                                          NULL_RTX, remainder, unsignedp))
3940                 remainder = 0;
3941             }
3942
3943           if (remainder)
3944             return gen_lowpart (mode, remainder);
3945         }
3946
3947       /* Produce the quotient.  Try a quotient insn, but not a library call.
3948          If we have a divmod in this mode, use it in preference to widening
3949          the div (for this test we assume it will not fail). Note that optab2
3950          is set to the one of the two optabs that the call below will use.  */
3951       quotient
3952         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
3953                              op0, op1, rem_flag ? NULL_RTX : target,
3954                              unsignedp,
3955                              ((optab2->handlers[(int) compute_mode].insn_code
3956                                != CODE_FOR_nothing)
3957                               ? OPTAB_DIRECT : OPTAB_WIDEN));
3958
3959       if (quotient == 0)
3960         {
3961           /* No luck there.  Try a quotient-and-remainder insn,
3962              keeping the quotient alone.  */
3963           quotient = gen_reg_rtx (compute_mode);
3964           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
3965                                      op0, op1,
3966                                      quotient, NULL_RTX, unsignedp))
3967             {
3968               quotient = 0;
3969               if (! rem_flag)
3970                 /* Still no luck.  If we are not computing the remainder,
3971                    use a library call for the quotient.  */
3972                 quotient = sign_expand_binop (compute_mode,
3973                                               udiv_optab, sdiv_optab,
3974                                               op0, op1, target,
3975                                               unsignedp, OPTAB_LIB_WIDEN);
3976             }
3977         }
3978     }
3979
3980   if (rem_flag)
3981     {
3982       if (target && GET_MODE (target) != compute_mode)
3983         target = 0;
3984
3985       if (quotient == 0)
3986         /* No divide instruction either.  Use library for remainder.  */
3987         remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3988                                        op0, op1, target,
3989                                        unsignedp, OPTAB_LIB_WIDEN);
3990       else
3991         {
3992           /* We divided.  Now finish doing X - Y * (X / Y).  */
3993           remainder = expand_mult (compute_mode, quotient, op1,
3994                                    NULL_RTX, unsignedp);
3995           remainder = expand_binop (compute_mode, sub_optab, op0,
3996                                     remainder, target, unsignedp,
3997                                     OPTAB_LIB_WIDEN);
3998         }
3999     }
4000
4001   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4002 }
4003 \f
4004 /* Return a tree node with data type TYPE, describing the value of X.
4005    Usually this is an RTL_EXPR, if there is no obvious better choice.
4006    X may be an expression, however we only support those expressions
4007    generated by loop.c.  */
4008
4009 tree
4010 make_tree (tree type, rtx x)
4011 {
4012   tree t;
4013
4014   switch (GET_CODE (x))
4015     {
4016     case CONST_INT:
4017       t = build_int_2 (INTVAL (x),
4018                        (TREE_UNSIGNED (type)
4019                         && (GET_MODE_BITSIZE (TYPE_MODE (type)) < HOST_BITS_PER_WIDE_INT))
4020                        || INTVAL (x) >= 0 ? 0 : -1);
4021       TREE_TYPE (t) = type;
4022       return t;
4023
4024     case CONST_DOUBLE:
4025       if (GET_MODE (x) == VOIDmode)
4026         {
4027           t = build_int_2 (CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4028           TREE_TYPE (t) = type;
4029         }
4030       else
4031         {
4032           REAL_VALUE_TYPE d;
4033
4034           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4035           t = build_real (type, d);
4036         }
4037
4038       return t;
4039
4040     case CONST_VECTOR:
4041       {
4042         int i, units;
4043         rtx elt;
4044         tree t = NULL_TREE;
4045
4046         units = CONST_VECTOR_NUNITS (x);
4047
4048         /* Build a tree with vector elements.  */
4049         for (i = units - 1; i >= 0; --i)
4050           {
4051             elt = CONST_VECTOR_ELT (x, i);
4052             t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4053           }
4054
4055         return build_vector (type, t);
4056       }
4057
4058     case PLUS:
4059       return fold (build (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4060                           make_tree (type, XEXP (x, 1))));
4061
4062     case MINUS:
4063       return fold (build (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4064                           make_tree (type, XEXP (x, 1))));
4065
4066     case NEG:
4067       return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
4068
4069     case MULT:
4070       return fold (build (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4071                           make_tree (type, XEXP (x, 1))));
4072
4073     case ASHIFT:
4074       return fold (build (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4075                           make_tree (type, XEXP (x, 1))));
4076
4077     case LSHIFTRT:
4078       t = (*lang_hooks.types.unsigned_type) (type);
4079       return fold (convert (type,
4080                             build (RSHIFT_EXPR, t,
4081                                    make_tree (t, XEXP (x, 0)),
4082                                    make_tree (type, XEXP (x, 1)))));
4083
4084     case ASHIFTRT:
4085       t = (*lang_hooks.types.signed_type) (type);
4086       return fold (convert (type,
4087                             build (RSHIFT_EXPR, t,
4088                                    make_tree (t, XEXP (x, 0)),
4089                                    make_tree (type, XEXP (x, 1)))));
4090
4091     case DIV:
4092       if (TREE_CODE (type) != REAL_TYPE)
4093         t = (*lang_hooks.types.signed_type) (type);
4094       else
4095         t = type;
4096
4097       return fold (convert (type,
4098                             build (TRUNC_DIV_EXPR, t,
4099                                    make_tree (t, XEXP (x, 0)),
4100                                    make_tree (t, XEXP (x, 1)))));
4101     case UDIV:
4102       t = (*lang_hooks.types.unsigned_type) (type);
4103       return fold (convert (type,
4104                             build (TRUNC_DIV_EXPR, t,
4105                                    make_tree (t, XEXP (x, 0)),
4106                                    make_tree (t, XEXP (x, 1)))));
4107
4108     case SIGN_EXTEND:
4109     case ZERO_EXTEND:
4110       t = (*lang_hooks.types.type_for_mode) (GET_MODE (XEXP (x, 0)),
4111                                              GET_CODE (x) == ZERO_EXTEND);
4112       return fold (convert (type, make_tree (t, XEXP (x, 0))));
4113
4114    default:
4115       t = make_node (RTL_EXPR);
4116       TREE_TYPE (t) = type;
4117
4118 #ifdef POINTERS_EXTEND_UNSIGNED
4119       /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4120          ptr_mode.  So convert.  */
4121       if (POINTER_TYPE_P (type) && GET_MODE (x) != TYPE_MODE (type))
4122         x = convert_memory_address (TYPE_MODE (type), x);
4123 #endif
4124
4125       RTL_EXPR_RTL (t) = x;
4126       /* There are no insns to be output
4127          when this rtl_expr is used.  */
4128       RTL_EXPR_SEQUENCE (t) = 0;
4129       return t;
4130     }
4131 }
4132
4133 /* Check whether the multiplication X * MULT + ADD overflows.
4134    X, MULT and ADD must be CONST_*.
4135    MODE is the machine mode for the computation.
4136    X and MULT must have mode MODE.  ADD may have a different mode.
4137    So can X (defaults to same as MODE).
4138    UNSIGNEDP is nonzero to do unsigned multiplication.  */
4139
4140 bool
4141 const_mult_add_overflow_p (rtx x, rtx mult, rtx add, enum machine_mode mode, int unsignedp)
4142 {
4143   tree type, mult_type, add_type, result;
4144
4145   type = (*lang_hooks.types.type_for_mode) (mode, unsignedp);
4146
4147   /* In order to get a proper overflow indication from an unsigned
4148      type, we have to pretend that it's a sizetype.  */
4149   mult_type = type;
4150   if (unsignedp)
4151     {
4152       mult_type = copy_node (type);
4153       TYPE_IS_SIZETYPE (mult_type) = 1;
4154     }
4155
4156   add_type = (GET_MODE (add) == VOIDmode ? mult_type
4157               : (*lang_hooks.types.type_for_mode) (GET_MODE (add), unsignedp));
4158
4159   result = fold (build (PLUS_EXPR, mult_type,
4160                         fold (build (MULT_EXPR, mult_type,
4161                                      make_tree (mult_type, x),
4162                                      make_tree (mult_type, mult))),
4163                         make_tree (add_type, add)));
4164
4165   return TREE_CONSTANT_OVERFLOW (result);
4166 }
4167
4168 /* Return an rtx representing the value of X * MULT + ADD.
4169    TARGET is a suggestion for where to store the result (an rtx).
4170    MODE is the machine mode for the computation.
4171    X and MULT must have mode MODE.  ADD may have a different mode.
4172    So can X (defaults to same as MODE).
4173    UNSIGNEDP is nonzero to do unsigned multiplication.
4174    This may emit insns.  */
4175
4176 rtx
4177 expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
4178                  int unsignedp)
4179 {
4180   tree type = (*lang_hooks.types.type_for_mode) (mode, unsignedp);
4181   tree add_type = (GET_MODE (add) == VOIDmode
4182                    ? type: (*lang_hooks.types.type_for_mode) (GET_MODE (add),
4183                                                               unsignedp));
4184   tree result =  fold (build (PLUS_EXPR, type,
4185                               fold (build (MULT_EXPR, type,
4186                                            make_tree (type, x),
4187                                            make_tree (type, mult))),
4188                               make_tree (add_type, add)));
4189
4190   return expand_expr (result, target, VOIDmode, 0);
4191 }
4192 \f
4193 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4194    and returning TARGET.
4195
4196    If TARGET is 0, a pseudo-register or constant is returned.  */
4197
4198 rtx
4199 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
4200 {
4201   rtx tem = 0;
4202
4203   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4204     tem = simplify_binary_operation (AND, mode, op0, op1);
4205   if (tem == 0)
4206     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4207
4208   if (target == 0)
4209     target = tem;
4210   else if (tem != target)
4211     emit_move_insn (target, tem);
4212   return target;
4213 }
4214 \f
4215 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
4216    and storing in TARGET.  Normally return TARGET.
4217    Return 0 if that cannot be done.
4218
4219    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
4220    it is VOIDmode, they cannot both be CONST_INT.
4221
4222    UNSIGNEDP is for the case where we have to widen the operands
4223    to perform the operation.  It says to use zero-extension.
4224
4225    NORMALIZEP is 1 if we should convert the result to be either zero
4226    or one.  Normalize is -1 if we should convert the result to be
4227    either zero or -1.  If NORMALIZEP is zero, the result will be left
4228    "raw" out of the scc insn.  */
4229
4230 rtx
4231 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
4232                  enum machine_mode mode, int unsignedp, int normalizep)
4233 {
4234   rtx subtarget;
4235   enum insn_code icode;
4236   enum machine_mode compare_mode;
4237   enum machine_mode target_mode = GET_MODE (target);
4238   rtx tem;
4239   rtx last = get_last_insn ();
4240   rtx pattern, comparison;
4241
4242   /* ??? Ok to do this and then fail? */
4243   op0 = protect_from_queue (op0, 0);
4244   op1 = protect_from_queue (op1, 0);
4245
4246   if (unsignedp)
4247     code = unsigned_condition (code);
4248
4249   /* If one operand is constant, make it the second one.  Only do this
4250      if the other operand is not constant as well.  */
4251
4252   if (swap_commutative_operands_p (op0, op1))
4253     {
4254       tem = op0;
4255       op0 = op1;
4256       op1 = tem;
4257       code = swap_condition (code);
4258     }
4259
4260   if (mode == VOIDmode)
4261     mode = GET_MODE (op0);
4262
4263   /* For some comparisons with 1 and -1, we can convert this to
4264      comparisons with zero.  This will often produce more opportunities for
4265      store-flag insns.  */
4266
4267   switch (code)
4268     {
4269     case LT:
4270       if (op1 == const1_rtx)
4271         op1 = const0_rtx, code = LE;
4272       break;
4273     case LE:
4274       if (op1 == constm1_rtx)
4275         op1 = const0_rtx, code = LT;
4276       break;
4277     case GE:
4278       if (op1 == const1_rtx)
4279         op1 = const0_rtx, code = GT;
4280       break;
4281     case GT:
4282       if (op1 == constm1_rtx)
4283         op1 = const0_rtx, code = GE;
4284       break;
4285     case GEU:
4286       if (op1 == const1_rtx)
4287         op1 = const0_rtx, code = NE;
4288       break;
4289     case LTU:
4290       if (op1 == const1_rtx)
4291         op1 = const0_rtx, code = EQ;
4292       break;
4293     default:
4294       break;
4295     }
4296
4297   /* If we are comparing a double-word integer with zero, we can convert
4298      the comparison into one involving a single word.  */
4299   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
4300       && GET_MODE_CLASS (mode) == MODE_INT
4301       && op1 == const0_rtx
4302       && (GET_CODE (op0) != MEM || ! MEM_VOLATILE_P (op0)))
4303     {
4304       if (code == EQ || code == NE)
4305         {
4306           rtx op00, op01, op0both;
4307
4308           /* Do a logical OR of the two words and compare the result.  */
4309           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
4310           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
4311           op0both = expand_binop (word_mode, ior_optab, op00, op01,
4312                                   NULL_RTX, unsignedp, OPTAB_DIRECT);
4313           if (op0both != 0)
4314             return emit_store_flag (target, code, op0both, op1, word_mode,
4315                                     unsignedp, normalizep);
4316         }
4317       else if (code == LT || code == GE)
4318         {
4319           rtx op0h;
4320
4321           /* If testing the sign bit, can just test on high word.  */
4322           op0h = simplify_gen_subreg (word_mode, op0, mode,
4323                                       subreg_highpart_offset (word_mode, mode));
4324           return emit_store_flag (target, code, op0h, op1, word_mode,
4325                                   unsignedp, normalizep);
4326         }
4327     }
4328
4329   /* From now on, we won't change CODE, so set ICODE now.  */
4330   icode = setcc_gen_code[(int) code];
4331
4332   /* If this is A < 0 or A >= 0, we can do this by taking the ones
4333      complement of A (for GE) and shifting the sign bit to the low bit.  */
4334   if (op1 == const0_rtx && (code == LT || code == GE)
4335       && GET_MODE_CLASS (mode) == MODE_INT
4336       && (normalizep || STORE_FLAG_VALUE == 1
4337           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4338               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4339                   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
4340     {
4341       subtarget = target;
4342
4343       /* If the result is to be wider than OP0, it is best to convert it
4344          first.  If it is to be narrower, it is *incorrect* to convert it
4345          first.  */
4346       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
4347         {
4348           op0 = protect_from_queue (op0, 0);
4349           op0 = convert_modes (target_mode, mode, op0, 0);
4350           mode = target_mode;
4351         }
4352
4353       if (target_mode != mode)
4354         subtarget = 0;
4355
4356       if (code == GE)
4357         op0 = expand_unop (mode, one_cmpl_optab, op0,
4358                            ((STORE_FLAG_VALUE == 1 || normalizep)
4359                             ? 0 : subtarget), 0);
4360
4361       if (STORE_FLAG_VALUE == 1 || normalizep)
4362         /* If we are supposed to produce a 0/1 value, we want to do
4363            a logical shift from the sign bit to the low-order bit; for
4364            a -1/0 value, we do an arithmetic shift.  */
4365         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
4366                             size_int (GET_MODE_BITSIZE (mode) - 1),
4367                             subtarget, normalizep != -1);
4368
4369       if (mode != target_mode)
4370         op0 = convert_modes (target_mode, mode, op0, 0);
4371
4372       return op0;
4373     }
4374
4375   if (icode != CODE_FOR_nothing)
4376     {
4377       insn_operand_predicate_fn pred;
4378
4379       /* We think we may be able to do this with a scc insn.  Emit the
4380          comparison and then the scc insn.
4381
4382          compare_from_rtx may call emit_queue, which would be deleted below
4383          if the scc insn fails.  So call it ourselves before setting LAST.
4384          Likewise for do_pending_stack_adjust.  */
4385
4386       emit_queue ();
4387       do_pending_stack_adjust ();
4388       last = get_last_insn ();
4389
4390       comparison
4391         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
4392       if (GET_CODE (comparison) == CONST_INT)
4393         return (comparison == const0_rtx ? const0_rtx
4394                 : normalizep == 1 ? const1_rtx
4395                 : normalizep == -1 ? constm1_rtx
4396                 : const_true_rtx);
4397
4398       /* The code of COMPARISON may not match CODE if compare_from_rtx
4399          decided to swap its operands and reverse the original code.
4400
4401          We know that compare_from_rtx returns either a CONST_INT or
4402          a new comparison code, so it is safe to just extract the
4403          code from COMPARISON.  */
4404       code = GET_CODE (comparison);
4405
4406       /* Get a reference to the target in the proper mode for this insn.  */
4407       compare_mode = insn_data[(int) icode].operand[0].mode;
4408       subtarget = target;
4409       pred = insn_data[(int) icode].operand[0].predicate;
4410       if (preserve_subexpressions_p ()
4411           || ! (*pred) (subtarget, compare_mode))
4412         subtarget = gen_reg_rtx (compare_mode);
4413
4414       pattern = GEN_FCN (icode) (subtarget);
4415       if (pattern)
4416         {
4417           emit_insn (pattern);
4418
4419           /* If we are converting to a wider mode, first convert to
4420              TARGET_MODE, then normalize.  This produces better combining
4421              opportunities on machines that have a SIGN_EXTRACT when we are
4422              testing a single bit.  This mostly benefits the 68k.
4423
4424              If STORE_FLAG_VALUE does not have the sign bit set when
4425              interpreted in COMPARE_MODE, we can do this conversion as
4426              unsigned, which is usually more efficient.  */
4427           if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4428             {
4429               convert_move (target, subtarget,
4430                             (GET_MODE_BITSIZE (compare_mode)
4431                              <= HOST_BITS_PER_WIDE_INT)
4432                             && 0 == (STORE_FLAG_VALUE
4433                                      & ((HOST_WIDE_INT) 1
4434                                         << (GET_MODE_BITSIZE (compare_mode) -1))));
4435               op0 = target;
4436               compare_mode = target_mode;
4437             }
4438           else
4439             op0 = subtarget;
4440
4441           /* If we want to keep subexpressions around, don't reuse our
4442              last target.  */
4443
4444           if (preserve_subexpressions_p ())
4445             subtarget = 0;
4446
4447           /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
4448              we don't have to do anything.  */
4449           if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4450             ;
4451           /* STORE_FLAG_VALUE might be the most negative number, so write
4452              the comparison this way to avoid a compiler-time warning.  */
4453           else if (- normalizep == STORE_FLAG_VALUE)
4454             op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
4455
4456           /* We don't want to use STORE_FLAG_VALUE < 0 below since this
4457              makes it hard to use a value of just the sign bit due to
4458              ANSI integer constant typing rules.  */
4459           else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
4460                    && (STORE_FLAG_VALUE
4461                        & ((HOST_WIDE_INT) 1
4462                           << (GET_MODE_BITSIZE (compare_mode) - 1))))
4463             op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
4464                                 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
4465                                 subtarget, normalizep == 1);
4466           else if (STORE_FLAG_VALUE & 1)
4467             {
4468               op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
4469               if (normalizep == -1)
4470                 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
4471             }
4472           else
4473             abort ();
4474
4475           /* If we were converting to a smaller mode, do the
4476              conversion now.  */
4477           if (target_mode != compare_mode)
4478             {
4479               convert_move (target, op0, 0);
4480               return target;
4481             }
4482           else
4483             return op0;
4484         }
4485     }
4486
4487   delete_insns_since (last);
4488
4489   /* If expensive optimizations, use different pseudo registers for each
4490      insn, instead of reusing the same pseudo.  This leads to better CSE,
4491      but slows down the compiler, since there are more pseudos */
4492   subtarget = (!flag_expensive_optimizations
4493                && (target_mode == mode)) ? target : NULL_RTX;
4494
4495   /* If we reached here, we can't do this with a scc insn.  However, there
4496      are some comparisons that can be done directly.  For example, if
4497      this is an equality comparison of integers, we can try to exclusive-or
4498      (or subtract) the two operands and use a recursive call to try the
4499      comparison with zero.  Don't do any of these cases if branches are
4500      very cheap.  */
4501
4502   if (BRANCH_COST > 0
4503       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
4504       && op1 != const0_rtx)
4505     {
4506       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
4507                           OPTAB_WIDEN);
4508
4509       if (tem == 0)
4510         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
4511                             OPTAB_WIDEN);
4512       if (tem != 0)
4513         tem = emit_store_flag (target, code, tem, const0_rtx,
4514                                mode, unsignedp, normalizep);
4515       if (tem == 0)
4516         delete_insns_since (last);
4517       return tem;
4518     }
4519
4520   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
4521      the constant zero.  Reject all other comparisons at this point.  Only
4522      do LE and GT if branches are expensive since they are expensive on
4523      2-operand machines.  */
4524
4525   if (BRANCH_COST == 0
4526       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
4527       || (code != EQ && code != NE
4528           && (BRANCH_COST <= 1 || (code != LE && code != GT))))
4529     return 0;
4530
4531   /* See what we need to return.  We can only return a 1, -1, or the
4532      sign bit.  */
4533
4534   if (normalizep == 0)
4535     {
4536       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
4537         normalizep = STORE_FLAG_VALUE;
4538
4539       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4540                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4541                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
4542         ;
4543       else
4544         return 0;
4545     }
4546
4547   /* Try to put the result of the comparison in the sign bit.  Assume we can't
4548      do the necessary operation below.  */
4549
4550   tem = 0;
4551
4552   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
4553      the sign bit set.  */
4554
4555   if (code == LE)
4556     {
4557       /* This is destructive, so SUBTARGET can't be OP0.  */
4558       if (rtx_equal_p (subtarget, op0))
4559         subtarget = 0;
4560
4561       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
4562                           OPTAB_WIDEN);
4563       if (tem)
4564         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
4565                             OPTAB_WIDEN);
4566     }
4567
4568   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
4569      number of bits in the mode of OP0, minus one.  */
4570
4571   if (code == GT)
4572     {
4573       if (rtx_equal_p (subtarget, op0))
4574         subtarget = 0;
4575
4576       tem = expand_shift (RSHIFT_EXPR, mode, op0,
4577                           size_int (GET_MODE_BITSIZE (mode) - 1),
4578                           subtarget, 0);
4579       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
4580                           OPTAB_WIDEN);
4581     }
4582
4583   if (code == EQ || code == NE)
4584     {
4585       /* For EQ or NE, one way to do the comparison is to apply an operation
4586          that converts the operand into a positive number if it is nonzero
4587          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
4588          for NE we negate.  This puts the result in the sign bit.  Then we
4589          normalize with a shift, if needed.
4590
4591          Two operations that can do the above actions are ABS and FFS, so try
4592          them.  If that doesn't work, and MODE is smaller than a full word,
4593          we can use zero-extension to the wider mode (an unsigned conversion)
4594          as the operation.  */
4595
4596       /* Note that ABS doesn't yield a positive number for INT_MIN, but
4597          that is compensated by the subsequent overflow when subtracting
4598          one / negating.  */
4599
4600       if (abs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4601         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
4602       else if (ffs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4603         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
4604       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
4605         {
4606           op0 = protect_from_queue (op0, 0);
4607           tem = convert_modes (word_mode, mode, op0, 1);
4608           mode = word_mode;
4609         }
4610
4611       if (tem != 0)
4612         {
4613           if (code == EQ)
4614             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
4615                                 0, OPTAB_WIDEN);
4616           else
4617             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
4618         }
4619
4620       /* If we couldn't do it that way, for NE we can "or" the two's complement
4621          of the value with itself.  For EQ, we take the one's complement of
4622          that "or", which is an extra insn, so we only handle EQ if branches
4623          are expensive.  */
4624
4625       if (tem == 0 && (code == NE || BRANCH_COST > 1))
4626         {
4627           if (rtx_equal_p (subtarget, op0))
4628             subtarget = 0;
4629
4630           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
4631           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
4632                               OPTAB_WIDEN);
4633
4634           if (tem && code == EQ)
4635             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
4636         }
4637     }
4638
4639   if (tem && normalizep)
4640     tem = expand_shift (RSHIFT_EXPR, mode, tem,
4641                         size_int (GET_MODE_BITSIZE (mode) - 1),
4642                         subtarget, normalizep == 1);
4643
4644   if (tem)
4645     {
4646       if (GET_MODE (tem) != target_mode)
4647         {
4648           convert_move (target, tem, 0);
4649           tem = target;
4650         }
4651       else if (!subtarget)
4652         {
4653           emit_move_insn (target, tem);
4654           tem = target;
4655         }
4656     }
4657   else
4658     delete_insns_since (last);
4659
4660   return tem;
4661 }
4662
4663 /* Like emit_store_flag, but always succeeds.  */
4664
4665 rtx
4666 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
4667                        enum machine_mode mode, int unsignedp, int normalizep)
4668 {
4669   rtx tem, label;
4670
4671   /* First see if emit_store_flag can do the job.  */
4672   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
4673   if (tem != 0)
4674     return tem;
4675
4676   if (normalizep == 0)
4677     normalizep = 1;
4678
4679   /* If this failed, we have to do this with set/compare/jump/set code.  */
4680
4681   if (GET_CODE (target) != REG
4682       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
4683     target = gen_reg_rtx (GET_MODE (target));
4684
4685   emit_move_insn (target, const1_rtx);
4686   label = gen_label_rtx ();
4687   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
4688                            NULL_RTX, label);
4689
4690   emit_move_insn (target, const0_rtx);
4691   emit_label (label);
4692
4693   return target;
4694 }
4695 \f
4696 /* Perform possibly multi-word comparison and conditional jump to LABEL
4697    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
4698
4699    The algorithm is based on the code in expr.c:do_jump.
4700
4701    Note that this does not perform a general comparison.  Only variants
4702    generated within expmed.c are correctly handled, others abort (but could
4703    be handled if needed).  */
4704
4705 static void
4706 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
4707                  rtx label)
4708 {
4709   /* If this mode is an integer too wide to compare properly,
4710      compare word by word.  Rely on cse to optimize constant cases.  */
4711
4712   if (GET_MODE_CLASS (mode) == MODE_INT
4713       && ! can_compare_p (op, mode, ccp_jump))
4714     {
4715       rtx label2 = gen_label_rtx ();
4716
4717       switch (op)
4718         {
4719         case LTU:
4720           do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
4721           break;
4722
4723         case LEU:
4724           do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
4725           break;
4726
4727         case LT:
4728           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
4729           break;
4730
4731         case GT:
4732           do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
4733           break;
4734
4735         case GE:
4736           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
4737           break;
4738
4739           /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
4740              that's the only equality operations we do */
4741         case EQ:
4742           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
4743             abort ();
4744           do_jump_by_parts_equality_rtx (arg1, label2, label);
4745           break;
4746
4747         case NE:
4748           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
4749             abort ();
4750           do_jump_by_parts_equality_rtx (arg1, label, label2);
4751           break;
4752
4753         default:
4754           abort ();
4755         }
4756
4757       emit_label (label2);
4758     }
4759   else
4760     emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
4761 }