gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 2, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING.  If not, write to the Free
  20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  21 02111-1307, USA.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "tm.h"
  28 #include "toplev.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "tm_p.h"
  32 #include "flags.h"
  33 #include "insn-config.h"
  34 #include "expr.h"
  35 #include "optabs.h"
  36 #include "real.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39
  40 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  41                                    unsigned HOST_WIDE_INT,
  42                                    unsigned HOST_WIDE_INT, rtx);
  43 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  44                                    unsigned HOST_WIDE_INT, rtx);
  45 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  46                                     unsigned HOST_WIDE_INT,
  47                                     unsigned HOST_WIDE_INT,
  48                                     unsigned HOST_WIDE_INT, rtx, int);
  49 static rtx mask_rtx (enum machine_mode, int, int, int);
  50 static rtx lshift_value (enum machine_mode, rtx, int, int);
  51 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  52                                     unsigned HOST_WIDE_INT, int);
  53 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  54
  55 /* Nonzero means divides or modulus operations are relatively cheap for
  56    powers of two, so don't use branches; emit the operation instead.
  57    Usually, this will mean that the MD file will emit non-branch
  58    sequences.  */
  59
  60 static int sdiv_pow2_cheap, smod_pow2_cheap;
  61
  62 #ifndef SLOW_UNALIGNED_ACCESS
  63 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  64 #endif
  65
  66 /* For compilers that support multiple targets with different word sizes,
  67    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  68    is the H8/300(H) compiler.  */
  69
  70 #ifndef MAX_BITS_PER_WORD
  71 #define MAX_BITS_PER_WORD BITS_PER_WORD
  72 #endif
  73
  74 /* Reduce conditional compilation elsewhere.  */
  75 #ifndef HAVE_insv
  76 #define HAVE_insv       0
  77 #define CODE_FOR_insv   CODE_FOR_nothing
  78 #define gen_insv(a,b,c,d) NULL_RTX
  79 #endif
  80 #ifndef HAVE_extv
  81 #define HAVE_extv       0
  82 #define CODE_FOR_extv   CODE_FOR_nothing
  83 #define gen_extv(a,b,c,d) NULL_RTX
  84 #endif
  85 #ifndef HAVE_extzv
  86 #define HAVE_extzv      0
  87 #define CODE_FOR_extzv  CODE_FOR_nothing
  88 #define gen_extzv(a,b,c,d) NULL_RTX
  89 #endif
  90
  91 /* Cost of various pieces of RTL.  Note that some of these are indexed by
  92    shift count and some by mode.  */
  93 static int add_cost, negate_cost, zero_cost;
  94 static int shift_cost[MAX_BITS_PER_WORD];
  95 static int shiftadd_cost[MAX_BITS_PER_WORD];
  96 static int shiftsub_cost[MAX_BITS_PER_WORD];
  97 static int mul_cost[NUM_MACHINE_MODES];
  98 static int div_cost[NUM_MACHINE_MODES];
  99 static int mul_widen_cost[NUM_MACHINE_MODES];
 100 static int mul_highpart_cost[NUM_MACHINE_MODES];
 101
 102 void
 103 init_expmed (void)
 104 {
 105   rtx reg, shift_insn, shiftadd_insn, shiftsub_insn;
 106   int dummy;
 107   int m;
 108   enum machine_mode mode, wider_mode;
 109
 110   start_sequence ();
 111
 112   /* This is "some random pseudo register" for purposes of calling recog
 113      to see what insns exist.  */
 114   reg = gen_rtx_REG (word_mode, 10000);
 115
 116   zero_cost = rtx_cost (const0_rtx, 0);
 117   add_cost = rtx_cost (gen_rtx_PLUS (word_mode, reg, reg), SET);
 118
 119   shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg,
 120                                        gen_rtx_ASHIFT (word_mode, reg,
 121                                                        const0_rtx)));
 122
 123   shiftadd_insn
 124     = emit_insn (gen_rtx_SET (VOIDmode, reg,
 125                               gen_rtx_PLUS (word_mode,
 126                                             gen_rtx_MULT (word_mode,
 127                                                           reg, const0_rtx),
 128                                             reg)));
 129
 130   shiftsub_insn
 131     = emit_insn (gen_rtx_SET (VOIDmode, reg,
 132                               gen_rtx_MINUS (word_mode,
 133                                              gen_rtx_MULT (word_mode,
 134                                                            reg, const0_rtx),
 135                                              reg)));
 136
 137   init_recog ();
 138
 139   shift_cost[0] = 0;
 140   shiftadd_cost[0] = shiftsub_cost[0] = add_cost;
 141
 142   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 143     {
 144       rtx c_int = GEN_INT ((HOST_WIDE_INT) 1 << m);
 145       shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;
 146
 147       XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);
 148       if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)
 149         shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);
 150
 151       XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1) = c_int;
 152       if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)
 153         shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);
 154
 155       XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1) = c_int;
 156       if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)
 157         shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);
 158     }
 159
 160   negate_cost = rtx_cost (gen_rtx_NEG (word_mode, reg), SET);
 161
 162   sdiv_pow2_cheap
 163     = (rtx_cost (gen_rtx_DIV (word_mode, reg, GEN_INT (32)), SET)
 164        <= 2 * add_cost);
 165   smod_pow2_cheap
 166     = (rtx_cost (gen_rtx_MOD (word_mode, reg, GEN_INT (32)), SET)
 167        <= 2 * add_cost);
 168
 169   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 170        mode != VOIDmode;
 171        mode = GET_MODE_WIDER_MODE (mode))
 172     {
 173       reg = gen_rtx_REG (mode, 10000);
 174       div_cost[(int) mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET);
 175       mul_cost[(int) mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET);
 176       wider_mode = GET_MODE_WIDER_MODE (mode);
 177       if (wider_mode != VOIDmode)
 178         {
 179           mul_widen_cost[(int) wider_mode]
 180             = rtx_cost (gen_rtx_MULT (wider_mode,
 181                                       gen_rtx_ZERO_EXTEND (wider_mode, reg),
 182                                       gen_rtx_ZERO_EXTEND (wider_mode, reg)),
 183                         SET);
 184           mul_highpart_cost[(int) mode]
 185             = rtx_cost (gen_rtx_TRUNCATE
 186                         (mode,
 187                          gen_rtx_LSHIFTRT (wider_mode,
 188                                            gen_rtx_MULT (wider_mode,
 189                                                          gen_rtx_ZERO_EXTEND
 190                                                          (wider_mode, reg),
 191                                                          gen_rtx_ZERO_EXTEND
 192                                                          (wider_mode, reg)),
 193                                            GEN_INT (GET_MODE_BITSIZE (mode)))),
 194                         SET);
 195         }
 196     }
 197
 198   end_sequence ();
 199 }
 200
 201 /* Return an rtx representing minus the value of X.
 202    MODE is the intended mode of the result,
 203    useful if X is a CONST_INT.  */
 204
 205 rtx
 206 negate_rtx (enum machine_mode mode, rtx x)
 207 {
 208   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 209
 210   if (result == 0)
 211     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 212
 213   return result;
 214 }
 215
 216 /* Report on the availability of insv/extv/extzv and the desired mode
 217    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 218    is false; else the mode of the specified operand.  If OPNO is -1,
 219    all the caller cares about is whether the insn is available.  */
 220 enum machine_mode
 221 mode_for_extraction (enum extraction_pattern pattern, int opno)
 222 {
 223   const struct insn_data *data;
 224
 225   switch (pattern)
 226     {
 227     case EP_insv:
 228       if (HAVE_insv)
 229         {
 230           data = &insn_data[CODE_FOR_insv];
 231           break;
 232         }
 233       return MAX_MACHINE_MODE;
 234
 235     case EP_extv:
 236       if (HAVE_extv)
 237         {
 238           data = &insn_data[CODE_FOR_extv];
 239           break;
 240         }
 241       return MAX_MACHINE_MODE;
 242
 243     case EP_extzv:
 244       if (HAVE_extzv)
 245         {
 246           data = &insn_data[CODE_FOR_extzv];
 247           break;
 248         }
 249       return MAX_MACHINE_MODE;
 250
 251     default:
 252       abort ();
 253     }
 254
 255   if (opno == -1)
 256     return VOIDmode;
 257
 258   /* Everyone who uses this function used to follow it with
 259      if (result == VOIDmode) result = word_mode; */
 260   if (data->operand[opno].mode == VOIDmode)
 261     return word_mode;
 262   return data->operand[opno].mode;
 263 }
 264
 265 \f
 266 /* Generate code to store value from rtx VALUE
 267    into a bit-field within structure STR_RTX
 268    containing BITSIZE bits starting at bit BITNUM.
 269    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
 270    ALIGN is the alignment that STR_RTX is known to have.
 271    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
 272
 273 /* ??? Note that there are two different ideas here for how
 274    to determine the size to count bits within, for a register.
 275    One is BITS_PER_WORD, and the other is the size of operand 3
 276    of the insv pattern.
 277
 278    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
 279    else, we use the mode of operand 3.  */
 280
 281 rtx
 282 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 283                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 284                  rtx value, HOST_WIDE_INT total_size)
 285 {
 286   unsigned int unit
 287     = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
 288   unsigned HOST_WIDE_INT offset = bitnum / unit;
 289   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
 290   rtx op0 = str_rtx;
 291   int byte_offset;
 292
 293   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 294
 295   /* Discount the part of the structure before the desired byte.
 296      We need to know how many bytes are safe to reference after it.  */
 297   if (total_size >= 0)
 298     total_size -= (bitpos / BIGGEST_ALIGNMENT
 299                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
 300
 301   while (GET_CODE (op0) == SUBREG)
 302     {
 303       /* The following line once was done only if WORDS_BIG_ENDIAN,
 304          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 305          meaningful at a much higher level; when structures are copied
 306          between memory and regs, the higher-numbered regs
 307          always get higher addresses.  */
 308       offset += (SUBREG_BYTE (op0) / UNITS_PER_WORD);
 309       /* We used to adjust BITPOS here, but now we do the whole adjustment
 310          right after the loop.  */
 311       op0 = SUBREG_REG (op0);
 312     }
 313
 314   value = protect_from_queue (value, 0);
 315
 316   if (flag_force_mem)
 317     {
 318       int old_generating_concat_p = generating_concat_p;
 319       generating_concat_p = 0;
 320       value = force_not_mem (value);
 321       generating_concat_p = old_generating_concat_p;
 322     }
 323
 324   /* If the target is a register, overwriting the entire object, or storing
 325      a full-word or multi-word field can be done with just a SUBREG.
 326
 327      If the target is memory, storing any naturally aligned field can be
 328      done with a simple store.  For targets that support fast unaligned
 329      memory, any naturally sized, unit aligned field can be done directly.  */
 330
 331   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 332                 + (offset * UNITS_PER_WORD);
 333
 334   if (bitpos == 0
 335       && bitsize == GET_MODE_BITSIZE (fieldmode)
 336       && (GET_CODE (op0) != MEM
 337           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 338              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 339              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 340           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 341              || (offset * BITS_PER_UNIT % bitsize == 0
 342                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 343     {
 344       if (GET_MODE (op0) != fieldmode)
 345         {
 346           if (GET_CODE (op0) == SUBREG)
 347             {
 348               if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 349                   || GET_MODE_CLASS (fieldmode) == MODE_INT
 350                   || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 351                 op0 = SUBREG_REG (op0);
 352               else
 353                 /* Else we've got some float mode source being extracted into
 354                    a different float mode destination -- this combination of
 355                    subregs results in Severe Tire Damage.  */
 356                 abort ();
 357             }
 358           if (GET_CODE (op0) == REG)
 359             op0 = gen_rtx_SUBREG (fieldmode, op0, byte_offset);
 360           else
 361             op0 = adjust_address (op0, fieldmode, offset);
 362         }
 363       emit_move_insn (op0, value);
 364       return value;
 365     }
 366
 367   /* Make sure we are playing with integral modes.  Pun with subregs
 368      if we aren't.  This must come after the entire register case above,
 369      since that case is valid for any mode.  The following cases are only
 370      valid for integral modes.  */
 371   {
 372     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 373     if (imode != GET_MODE (op0))
 374       {
 375         if (GET_CODE (op0) == MEM)
 376           op0 = adjust_address (op0, imode, 0);
 377         else if (imode != BLKmode)
 378           op0 = gen_lowpart (imode, op0);
 379         else
 380           abort ();
 381       }
 382   }
 383
 384   /* We may be accessing data outside the field, which means
 385      we can alias adjacent data.  */
 386   if (GET_CODE (op0) == MEM)
 387     {
 388       op0 = shallow_copy_rtx (op0);
 389       set_mem_alias_set (op0, 0);
 390       set_mem_expr (op0, 0);
 391     }
 392
 393   /* If OP0 is a register, BITPOS must count within a word.
 394      But as we have it, it counts within whatever size OP0 now has.
 395      On a bigendian machine, these are not the same, so convert.  */
 396   if (BYTES_BIG_ENDIAN
 397       && GET_CODE (op0) != MEM
 398       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 399     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 400
 401   /* Storing an lsb-aligned field in a register
 402      can be done with a movestrict instruction.  */
 403
 404   if (GET_CODE (op0) != MEM
 405       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 406       && bitsize == GET_MODE_BITSIZE (fieldmode)
 407       && (movstrict_optab->handlers[(int) fieldmode].insn_code
 408           != CODE_FOR_nothing))
 409     {
 410       int icode = movstrict_optab->handlers[(int) fieldmode].insn_code;
 411
 412       /* Get appropriate low part of the value being stored.  */
 413       if (GET_CODE (value) == CONST_INT || GET_CODE (value) == REG)
 414         value = gen_lowpart (fieldmode, value);
 415       else if (!(GET_CODE (value) == SYMBOL_REF
 416                  || GET_CODE (value) == LABEL_REF
 417                  || GET_CODE (value) == CONST))
 418         value = convert_to_mode (fieldmode, value, 0);
 419
 420       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 421         value = copy_to_mode_reg (fieldmode, value);
 422
 423       if (GET_CODE (op0) == SUBREG)
 424         {
 425           if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 426               || GET_MODE_CLASS (fieldmode) == MODE_INT
 427               || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 428             op0 = SUBREG_REG (op0);
 429           else
 430             /* Else we've got some float mode source being extracted into
 431                a different float mode destination -- this combination of
 432                subregs results in Severe Tire Damage.  */
 433             abort ();
 434         }
 435
 436       emit_insn (GEN_FCN (icode)
 437                  (gen_rtx_SUBREG (fieldmode, op0,
 438                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 439                                   + (offset * UNITS_PER_WORD)),
 440                                   value));
 441
 442       return value;
 443     }
 444
 445   /* Handle fields bigger than a word.  */
 446
 447   if (bitsize > BITS_PER_WORD)
 448     {
 449       /* Here we transfer the words of the field
 450          in the order least significant first.
 451          This is because the most significant word is the one which may
 452          be less than full.
 453          However, only do that if the value is not BLKmode.  */
 454
 455       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 456       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 457       unsigned int i;
 458
 459       /* This is the mode we must force value to, so that there will be enough
 460          subwords to extract.  Note that fieldmode will often (always?) be
 461          VOIDmode, because that is what store_field uses to indicate that this
 462          is a bit field, but passing VOIDmode to operand_subword_force will
 463          result in an abort.  */
 464       fieldmode = GET_MODE (value);
 465       if (fieldmode == VOIDmode)
 466         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 467
 468       for (i = 0; i < nwords; i++)
 469         {
 470           /* If I is 0, use the low-order word in both field and target;
 471              if I is 1, use the next to lowest word; and so on.  */
 472           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 473           unsigned int bit_offset = (backwards
 474                                      ? MAX ((int) bitsize - ((int) i + 1)
 475                                             * BITS_PER_WORD,
 476                                             0)
 477                                      : (int) i * BITS_PER_WORD);
 478
 479           store_bit_field (op0, MIN (BITS_PER_WORD,
 480                                      bitsize - i * BITS_PER_WORD),
 481                            bitnum + bit_offset, word_mode,
 482                            operand_subword_force (value, wordnum, fieldmode),
 483                            total_size);
 484         }
 485       return value;
 486     }
 487
 488   /* From here on we can assume that the field to be stored in is
 489      a full-word (whatever type that is), since it is shorter than a word.  */
 490
 491   /* OFFSET is the number of words or bytes (UNIT says which)
 492      from STR_RTX to the first word or byte containing part of the field.  */
 493
 494   if (GET_CODE (op0) != MEM)
 495     {
 496       if (offset != 0
 497           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 498         {
 499           if (GET_CODE (op0) != REG)
 500             {
 501               /* Since this is a destination (lvalue), we can't copy it to a
 502                  pseudo.  We can trivially remove a SUBREG that does not
 503                  change the size of the operand.  Such a SUBREG may have been
 504                  added above.  Otherwise, abort.  */
 505               if (GET_CODE (op0) == SUBREG
 506                   && (GET_MODE_SIZE (GET_MODE (op0))
 507                       == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))))
 508                 op0 = SUBREG_REG (op0);
 509               else
 510                 abort ();
 511             }
 512           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 513                                 op0, (offset * UNITS_PER_WORD));
 514         }
 515       offset = 0;
 516     }
 517   else
 518     op0 = protect_from_queue (op0, 1);
 519
 520   /* If VALUE is a floating-point mode, access it as an integer of the
 521      corresponding size.  This can occur on a machine with 64 bit registers
 522      that uses SFmode for float.  This can also occur for unaligned float
 523      structure fields.  */
 524   if (GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 525       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 526     value = gen_lowpart ((GET_MODE (value) == VOIDmode
 527                           ? word_mode : int_mode_for_mode (GET_MODE (value))),
 528                          value);
 529
 530   /* Now OFFSET is nonzero only if OP0 is memory
 531      and is therefore always measured in bytes.  */
 532
 533   if (HAVE_insv
 534       && GET_MODE (value) != BLKmode
 535       && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
 536       /* Ensure insv's size is wide enough for this field.  */
 537       && (GET_MODE_BITSIZE (op_mode) >= bitsize)
 538       && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 539             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
 540     {
 541       int xbitpos = bitpos;
 542       rtx value1;
 543       rtx xop0 = op0;
 544       rtx last = get_last_insn ();
 545       rtx pat;
 546       enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
 547       int save_volatile_ok = volatile_ok;
 548
 549       volatile_ok = 1;
 550
 551       /* If this machine's insv can only insert into a register, copy OP0
 552          into a register and save it back later.  */
 553       /* This used to check flag_force_mem, but that was a serious
 554          de-optimization now that flag_force_mem is enabled by -O2.  */
 555       if (GET_CODE (op0) == MEM
 556           && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
 557                 (op0, VOIDmode)))
 558         {
 559           rtx tempreg;
 560           enum machine_mode bestmode;
 561
 562           /* Get the mode to use for inserting into this field.  If OP0 is
 563              BLKmode, get the smallest mode consistent with the alignment. If
 564              OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
 565              mode. Otherwise, use the smallest mode containing the field.  */
 566
 567           if (GET_MODE (op0) == BLKmode
 568               || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
 569             bestmode
 570               = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
 571                                MEM_VOLATILE_P (op0));
 572           else
 573             bestmode = GET_MODE (op0);
 574
 575           if (bestmode == VOIDmode
 576               || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 577                   && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 578             goto insv_loses;
 579
 580           /* Adjust address to point to the containing unit of that mode.
 581              Compute offset as multiple of this unit, counting in bytes.  */
 582           unit = GET_MODE_BITSIZE (bestmode);
 583           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 584           bitpos = bitnum % unit;
 585           op0 = adjust_address (op0, bestmode,  offset);
 586
 587           /* Fetch that unit, store the bitfield in it, then store
 588              the unit.  */
 589           tempreg = copy_to_reg (op0);
 590           store_bit_field (tempreg, bitsize, bitpos, fieldmode, value,
 591                            total_size);
 592           emit_move_insn (op0, tempreg);
 593           return value;
 594         }
 595       volatile_ok = save_volatile_ok;
 596
 597       /* Add OFFSET into OP0's address.  */
 598       if (GET_CODE (xop0) == MEM)
 599         xop0 = adjust_address (xop0, byte_mode, offset);
 600
 601       /* If xop0 is a register, we need it in MAXMODE
 602          to make it acceptable to the format of insv.  */
 603       if (GET_CODE (xop0) == SUBREG)
 604         /* We can't just change the mode, because this might clobber op0,
 605            and we will need the original value of op0 if insv fails.  */
 606         xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 607       if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
 608         xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
 609
 610       /* On big-endian machines, we count bits from the most significant.
 611          If the bit field insn does not, we must invert.  */
 612
 613       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 614         xbitpos = unit - bitsize - xbitpos;
 615
 616       /* We have been counting XBITPOS within UNIT.
 617          Count instead within the size of the register.  */
 618       if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
 619         xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
 620
 621       unit = GET_MODE_BITSIZE (maxmode);
 622
 623       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
 624       value1 = value;
 625       if (GET_MODE (value) != maxmode)
 626         {
 627           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 628             {
 629               /* Optimization: Don't bother really extending VALUE
 630                  if it has all the bits we will actually use.  However,
 631                  if we must narrow it, be sure we do it correctly.  */
 632
 633               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
 634                 {
 635                   rtx tmp;
 636
 637                   tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
 638                   if (! tmp)
 639                     tmp = simplify_gen_subreg (maxmode,
 640                                                force_reg (GET_MODE (value),
 641                                                           value1),
 642                                                GET_MODE (value), 0);
 643                   value1 = tmp;
 644                 }
 645               else
 646                 value1 = gen_lowpart (maxmode, value1);
 647             }
 648           else if (GET_CODE (value) == CONST_INT)
 649             value1 = gen_int_mode (INTVAL (value), maxmode);
 650           else if (!CONSTANT_P (value))
 651             /* Parse phase is supposed to make VALUE's data type
 652                match that of the component reference, which is a type
 653                at least as wide as the field; so VALUE should have
 654                a mode that corresponds to that type.  */
 655             abort ();
 656         }
 657
 658       /* If this machine's insv insists on a register,
 659          get VALUE1 into a register.  */
 660       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 661              (value1, maxmode)))
 662         value1 = force_reg (maxmode, value1);
 663
 664       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 665       if (pat)
 666         emit_insn (pat);
 667       else
 668         {
 669           delete_insns_since (last);
 670           store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 671         }
 672     }
 673   else
 674     insv_loses:
 675     /* Insv is not available; store using shifts and boolean ops.  */
 676     store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 677   return value;
 678 }
 679 \f
 680 /* Use shifts and boolean operations to store VALUE
 681    into a bit field of width BITSIZE
 682    in a memory location specified by OP0 except offset by OFFSET bytes.
 683      (OFFSET must be 0 if OP0 is a register.)
 684    The field starts at position BITPOS within the byte.
 685     (If OP0 is a register, it may be a full word or a narrower mode,
 686      but BITPOS still counts within a full word,
 687      which is significant on bigendian machines.)
 688
 689    Note that protect_from_queue has already been done on OP0 and VALUE.  */
 690
 691 static void
 692 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 693                        unsigned HOST_WIDE_INT bitsize,
 694                        unsigned HOST_WIDE_INT bitpos, rtx value)
 695 {
 696   enum machine_mode mode;
 697   unsigned int total_bits = BITS_PER_WORD;
 698   rtx subtarget, temp;
 699   int all_zero = 0;
 700   int all_one = 0;
 701
 702   /* There is a case not handled here:
 703      a structure with a known alignment of just a halfword
 704      and a field split across two aligned halfwords within the structure.
 705      Or likewise a structure with a known alignment of just a byte
 706      and a field split across two bytes.
 707      Such cases are not supposed to be able to occur.  */
 708
 709   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 710     {
 711       if (offset != 0)
 712         abort ();
 713       /* Special treatment for a bit field split across two registers.  */
 714       if (bitsize + bitpos > BITS_PER_WORD)
 715         {
 716           store_split_bit_field (op0, bitsize, bitpos, value);
 717           return;
 718         }
 719     }
 720   else
 721     {
 722       /* Get the proper mode to use for this field.  We want a mode that
 723          includes the entire field.  If such a mode would be larger than
 724          a word, we won't be doing the extraction the normal way.
 725          We don't want a mode bigger than the destination.  */
 726
 727       mode = GET_MODE (op0);
 728       if (GET_MODE_BITSIZE (mode) == 0
 729           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 730         mode = word_mode;
 731       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 732                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 733
 734       if (mode == VOIDmode)
 735         {
 736           /* The only way this should occur is if the field spans word
 737              boundaries.  */
 738           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 739                                  value);
 740           return;
 741         }
 742
 743       total_bits = GET_MODE_BITSIZE (mode);
 744
 745       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 746          be in the range 0 to total_bits-1, and put any excess bytes in
 747          OFFSET.  */
 748       if (bitpos >= total_bits)
 749         {
 750           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 751           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 752                      * BITS_PER_UNIT);
 753         }
 754
 755       /* Get ref to an aligned byte, halfword, or word containing the field.
 756          Adjust BITPOS to be position within a word,
 757          and OFFSET to be the offset of that word.
 758          Then alter OP0 to refer to that word.  */
 759       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 760       offset -= (offset % (total_bits / BITS_PER_UNIT));
 761       op0 = adjust_address (op0, mode, offset);
 762     }
 763
 764   mode = GET_MODE (op0);
 765
 766   /* Now MODE is either some integral mode for a MEM as OP0,
 767      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 768      The bit field is contained entirely within OP0.
 769      BITPOS is the starting bit number within OP0.
 770      (OP0's mode may actually be narrower than MODE.)  */
 771
 772   if (BYTES_BIG_ENDIAN)
 773       /* BITPOS is the distance between our msb
 774          and that of the containing datum.
 775          Convert it to the distance from the lsb.  */
 776       bitpos = total_bits - bitsize - bitpos;
 777
 778   /* Now BITPOS is always the distance between our lsb
 779      and that of OP0.  */
 780
 781   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 782      we must first convert its mode to MODE.  */
 783
 784   if (GET_CODE (value) == CONST_INT)
 785     {
 786       HOST_WIDE_INT v = INTVAL (value);
 787
 788       if (bitsize < HOST_BITS_PER_WIDE_INT)
 789         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 790
 791       if (v == 0)
 792         all_zero = 1;
 793       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 794                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 795                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 796         all_one = 1;
 797
 798       value = lshift_value (mode, value, bitpos, bitsize);
 799     }
 800   else
 801     {
 802       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 803                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 804
 805       if (GET_MODE (value) != mode)
 806         {
 807           if ((GET_CODE (value) == REG || GET_CODE (value) == SUBREG)
 808               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 809             value = gen_lowpart (mode, value);
 810           else
 811             value = convert_to_mode (mode, value, 1);
 812         }
 813
 814       if (must_and)
 815         value = expand_binop (mode, and_optab, value,
 816                               mask_rtx (mode, 0, bitsize, 0),
 817                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 818       if (bitpos > 0)
 819         value = expand_shift (LSHIFT_EXPR, mode, value,
 820                               build_int_2 (bitpos, 0), NULL_RTX, 1);
 821     }
 822
 823   /* Now clear the chosen bits in OP0,
 824      except that if VALUE is -1 we need not bother.  */
 825
 826   subtarget = (GET_CODE (op0) == REG || ! flag_force_mem) ? op0 : 0;
 827
 828   if (! all_one)
 829     {
 830       temp = expand_binop (mode, and_optab, op0,
 831                            mask_rtx (mode, bitpos, bitsize, 1),
 832                            subtarget, 1, OPTAB_LIB_WIDEN);
 833       subtarget = temp;
 834     }
 835   else
 836     temp = op0;
 837
 838   /* Now logical-or VALUE into OP0, unless it is zero.  */
 839
 840   if (! all_zero)
 841     temp = expand_binop (mode, ior_optab, temp, value,
 842                          subtarget, 1, OPTAB_LIB_WIDEN);
 843   if (op0 != temp)
 844     emit_move_insn (op0, temp);
 845 }
 846 \f
 847 /* Store a bit field that is split across multiple accessible memory objects.
 848
 849    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 850    BITSIZE is the field width; BITPOS the position of its first bit
 851    (within the word).
 852    VALUE is the value to store.
 853
 854    This does not yet handle fields wider than BITS_PER_WORD.  */
 855
 856 static void
 857 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 858                        unsigned HOST_WIDE_INT bitpos, rtx value)
 859 {
 860   unsigned int unit;
 861   unsigned int bitsdone = 0;
 862
 863   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 864      much at a time.  */
 865   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
 866     unit = BITS_PER_WORD;
 867   else
 868     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
 869
 870   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 871      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 872      that VALUE might be a floating-point constant.  */
 873   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
 874     {
 875       rtx word = gen_lowpart_common (word_mode, value);
 876
 877       if (word && (value != word))
 878         value = word;
 879       else
 880         value = gen_lowpart_common (word_mode,
 881                                     force_reg (GET_MODE (value) != VOIDmode
 882                                                ? GET_MODE (value)
 883                                                : word_mode, value));
 884     }
 885   else if (GET_CODE (value) == ADDRESSOF)
 886     value = copy_to_reg (value);
 887
 888   while (bitsdone < bitsize)
 889     {
 890       unsigned HOST_WIDE_INT thissize;
 891       rtx part, word;
 892       unsigned HOST_WIDE_INT thispos;
 893       unsigned HOST_WIDE_INT offset;
 894
 895       offset = (bitpos + bitsdone) / unit;
 896       thispos = (bitpos + bitsdone) % unit;
 897
 898       /* THISSIZE must not overrun a word boundary.  Otherwise,
 899          store_fixed_bit_field will call us again, and we will mutually
 900          recurse forever.  */
 901       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
 902       thissize = MIN (thissize, unit - thispos);
 903
 904       if (BYTES_BIG_ENDIAN)
 905         {
 906           int total_bits;
 907
 908           /* We must do an endian conversion exactly the same way as it is
 909              done in extract_bit_field, so that the two calls to
 910              extract_fixed_bit_field will have comparable arguments.  */
 911           if (GET_CODE (value) != MEM || GET_MODE (value) == BLKmode)
 912             total_bits = BITS_PER_WORD;
 913           else
 914             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
 915
 916           /* Fetch successively less significant portions.  */
 917           if (GET_CODE (value) == CONST_INT)
 918             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 919                              >> (bitsize - bitsdone - thissize))
 920                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 921           else
 922             /* The args are chosen so that the last part includes the
 923                lsb.  Give extract_bit_field the value it needs (with
 924                endianness compensation) to fetch the piece we want.  */
 925             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
 926                                             total_bits - bitsize + bitsdone,
 927                                             NULL_RTX, 1);
 928         }
 929       else
 930         {
 931           /* Fetch successively more significant portions.  */
 932           if (GET_CODE (value) == CONST_INT)
 933             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 934                              >> bitsdone)
 935                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
 936           else
 937             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
 938                                             bitsdone, NULL_RTX, 1);
 939         }
 940
 941       /* If OP0 is a register, then handle OFFSET here.
 942
 943          When handling multiword bitfields, extract_bit_field may pass
 944          down a word_mode SUBREG of a larger REG for a bitfield that actually
 945          crosses a word boundary.  Thus, for a SUBREG, we must find
 946          the current word starting from the base register.  */
 947       if (GET_CODE (op0) == SUBREG)
 948         {
 949           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
 950           word = operand_subword_force (SUBREG_REG (op0), word_offset,
 951                                         GET_MODE (SUBREG_REG (op0)));
 952           offset = 0;
 953         }
 954       else if (GET_CODE (op0) == REG)
 955         {
 956           word = operand_subword_force (op0, offset, GET_MODE (op0));
 957           offset = 0;
 958         }
 959       else
 960         word = op0;
 961
 962       /* OFFSET is in UNITs, and UNIT is in bits.
 963          store_fixed_bit_field wants offset in bytes.  */
 964       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
 965                              thispos, part);
 966       bitsdone += thissize;
 967     }
 968 }
 969 \f
 970 /* Generate code to extract a byte-field from STR_RTX
 971    containing BITSIZE bits, starting at BITNUM,
 972    and put it in TARGET if possible (if TARGET is nonzero).
 973    Regardless of TARGET, we return the rtx for where the value is placed.
 974    It may be a QUEUED.
 975
 976    STR_RTX is the structure containing the byte (a REG or MEM).
 977    UNSIGNEDP is nonzero if this is an unsigned bit field.
 978    MODE is the natural mode of the field value once extracted.
 979    TMODE is the mode the caller would like the value to have;
 980    but the value may be returned with type MODE instead.
 981
 982    TOTAL_SIZE is the size in bytes of the containing structure,
 983    or -1 if varying.
 984
 985    If a TARGET is specified and we can store in it at no extra cost,
 986    we do so, and return TARGET.
 987    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
 988    if they are equally easy.  */
 989
 990 rtx
 991 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 992                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
 993                    enum machine_mode mode, enum machine_mode tmode,
 994                    HOST_WIDE_INT total_size)
 995 {
 996   unsigned int unit
 997     = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
 998   unsigned HOST_WIDE_INT offset = bitnum / unit;
 999   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
1000   rtx op0 = str_rtx;
1001   rtx spec_target = target;
1002   rtx spec_target_subreg = 0;
1003   enum machine_mode int_mode;
1004   enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1005   enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1006   enum machine_mode mode1;
1007   int byte_offset;
1008
1009   /* Discount the part of the structure before the desired byte.
1010      We need to know how many bytes are safe to reference after it.  */
1011   if (total_size >= 0)
1012     total_size -= (bitpos / BIGGEST_ALIGNMENT
1013                    * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
1014
1015   if (tmode == VOIDmode)
1016     tmode = mode;
1017
1018   while (GET_CODE (op0) == SUBREG)
1019     {
1020       bitpos += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1021       if (bitpos > unit)
1022         {
1023           offset += (bitpos / unit);
1024           bitpos %= unit;
1025         }
1026       op0 = SUBREG_REG (op0);
1027     }
1028
1029   if (GET_CODE (op0) == REG
1030       && mode == GET_MODE (op0)
1031       && bitnum == 0
1032       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1033     {
1034       /* We're trying to extract a full register from itself.  */
1035       return op0;
1036     }
1037
1038   /* Make sure we are playing with integral modes.  Pun with subregs
1039      if we aren't.  */
1040   {
1041     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1042     if (imode != GET_MODE (op0))
1043       {
1044         if (GET_CODE (op0) == MEM)
1045           op0 = adjust_address (op0, imode, 0);
1046         else if (imode != BLKmode)
1047           op0 = gen_lowpart (imode, op0);
1048         else
1049           abort ();
1050       }
1051   }
1052
1053   /* We may be accessing data outside the field, which means
1054      we can alias adjacent data.  */
1055   if (GET_CODE (op0) == MEM)
1056     {
1057       op0 = shallow_copy_rtx (op0);
1058       set_mem_alias_set (op0, 0);
1059       set_mem_expr (op0, 0);
1060     }
1061
1062   /* Extraction of a full-word or multi-word value from a structure
1063      in a register or aligned memory can be done with just a SUBREG.
1064      A subword value in the least significant part of a register
1065      can also be extracted with a SUBREG.  For this, we need the
1066      byte offset of the value in op0.  */
1067
1068   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1069
1070   /* If OP0 is a register, BITPOS must count within a word.
1071      But as we have it, it counts within whatever size OP0 now has.
1072      On a bigendian machine, these are not the same, so convert.  */
1073   if (BYTES_BIG_ENDIAN
1074       && GET_CODE (op0) != MEM
1075       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1076     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1077
1078   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1079      If that's wrong, the solution is to test for it and set TARGET to 0
1080      if needed.  */
1081
1082   /* Only scalar integer modes can be converted via subregs.  There is an
1083      additional problem for FP modes here in that they can have a precision
1084      which is different from the size.  mode_for_size uses precision, but
1085      we want a mode based on the size, so we must avoid calling it for FP
1086      modes.  */
1087   mode1  = (SCALAR_INT_MODE_P (tmode)
1088             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1089             : mode);
1090
1091   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1092         && bitpos % BITS_PER_WORD == 0)
1093        || (mode1 != BLKmode
1094            /* ??? The big endian test here is wrong.  This is correct
1095               if the value is in a register, and if mode_for_size is not
1096               the same mode as op0.  This causes us to get unnecessarily
1097               inefficient code from the Thumb port when -mbig-endian.  */
1098            && (BYTES_BIG_ENDIAN
1099                ? bitpos + bitsize == BITS_PER_WORD
1100                : bitpos == 0)))
1101       && ((GET_CODE (op0) != MEM
1102            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1103                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1104            && GET_MODE_SIZE (mode1) != 0
1105            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1106           || (GET_CODE (op0) == MEM
1107               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1108                   || (offset * BITS_PER_UNIT % bitsize == 0
1109                       && MEM_ALIGN (op0) % bitsize == 0)))))
1110     {
1111       if (mode1 != GET_MODE (op0))
1112         {
1113           if (GET_CODE (op0) == SUBREG)
1114             {
1115               if (GET_MODE (SUBREG_REG (op0)) == mode1
1116                   || GET_MODE_CLASS (mode1) == MODE_INT
1117                   || GET_MODE_CLASS (mode1) == MODE_PARTIAL_INT)
1118                 op0 = SUBREG_REG (op0);
1119               else
1120                 /* Else we've got some float mode source being extracted into
1121                    a different float mode destination -- this combination of
1122                    subregs results in Severe Tire Damage.  */
1123                 goto no_subreg_mode_swap;
1124             }
1125           if (GET_CODE (op0) == REG)
1126             op0 = gen_rtx_SUBREG (mode1, op0, byte_offset);
1127           else
1128             op0 = adjust_address (op0, mode1, offset);
1129         }
1130       if (mode1 != mode)
1131         return convert_to_mode (tmode, op0, unsignedp);
1132       return op0;
1133     }
1134  no_subreg_mode_swap:
1135
1136   /* Handle fields bigger than a word.  */
1137
1138   if (bitsize > BITS_PER_WORD)
1139     {
1140       /* Here we transfer the words of the field
1141          in the order least significant first.
1142          This is because the most significant word is the one which may
1143          be less than full.  */
1144
1145       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1146       unsigned int i;
1147
1148       if (target == 0 || GET_CODE (target) != REG)
1149         target = gen_reg_rtx (mode);
1150
1151       /* Indicate for flow that the entire target reg is being set.  */
1152       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1153
1154       for (i = 0; i < nwords; i++)
1155         {
1156           /* If I is 0, use the low-order word in both field and target;
1157              if I is 1, use the next to lowest word; and so on.  */
1158           /* Word number in TARGET to use.  */
1159           unsigned int wordnum
1160             = (WORDS_BIG_ENDIAN
1161                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1162                : i);
1163           /* Offset from start of field in OP0.  */
1164           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1165                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1166                                                 * (int) BITS_PER_WORD))
1167                                      : (int) i * BITS_PER_WORD);
1168           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1169           rtx result_part
1170             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1171                                            bitsize - i * BITS_PER_WORD),
1172                                  bitnum + bit_offset, 1, target_part, mode,
1173                                  word_mode, total_size);
1174
1175           if (target_part == 0)
1176             abort ();
1177
1178           if (result_part != target_part)
1179             emit_move_insn (target_part, result_part);
1180         }
1181
1182       if (unsignedp)
1183         {
1184           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1185              need to be zero'd out.  */
1186           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1187             {
1188               unsigned int i, total_words;
1189
1190               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1191               for (i = nwords; i < total_words; i++)
1192                 emit_move_insn
1193                   (operand_subword (target,
1194                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1195                                     1, VOIDmode),
1196                    const0_rtx);
1197             }
1198           return target;
1199         }
1200
1201       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1202       target = expand_shift (LSHIFT_EXPR, mode, target,
1203                              build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1204                              NULL_RTX, 0);
1205       return expand_shift (RSHIFT_EXPR, mode, target,
1206                            build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1207                            NULL_RTX, 0);
1208     }
1209
1210   /* From here on we know the desired field is smaller than a word.  */
1211
1212   /* Check if there is a correspondingly-sized integer field, so we can
1213      safely extract it as one size of integer, if necessary; then
1214      truncate or extend to the size that is wanted; then use SUBREGs or
1215      convert_to_mode to get one of the modes we really wanted.  */
1216
1217   int_mode = int_mode_for_mode (tmode);
1218   if (int_mode == BLKmode)
1219     int_mode = int_mode_for_mode (mode);
1220   if (int_mode == BLKmode)
1221     abort ();    /* Should probably push op0 out to memory and then
1222                     do a load.  */
1223
1224   /* OFFSET is the number of words or bytes (UNIT says which)
1225      from STR_RTX to the first word or byte containing part of the field.  */
1226
1227   if (GET_CODE (op0) != MEM)
1228     {
1229       if (offset != 0
1230           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1231         {
1232           if (GET_CODE (op0) != REG)
1233             op0 = copy_to_reg (op0);
1234           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1235                                 op0, (offset * UNITS_PER_WORD));
1236         }
1237       offset = 0;
1238     }
1239   else
1240     op0 = protect_from_queue (str_rtx, 1);
1241
1242   /* Now OFFSET is nonzero only for memory operands.  */
1243
1244   if (unsignedp)
1245     {
1246       if (HAVE_extzv
1247           && (GET_MODE_BITSIZE (extzv_mode) >= bitsize)
1248           && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1249                 && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1250         {
1251           unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1252           rtx bitsize_rtx, bitpos_rtx;
1253           rtx last = get_last_insn ();
1254           rtx xop0 = op0;
1255           rtx xtarget = target;
1256           rtx xspec_target = spec_target;
1257           rtx xspec_target_subreg = spec_target_subreg;
1258           rtx pat;
1259           enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1260
1261           if (GET_CODE (xop0) == MEM)
1262             {
1263               int save_volatile_ok = volatile_ok;
1264               volatile_ok = 1;
1265
1266               /* Is the memory operand acceptable?  */
1267               if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1268                      (xop0, GET_MODE (xop0))))
1269                 {
1270                   /* No, load into a reg and extract from there.  */
1271                   enum machine_mode bestmode;
1272
1273                   /* Get the mode to use for inserting into this field.  If
1274                      OP0 is BLKmode, get the smallest mode consistent with the
1275                      alignment. If OP0 is a non-BLKmode object that is no
1276                      wider than MAXMODE, use its mode. Otherwise, use the
1277                      smallest mode containing the field.  */
1278
1279                   if (GET_MODE (xop0) == BLKmode
1280                       || (GET_MODE_SIZE (GET_MODE (op0))
1281                           > GET_MODE_SIZE (maxmode)))
1282                     bestmode = get_best_mode (bitsize, bitnum,
1283                                               MEM_ALIGN (xop0), maxmode,
1284                                               MEM_VOLATILE_P (xop0));
1285                   else
1286                     bestmode = GET_MODE (xop0);
1287
1288                   if (bestmode == VOIDmode
1289                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1290                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1291                     goto extzv_loses;
1292
1293                   /* Compute offset as multiple of this unit,
1294                      counting in bytes.  */
1295                   unit = GET_MODE_BITSIZE (bestmode);
1296                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1297                   xbitpos = bitnum % unit;
1298                   xop0 = adjust_address (xop0, bestmode, xoffset);
1299
1300                   /* Fetch it to a register in that size.  */
1301                   xop0 = force_reg (bestmode, xop0);
1302
1303                   /* XBITPOS counts within UNIT, which is what is expected.  */
1304                 }
1305               else
1306                 /* Get ref to first byte containing part of the field.  */
1307                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1308
1309               volatile_ok = save_volatile_ok;
1310             }
1311
1312           /* If op0 is a register, we need it in MAXMODE (which is usually
1313              SImode). to make it acceptable to the format of extzv.  */
1314           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1315             goto extzv_loses;
1316           if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1317             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1318
1319           /* On big-endian machines, we count bits from the most significant.
1320              If the bit field insn does not, we must invert.  */
1321           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1322             xbitpos = unit - bitsize - xbitpos;
1323
1324           /* Now convert from counting within UNIT to counting in MAXMODE.  */
1325           if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1326             xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1327
1328           unit = GET_MODE_BITSIZE (maxmode);
1329
1330           if (xtarget == 0
1331               || (flag_force_mem && GET_CODE (xtarget) == MEM))
1332             xtarget = xspec_target = gen_reg_rtx (tmode);
1333
1334           if (GET_MODE (xtarget) != maxmode)
1335             {
1336               if (GET_CODE (xtarget) == REG)
1337                 {
1338                   int wider = (GET_MODE_SIZE (maxmode)
1339                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1340                   xtarget = gen_lowpart (maxmode, xtarget);
1341                   if (wider)
1342                     xspec_target_subreg = xtarget;
1343                 }
1344               else
1345                 xtarget = gen_reg_rtx (maxmode);
1346             }
1347
1348           /* If this machine's extzv insists on a register target,
1349              make sure we have one.  */
1350           if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1351                  (xtarget, maxmode)))
1352             xtarget = gen_reg_rtx (maxmode);
1353
1354           bitsize_rtx = GEN_INT (bitsize);
1355           bitpos_rtx = GEN_INT (xbitpos);
1356
1357           pat = gen_extzv (protect_from_queue (xtarget, 1),
1358                            xop0, bitsize_rtx, bitpos_rtx);
1359           if (pat)
1360             {
1361               emit_insn (pat);
1362               target = xtarget;
1363               spec_target = xspec_target;
1364               spec_target_subreg = xspec_target_subreg;
1365             }
1366           else
1367             {
1368               delete_insns_since (last);
1369               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1370                                                 bitpos, target, 1);
1371             }
1372         }
1373       else
1374       extzv_loses:
1375         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1376                                           bitpos, target, 1);
1377     }
1378   else
1379     {
1380       if (HAVE_extv
1381           && (GET_MODE_BITSIZE (extv_mode) >= bitsize)
1382           && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1383                 && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1384         {
1385           int xbitpos = bitpos, xoffset = offset;
1386           rtx bitsize_rtx, bitpos_rtx;
1387           rtx last = get_last_insn ();
1388           rtx xop0 = op0, xtarget = target;
1389           rtx xspec_target = spec_target;
1390           rtx xspec_target_subreg = spec_target_subreg;
1391           rtx pat;
1392           enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1393
1394           if (GET_CODE (xop0) == MEM)
1395             {
1396               /* Is the memory operand acceptable?  */
1397               if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1398                      (xop0, GET_MODE (xop0))))
1399                 {
1400                   /* No, load into a reg and extract from there.  */
1401                   enum machine_mode bestmode;
1402
1403                   /* Get the mode to use for inserting into this field.  If
1404                      OP0 is BLKmode, get the smallest mode consistent with the
1405                      alignment. If OP0 is a non-BLKmode object that is no
1406                      wider than MAXMODE, use its mode. Otherwise, use the
1407                      smallest mode containing the field.  */
1408
1409                   if (GET_MODE (xop0) == BLKmode
1410                       || (GET_MODE_SIZE (GET_MODE (op0))
1411                           > GET_MODE_SIZE (maxmode)))
1412                     bestmode = get_best_mode (bitsize, bitnum,
1413                                               MEM_ALIGN (xop0), maxmode,
1414                                               MEM_VOLATILE_P (xop0));
1415                   else
1416                     bestmode = GET_MODE (xop0);
1417
1418                   if (bestmode == VOIDmode
1419                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1420                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1421                     goto extv_loses;
1422
1423                   /* Compute offset as multiple of this unit,
1424                      counting in bytes.  */
1425                   unit = GET_MODE_BITSIZE (bestmode);
1426                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1427                   xbitpos = bitnum % unit;
1428                   xop0 = adjust_address (xop0, bestmode, xoffset);
1429
1430                   /* Fetch it to a register in that size.  */
1431                   xop0 = force_reg (bestmode, xop0);
1432
1433                   /* XBITPOS counts within UNIT, which is what is expected.  */
1434                 }
1435               else
1436                 /* Get ref to first byte containing part of the field.  */
1437                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1438             }
1439
1440           /* If op0 is a register, we need it in MAXMODE (which is usually
1441              SImode) to make it acceptable to the format of extv.  */
1442           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1443             goto extv_loses;
1444           if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
1445             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1446
1447           /* On big-endian machines, we count bits from the most significant.
1448              If the bit field insn does not, we must invert.  */
1449           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1450             xbitpos = unit - bitsize - xbitpos;
1451
1452           /* XBITPOS counts within a size of UNIT.
1453              Adjust to count within a size of MAXMODE.  */
1454           if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
1455             xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1456
1457           unit = GET_MODE_BITSIZE (maxmode);
1458
1459           if (xtarget == 0
1460               || (flag_force_mem && GET_CODE (xtarget) == MEM))
1461             xtarget = xspec_target = gen_reg_rtx (tmode);
1462
1463           if (GET_MODE (xtarget) != maxmode)
1464             {
1465               if (GET_CODE (xtarget) == REG)
1466                 {
1467                   int wider = (GET_MODE_SIZE (maxmode)
1468                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1469                   xtarget = gen_lowpart (maxmode, xtarget);
1470                   if (wider)
1471                     xspec_target_subreg = xtarget;
1472                 }
1473               else
1474                 xtarget = gen_reg_rtx (maxmode);
1475             }
1476
1477           /* If this machine's extv insists on a register target,
1478              make sure we have one.  */
1479           if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1480                  (xtarget, maxmode)))
1481             xtarget = gen_reg_rtx (maxmode);
1482
1483           bitsize_rtx = GEN_INT (bitsize);
1484           bitpos_rtx = GEN_INT (xbitpos);
1485
1486           pat = gen_extv (protect_from_queue (xtarget, 1),
1487                           xop0, bitsize_rtx, bitpos_rtx);
1488           if (pat)
1489             {
1490               emit_insn (pat);
1491               target = xtarget;
1492               spec_target = xspec_target;
1493               spec_target_subreg = xspec_target_subreg;
1494             }
1495           else
1496             {
1497               delete_insns_since (last);
1498               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1499                                                 bitpos, target, 0);
1500             }
1501         }
1502       else
1503       extv_loses:
1504         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1505                                           bitpos, target, 0);
1506     }
1507   if (target == spec_target)
1508     return target;
1509   if (target == spec_target_subreg)
1510     return spec_target;
1511   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1512     {
1513       /* If the target mode is floating-point, first convert to the
1514          integer mode of that size and then access it as a floating-point
1515          value via a SUBREG.  */
1516       if (GET_MODE_CLASS (tmode) != MODE_INT
1517           && GET_MODE_CLASS (tmode) != MODE_PARTIAL_INT)
1518         {
1519           target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
1520                                                    MODE_INT, 0),
1521                                     target, unsignedp);
1522           return gen_lowpart (tmode, target);
1523         }
1524       else
1525         return convert_to_mode (tmode, target, unsignedp);
1526     }
1527   return target;
1528 }
1529 \f
1530 /* Extract a bit field using shifts and boolean operations
1531    Returns an rtx to represent the value.
1532    OP0 addresses a register (word) or memory (byte).
1533    BITPOS says which bit within the word or byte the bit field starts in.
1534    OFFSET says how many bytes farther the bit field starts;
1535     it is 0 if OP0 is a register.
1536    BITSIZE says how many bits long the bit field is.
1537     (If OP0 is a register, it may be narrower than a full word,
1538      but BITPOS still counts within a full word,
1539      which is significant on bigendian machines.)
1540
1541    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1542    If TARGET is nonzero, attempts to store the value there
1543    and return TARGET, but this is not guaranteed.
1544    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1545
1546 static rtx
1547 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1548                          unsigned HOST_WIDE_INT offset,
1549                          unsigned HOST_WIDE_INT bitsize,
1550                          unsigned HOST_WIDE_INT bitpos, rtx target,
1551                          int unsignedp)
1552 {
1553   unsigned int total_bits = BITS_PER_WORD;
1554   enum machine_mode mode;
1555
1556   if (GET_CODE (op0) == SUBREG || GET_CODE (op0) == REG)
1557     {
1558       /* Special treatment for a bit field split across two registers.  */
1559       if (bitsize + bitpos > BITS_PER_WORD)
1560         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1561     }
1562   else
1563     {
1564       /* Get the proper mode to use for this field.  We want a mode that
1565          includes the entire field.  If such a mode would be larger than
1566          a word, we won't be doing the extraction the normal way.  */
1567
1568       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1569                             MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1570
1571       if (mode == VOIDmode)
1572         /* The only way this should occur is if the field spans word
1573            boundaries.  */
1574         return extract_split_bit_field (op0, bitsize,
1575                                         bitpos + offset * BITS_PER_UNIT,
1576                                         unsignedp);
1577
1578       total_bits = GET_MODE_BITSIZE (mode);
1579
1580       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1581          be in the range 0 to total_bits-1, and put any excess bytes in
1582          OFFSET.  */
1583       if (bitpos >= total_bits)
1584         {
1585           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1586           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1587                      * BITS_PER_UNIT);
1588         }
1589
1590       /* Get ref to an aligned byte, halfword, or word containing the field.
1591          Adjust BITPOS to be position within a word,
1592          and OFFSET to be the offset of that word.
1593          Then alter OP0 to refer to that word.  */
1594       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1595       offset -= (offset % (total_bits / BITS_PER_UNIT));
1596       op0 = adjust_address (op0, mode, offset);
1597     }
1598
1599   mode = GET_MODE (op0);
1600
1601   if (BYTES_BIG_ENDIAN)
1602     /* BITPOS is the distance between our msb and that of OP0.
1603        Convert it to the distance from the lsb.  */
1604     bitpos = total_bits - bitsize - bitpos;
1605
1606   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1607      We have reduced the big-endian case to the little-endian case.  */
1608
1609   if (unsignedp)
1610     {
1611       if (bitpos)
1612         {
1613           /* If the field does not already start at the lsb,
1614              shift it so it does.  */
1615           tree amount = build_int_2 (bitpos, 0);
1616           /* Maybe propagate the target for the shift.  */
1617           /* But not if we will return it--could confuse integrate.c.  */
1618           rtx subtarget = (target != 0 && GET_CODE (target) == REG
1619                            && !REG_FUNCTION_VALUE_P (target)
1620                            ? target : 0);
1621           if (tmode != mode) subtarget = 0;
1622           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1623         }
1624       /* Convert the value to the desired mode.  */
1625       if (mode != tmode)
1626         op0 = convert_to_mode (tmode, op0, 1);
1627
1628       /* Unless the msb of the field used to be the msb when we shifted,
1629          mask out the upper bits.  */
1630
1631       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1632         return expand_binop (GET_MODE (op0), and_optab, op0,
1633                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1634                              target, 1, OPTAB_LIB_WIDEN);
1635       return op0;
1636     }
1637
1638   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1639      then arithmetic-shift its lsb to the lsb of the word.  */
1640   op0 = force_reg (mode, op0);
1641   if (mode != tmode)
1642     target = 0;
1643
1644   /* Find the narrowest integer mode that contains the field.  */
1645
1646   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1647        mode = GET_MODE_WIDER_MODE (mode))
1648     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1649       {
1650         op0 = convert_to_mode (mode, op0, 0);
1651         break;
1652       }
1653
1654   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1655     {
1656       tree amount
1657         = build_int_2 (GET_MODE_BITSIZE (mode) - (bitsize + bitpos), 0);
1658       /* Maybe propagate the target for the shift.  */
1659       /* But not if we will return the result--could confuse integrate.c.  */
1660       rtx subtarget = (target != 0 && GET_CODE (target) == REG
1661                        && ! REG_FUNCTION_VALUE_P (target)
1662                        ? target : 0);
1663       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1664     }
1665
1666   return expand_shift (RSHIFT_EXPR, mode, op0,
1667                        build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1668                        target, 0);
1669 }
1670 \f
1671 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1672    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1673    complement of that if COMPLEMENT.  The mask is truncated if
1674    necessary to the width of mode MODE.  The mask is zero-extended if
1675    BITSIZE+BITPOS is too small for MODE.  */
1676
1677 static rtx
1678 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1679 {
1680   HOST_WIDE_INT masklow, maskhigh;
1681
1682   if (bitsize == 0)
1683     masklow = 0;
1684   else if (bitpos < HOST_BITS_PER_WIDE_INT)
1685     masklow = (HOST_WIDE_INT) -1 << bitpos;
1686   else
1687     masklow = 0;
1688
1689   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1690     masklow &= ((unsigned HOST_WIDE_INT) -1
1691                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1692
1693   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1694     maskhigh = -1;
1695   else
1696     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1697
1698   if (bitsize == 0)
1699     maskhigh = 0;
1700   else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1701     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1702                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1703   else
1704     maskhigh = 0;
1705
1706   if (complement)
1707     {
1708       maskhigh = ~maskhigh;
1709       masklow = ~masklow;
1710     }
1711
1712   return immed_double_const (masklow, maskhigh, mode);
1713 }
1714
1715 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1716    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1717
1718 static rtx
1719 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1720 {
1721   unsigned HOST_WIDE_INT v = INTVAL (value);
1722   HOST_WIDE_INT low, high;
1723
1724   if (bitsize < HOST_BITS_PER_WIDE_INT)
1725     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1726
1727   if (bitpos < HOST_BITS_PER_WIDE_INT)
1728     {
1729       low = v << bitpos;
1730       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1731     }
1732   else
1733     {
1734       low = 0;
1735       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1736     }
1737
1738   return immed_double_const (low, high, mode);
1739 }
1740 \f
1741 /* Extract a bit field that is split across two words
1742    and return an RTX for the result.
1743
1744    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1745    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1746    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1747
1748 static rtx
1749 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1750                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1751 {
1752   unsigned int unit;
1753   unsigned int bitsdone = 0;
1754   rtx result = NULL_RTX;
1755   int first = 1;
1756
1757   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1758      much at a time.  */
1759   if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1760     unit = BITS_PER_WORD;
1761   else
1762     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1763
1764   while (bitsdone < bitsize)
1765     {
1766       unsigned HOST_WIDE_INT thissize;
1767       rtx part, word;
1768       unsigned HOST_WIDE_INT thispos;
1769       unsigned HOST_WIDE_INT offset;
1770
1771       offset = (bitpos + bitsdone) / unit;
1772       thispos = (bitpos + bitsdone) % unit;
1773
1774       /* THISSIZE must not overrun a word boundary.  Otherwise,
1775          extract_fixed_bit_field will call us again, and we will mutually
1776          recurse forever.  */
1777       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1778       thissize = MIN (thissize, unit - thispos);
1779
1780       /* If OP0 is a register, then handle OFFSET here.
1781
1782          When handling multiword bitfields, extract_bit_field may pass
1783          down a word_mode SUBREG of a larger REG for a bitfield that actually
1784          crosses a word boundary.  Thus, for a SUBREG, we must find
1785          the current word starting from the base register.  */
1786       if (GET_CODE (op0) == SUBREG)
1787         {
1788           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1789           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1790                                         GET_MODE (SUBREG_REG (op0)));
1791           offset = 0;
1792         }
1793       else if (GET_CODE (op0) == REG)
1794         {
1795           word = operand_subword_force (op0, offset, GET_MODE (op0));
1796           offset = 0;
1797         }
1798       else
1799         word = op0;
1800
1801       /* Extract the parts in bit-counting order,
1802          whose meaning is determined by BYTES_PER_UNIT.
1803          OFFSET is in UNITs, and UNIT is in bits.
1804          extract_fixed_bit_field wants offset in bytes.  */
1805       part = extract_fixed_bit_field (word_mode, word,
1806                                       offset * unit / BITS_PER_UNIT,
1807                                       thissize, thispos, 0, 1);
1808       bitsdone += thissize;
1809
1810       /* Shift this part into place for the result.  */
1811       if (BYTES_BIG_ENDIAN)
1812         {
1813           if (bitsize != bitsdone)
1814             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1815                                  build_int_2 (bitsize - bitsdone, 0), 0, 1);
1816         }
1817       else
1818         {
1819           if (bitsdone != thissize)
1820             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1821                                  build_int_2 (bitsdone - thissize, 0), 0, 1);
1822         }
1823
1824       if (first)
1825         result = part;
1826       else
1827         /* Combine the parts with bitwise or.  This works
1828            because we extracted each part as an unsigned bit field.  */
1829         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1830                                OPTAB_LIB_WIDEN);
1831
1832       first = 0;
1833     }
1834
1835   /* Unsigned bit field: we are done.  */
1836   if (unsignedp)
1837     return result;
1838   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1839   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1840                          build_int_2 (BITS_PER_WORD - bitsize, 0),
1841                          NULL_RTX, 0);
1842   return expand_shift (RSHIFT_EXPR, word_mode, result,
1843                        build_int_2 (BITS_PER_WORD - bitsize, 0), NULL_RTX, 0);
1844 }
1845 \f
1846 /* Add INC into TARGET.  */
1847
1848 void
1849 expand_inc (rtx target, rtx inc)
1850 {
1851   rtx value = expand_binop (GET_MODE (target), add_optab,
1852                             target, inc,
1853                             target, 0, OPTAB_LIB_WIDEN);
1854   if (value != target)
1855     emit_move_insn (target, value);
1856 }
1857
1858 /* Subtract DEC from TARGET.  */
1859
1860 void
1861 expand_dec (rtx target, rtx dec)
1862 {
1863   rtx value = expand_binop (GET_MODE (target), sub_optab,
1864                             target, dec,
1865                             target, 0, OPTAB_LIB_WIDEN);
1866   if (value != target)
1867     emit_move_insn (target, value);
1868 }
1869 \f
1870 /* Output a shift instruction for expression code CODE,
1871    with SHIFTED being the rtx for the value to shift,
1872    and AMOUNT the tree for the amount to shift by.
1873    Store the result in the rtx TARGET, if that is convenient.
1874    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
1875    Return the rtx for where the value is.  */
1876
1877 rtx
1878 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
1879               tree amount, rtx target, int unsignedp)
1880 {
1881   rtx op1, temp = 0;
1882   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
1883   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
1884   int try;
1885
1886   /* Previously detected shift-counts computed by NEGATE_EXPR
1887      and shifted in the other direction; but that does not work
1888      on all machines.  */
1889
1890   op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
1891
1892 #ifdef SHIFT_COUNT_TRUNCATED
1893   if (SHIFT_COUNT_TRUNCATED)
1894     {
1895       if (GET_CODE (op1) == CONST_INT
1896           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
1897               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
1898         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
1899                        % GET_MODE_BITSIZE (mode));
1900       else if (GET_CODE (op1) == SUBREG
1901                && subreg_lowpart_p (op1))
1902         op1 = SUBREG_REG (op1);
1903     }
1904 #endif
1905
1906   if (op1 == const0_rtx)
1907     return shifted;
1908
1909   for (try = 0; temp == 0 && try < 3; try++)
1910     {
1911       enum optab_methods methods;
1912
1913       if (try == 0)
1914         methods = OPTAB_DIRECT;
1915       else if (try == 1)
1916         methods = OPTAB_WIDEN;
1917       else
1918         methods = OPTAB_LIB_WIDEN;
1919
1920       if (rotate)
1921         {
1922           /* Widening does not work for rotation.  */
1923           if (methods == OPTAB_WIDEN)
1924             continue;
1925           else if (methods == OPTAB_LIB_WIDEN)
1926             {
1927               /* If we have been unable to open-code this by a rotation,
1928                  do it as the IOR of two shifts.  I.e., to rotate A
1929                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
1930                  where C is the bitsize of A.
1931
1932                  It is theoretically possible that the target machine might
1933                  not be able to perform either shift and hence we would
1934                  be making two libcalls rather than just the one for the
1935                  shift (similarly if IOR could not be done).  We will allow
1936                  this extremely unlikely lossage to avoid complicating the
1937                  code below.  */
1938
1939               rtx subtarget = target == shifted ? 0 : target;
1940               rtx temp1;
1941               tree type = TREE_TYPE (amount);
1942               tree new_amount = make_tree (type, op1);
1943               tree other_amount
1944                 = fold (build (MINUS_EXPR, type,
1945                                convert (type,
1946                                         build_int_2 (GET_MODE_BITSIZE (mode),
1947                                                      0)),
1948                                amount));
1949
1950               shifted = force_reg (mode, shifted);
1951
1952               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
1953                                    mode, shifted, new_amount, subtarget, 1);
1954               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
1955                                     mode, shifted, other_amount, 0, 1);
1956               return expand_binop (mode, ior_optab, temp, temp1, target,
1957                                    unsignedp, methods);
1958             }
1959
1960           temp = expand_binop (mode,
1961                                left ? rotl_optab : rotr_optab,
1962                                shifted, op1, target, unsignedp, methods);
1963
1964           /* If we don't have the rotate, but we are rotating by a constant
1965              that is in range, try a rotate in the opposite direction.  */
1966
1967           if (temp == 0 && GET_CODE (op1) == CONST_INT
1968               && INTVAL (op1) > 0
1969               && (unsigned int) INTVAL (op1) < GET_MODE_BITSIZE (mode))
1970             temp = expand_binop (mode,
1971                                  left ? rotr_optab : rotl_optab,
1972                                  shifted,
1973                                  GEN_INT (GET_MODE_BITSIZE (mode)
1974                                           - INTVAL (op1)),
1975                                  target, unsignedp, methods);
1976         }
1977       else if (unsignedp)
1978         temp = expand_binop (mode,
1979                              left ? ashl_optab : lshr_optab,
1980                              shifted, op1, target, unsignedp, methods);
1981
1982       /* Do arithmetic shifts.
1983          Also, if we are going to widen the operand, we can just as well
1984          use an arithmetic right-shift instead of a logical one.  */
1985       if (temp == 0 && ! rotate
1986           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
1987         {
1988           enum optab_methods methods1 = methods;
1989
1990           /* If trying to widen a log shift to an arithmetic shift,
1991              don't accept an arithmetic shift of the same size.  */
1992           if (unsignedp)
1993             methods1 = OPTAB_MUST_WIDEN;
1994
1995           /* Arithmetic shift */
1996
1997           temp = expand_binop (mode,
1998                                left ? ashl_optab : ashr_optab,
1999                                shifted, op1, target, unsignedp, methods1);
2000         }
2001
2002       /* We used to try extzv here for logical right shifts, but that was
2003          only useful for one machine, the VAX, and caused poor code
2004          generation there for lshrdi3, so the code was deleted and a
2005          define_expand for lshrsi3 was added to vax.md.  */
2006     }
2007
2008   if (temp == 0)
2009     abort ();
2010   return temp;
2011 }
2012 \f
2013 enum alg_code { alg_zero, alg_m, alg_shift,
2014                   alg_add_t_m2, alg_sub_t_m2,
2015                   alg_add_factor, alg_sub_factor,
2016                   alg_add_t2_m, alg_sub_t2_m,
2017                   alg_add, alg_subtract, alg_factor, alg_shiftop };
2018
2019 /* This structure records a sequence of operations.
2020    `ops' is the number of operations recorded.
2021    `cost' is their total cost.
2022    The operations are stored in `op' and the corresponding
2023    logarithms of the integer coefficients in `log'.
2024
2025    These are the operations:
2026    alg_zero             total := 0;
2027    alg_m                total := multiplicand;
2028    alg_shift            total := total * coeff
2029    alg_add_t_m2         total := total + multiplicand * coeff;
2030    alg_sub_t_m2         total := total - multiplicand * coeff;
2031    alg_add_factor       total := total * coeff + total;
2032    alg_sub_factor       total := total * coeff - total;
2033    alg_add_t2_m         total := total * coeff + multiplicand;
2034    alg_sub_t2_m         total := total * coeff - multiplicand;
2035
2036    The first operand must be either alg_zero or alg_m.  */
2037
2038 struct algorithm
2039 {
2040   short cost;
2041   short ops;
2042   /* The size of the OP and LOG fields are not directly related to the
2043      word size, but the worst-case algorithms will be if we have few
2044      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2045      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2046      in total wordsize operations.  */
2047   enum alg_code op[MAX_BITS_PER_WORD];
2048   char log[MAX_BITS_PER_WORD];
2049 };
2050
2051 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, int);
2052 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2053                                                  int, unsigned HOST_WIDE_INT *,
2054                                                  int *, int *);
2055 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2056 /* Compute and return the best algorithm for multiplying by T.
2057    The algorithm must cost less than cost_limit
2058    If retval.cost >= COST_LIMIT, no algorithm was found and all
2059    other field of the returned struct are undefined.  */
2060
2061 static void
2062 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2063             int cost_limit)
2064 {
2065   int m;
2066   struct algorithm *alg_in, *best_alg;
2067   int cost;
2068   unsigned HOST_WIDE_INT q;
2069
2070   /* Indicate that no algorithm is yet found.  If no algorithm
2071      is found, this value will be returned and indicate failure.  */
2072   alg_out->cost = cost_limit;
2073
2074   if (cost_limit <= 0)
2075     return;
2076
2077   /* t == 1 can be done in zero cost.  */
2078   if (t == 1)
2079     {
2080       alg_out->ops = 1;
2081       alg_out->cost = 0;
2082       alg_out->op[0] = alg_m;
2083       return;
2084     }
2085
2086   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2087      fail now.  */
2088   if (t == 0)
2089     {
2090       if (zero_cost >= cost_limit)
2091         return;
2092       else
2093         {
2094           alg_out->ops = 1;
2095           alg_out->cost = zero_cost;
2096           alg_out->op[0] = alg_zero;
2097           return;
2098         }
2099     }
2100
2101   /* We'll be needing a couple extra algorithm structures now.  */
2102
2103   alg_in = alloca (sizeof (struct algorithm));
2104   best_alg = alloca (sizeof (struct algorithm));
2105
2106   /* If we have a group of zero bits at the low-order part of T, try
2107      multiplying by the remaining bits and then doing a shift.  */
2108
2109   if ((t & 1) == 0)
2110     {
2111       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2112       if (m < BITS_PER_WORD)
2113         {
2114           q = t >> m;
2115           cost = shift_cost[m];
2116           synth_mult (alg_in, q, cost_limit - cost);
2117
2118           cost += alg_in->cost;
2119           if (cost < cost_limit)
2120             {
2121               struct algorithm *x;
2122               x = alg_in, alg_in = best_alg, best_alg = x;
2123               best_alg->log[best_alg->ops] = m;
2124               best_alg->op[best_alg->ops] = alg_shift;
2125               cost_limit = cost;
2126             }
2127         }
2128     }
2129
2130   /* If we have an odd number, add or subtract one.  */
2131   if ((t & 1) != 0)
2132     {
2133       unsigned HOST_WIDE_INT w;
2134
2135       for (w = 1; (w & t) != 0; w <<= 1)
2136         ;
2137       /* If T was -1, then W will be zero after the loop.  This is another
2138          case where T ends with ...111.  Handling this with (T + 1) and
2139          subtract 1 produces slightly better code and results in algorithm
2140          selection much faster than treating it like the ...0111 case
2141          below.  */
2142       if (w == 0
2143           || (w > 2
2144               /* Reject the case where t is 3.
2145                  Thus we prefer addition in that case.  */
2146               && t != 3))
2147         {
2148           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2149
2150           cost = add_cost;
2151           synth_mult (alg_in, t + 1, cost_limit - cost);
2152
2153           cost += alg_in->cost;
2154           if (cost < cost_limit)
2155             {
2156               struct algorithm *x;
2157               x = alg_in, alg_in = best_alg, best_alg = x;
2158               best_alg->log[best_alg->ops] = 0;
2159               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2160               cost_limit = cost;
2161             }
2162         }
2163       else
2164         {
2165           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2166
2167           cost = add_cost;
2168           synth_mult (alg_in, t - 1, cost_limit - cost);
2169
2170           cost += alg_in->cost;
2171           if (cost < cost_limit)
2172             {
2173               struct algorithm *x;
2174               x = alg_in, alg_in = best_alg, best_alg = x;
2175               best_alg->log[best_alg->ops] = 0;
2176               best_alg->op[best_alg->ops] = alg_add_t_m2;
2177               cost_limit = cost;
2178             }
2179         }
2180     }
2181
2182   /* Look for factors of t of the form
2183      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2184      If we find such a factor, we can multiply by t using an algorithm that
2185      multiplies by q, shift the result by m and add/subtract it to itself.
2186
2187      We search for large factors first and loop down, even if large factors
2188      are less probable than small; if we find a large factor we will find a
2189      good sequence quickly, and therefore be able to prune (by decreasing
2190      COST_LIMIT) the search.  */
2191
2192   for (m = floor_log2 (t - 1); m >= 2; m--)
2193     {
2194       unsigned HOST_WIDE_INT d;
2195
2196       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2197       if (t % d == 0 && t > d && m < BITS_PER_WORD)
2198         {
2199           cost = MIN (shiftadd_cost[m], add_cost + shift_cost[m]);
2200           synth_mult (alg_in, t / d, cost_limit - cost);
2201
2202           cost += alg_in->cost;
2203           if (cost < cost_limit)
2204             {
2205               struct algorithm *x;
2206               x = alg_in, alg_in = best_alg, best_alg = x;
2207               best_alg->log[best_alg->ops] = m;
2208               best_alg->op[best_alg->ops] = alg_add_factor;
2209               cost_limit = cost;
2210             }
2211           /* Other factors will have been taken care of in the recursion.  */
2212           break;
2213         }
2214
2215       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2216       if (t % d == 0 && t > d && m < BITS_PER_WORD)
2217         {
2218           cost = MIN (shiftsub_cost[m], add_cost + shift_cost[m]);
2219           synth_mult (alg_in, t / d, cost_limit - cost);
2220
2221           cost += alg_in->cost;
2222           if (cost < cost_limit)
2223             {
2224               struct algorithm *x;
2225               x = alg_in, alg_in = best_alg, best_alg = x;
2226               best_alg->log[best_alg->ops] = m;
2227               best_alg->op[best_alg->ops] = alg_sub_factor;
2228               cost_limit = cost;
2229             }
2230           break;
2231         }
2232     }
2233
2234   /* Try shift-and-add (load effective address) instructions,
2235      i.e. do a*3, a*5, a*9.  */
2236   if ((t & 1) != 0)
2237     {
2238       q = t - 1;
2239       q = q & -q;
2240       m = exact_log2 (q);
2241       if (m >= 0 && m < BITS_PER_WORD)
2242         {
2243           cost = shiftadd_cost[m];
2244           synth_mult (alg_in, (t - 1) >> m, cost_limit - cost);
2245
2246           cost += alg_in->cost;
2247           if (cost < cost_limit)
2248             {
2249               struct algorithm *x;
2250               x = alg_in, alg_in = best_alg, best_alg = x;
2251               best_alg->log[best_alg->ops] = m;
2252               best_alg->op[best_alg->ops] = alg_add_t2_m;
2253               cost_limit = cost;
2254             }
2255         }
2256
2257       q = t + 1;
2258       q = q & -q;
2259       m = exact_log2 (q);
2260       if (m >= 0 && m < BITS_PER_WORD)
2261         {
2262           cost = shiftsub_cost[m];
2263           synth_mult (alg_in, (t + 1) >> m, cost_limit - cost);
2264
2265           cost += alg_in->cost;
2266           if (cost < cost_limit)
2267             {
2268               struct algorithm *x;
2269               x = alg_in, alg_in = best_alg, best_alg = x;
2270               best_alg->log[best_alg->ops] = m;
2271               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2272               cost_limit = cost;
2273             }
2274         }
2275     }
2276
2277   /* If cost_limit has not decreased since we stored it in alg_out->cost,
2278      we have not found any algorithm.  */
2279   if (cost_limit == alg_out->cost)
2280     return;
2281
2282   /* If we are getting a too long sequence for `struct algorithm'
2283      to record, make this search fail.  */
2284   if (best_alg->ops == MAX_BITS_PER_WORD)
2285     return;
2286
2287   /* Copy the algorithm from temporary space to the space at alg_out.
2288      We avoid using structure assignment because the majority of
2289      best_alg is normally undefined, and this is a critical function.  */
2290   alg_out->ops = best_alg->ops + 1;
2291   alg_out->cost = cost_limit;
2292   memcpy (alg_out->op, best_alg->op,
2293           alg_out->ops * sizeof *alg_out->op);
2294   memcpy (alg_out->log, best_alg->log,
2295           alg_out->ops * sizeof *alg_out->log);
2296 }
2297 \f
2298 /* Perform a multiplication and return an rtx for the result.
2299    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2300    TARGET is a suggestion for where to store the result (an rtx).
2301
2302    We check specially for a constant integer as OP1.
2303    If you want this check for OP0 as well, then before calling
2304    you should swap the two operands if OP0 would be constant.  */
2305
2306 rtx
2307 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
2308              int unsignedp)
2309 {
2310   rtx const_op1 = op1;
2311
2312   /* synth_mult does an `unsigned int' multiply.  As long as the mode is
2313      less than or equal in size to `unsigned int' this doesn't matter.
2314      If the mode is larger than `unsigned int', then synth_mult works only
2315      if the constant value exactly fits in an `unsigned int' without any
2316      truncation.  This means that multiplying by negative values does
2317      not work; results are off by 2^32 on a 32 bit machine.  */
2318
2319   /* If we are multiplying in DImode, it may still be a win
2320      to try to work with shifts and adds.  */
2321   if (GET_CODE (op1) == CONST_DOUBLE
2322       && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2323       && HOST_BITS_PER_INT >= BITS_PER_WORD
2324       && CONST_DOUBLE_HIGH (op1) == 0)
2325     const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2326   else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2327            && GET_CODE (op1) == CONST_INT
2328            && INTVAL (op1) < 0)
2329     const_op1 = 0;
2330
2331   /* We used to test optimize here, on the grounds that it's better to
2332      produce a smaller program when -O is not used.
2333      But this causes such a terrible slowdown sometimes
2334      that it seems better to use synth_mult always.  */
2335
2336   if (const_op1 && GET_CODE (const_op1) == CONST_INT
2337       && (unsignedp || ! flag_trapv))
2338     {
2339       struct algorithm alg;
2340       struct algorithm alg2;
2341       HOST_WIDE_INT val = INTVAL (op1);
2342       HOST_WIDE_INT val_so_far;
2343       rtx insn;
2344       int mult_cost;
2345       enum {basic_variant, negate_variant, add_variant} variant = basic_variant;
2346
2347       /* op0 must be register to make mult_cost match the precomputed
2348          shiftadd_cost array.  */
2349       op0 = force_reg (mode, op0);
2350
2351       /* Try to do the computation three ways: multiply by the negative of OP1
2352          and then negate, do the multiplication directly, or do multiplication
2353          by OP1 - 1.  */
2354
2355       mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
2356       mult_cost = MIN (12 * add_cost, mult_cost);
2357
2358       synth_mult (&alg, val, mult_cost);
2359
2360       /* This works only if the inverted value actually fits in an
2361          `unsigned int' */
2362       if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2363         {
2364           synth_mult (&alg2, - val,
2365                       (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
2366           if (alg2.cost + negate_cost < alg.cost)
2367             alg = alg2, variant = negate_variant;
2368         }
2369
2370       /* This proves very useful for division-by-constant.  */
2371       synth_mult (&alg2, val - 1,
2372                   (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost);
2373       if (alg2.cost + add_cost < alg.cost)
2374         alg = alg2, variant = add_variant;
2375
2376       if (alg.cost < mult_cost)
2377         {
2378           /* We found something cheaper than a multiply insn.  */
2379           int opno;
2380           rtx accum, tem;
2381           enum machine_mode nmode;
2382
2383           op0 = protect_from_queue (op0, 0);
2384
2385           /* Avoid referencing memory over and over.
2386              For speed, but also for correctness when mem is volatile.  */
2387           if (GET_CODE (op0) == MEM)
2388             op0 = force_reg (mode, op0);
2389
2390           /* ACCUM starts out either as OP0 or as a zero, depending on
2391              the first operation.  */
2392
2393           if (alg.op[0] == alg_zero)
2394             {
2395               accum = copy_to_mode_reg (mode, const0_rtx);
2396               val_so_far = 0;
2397             }
2398           else if (alg.op[0] == alg_m)
2399             {
2400               accum = copy_to_mode_reg (mode, op0);
2401               val_so_far = 1;
2402             }
2403           else
2404             abort ();
2405
2406           for (opno = 1; opno < alg.ops; opno++)
2407             {
2408               int log = alg.log[opno];
2409               int preserve = preserve_subexpressions_p ();
2410               rtx shift_subtarget = preserve ? 0 : accum;
2411               rtx add_target
2412                 = (opno == alg.ops - 1 && target != 0 && variant != add_variant
2413                    && ! preserve)
2414                   ? target : 0;
2415               rtx accum_target = preserve ? 0 : accum;
2416
2417               switch (alg.op[opno])
2418                 {
2419                 case alg_shift:
2420                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2421                                         build_int_2 (log, 0), NULL_RTX, 0);
2422                   val_so_far <<= log;
2423                   break;
2424
2425                 case alg_add_t_m2:
2426                   tem = expand_shift (LSHIFT_EXPR, mode, op0,
2427                                       build_int_2 (log, 0), NULL_RTX, 0);
2428                   accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2429                                          add_target
2430                                          ? add_target : accum_target);
2431                   val_so_far += (HOST_WIDE_INT) 1 << log;
2432                   break;
2433
2434                 case alg_sub_t_m2:
2435                   tem = expand_shift (LSHIFT_EXPR, mode, op0,
2436                                       build_int_2 (log, 0), NULL_RTX, 0);
2437                   accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2438                                          add_target
2439                                          ? add_target : accum_target);
2440                   val_so_far -= (HOST_WIDE_INT) 1 << log;
2441                   break;
2442
2443                 case alg_add_t2_m:
2444                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2445                                         build_int_2 (log, 0), shift_subtarget,
2446                                         0);
2447                   accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2448                                          add_target
2449                                          ? add_target : accum_target);
2450                   val_so_far = (val_so_far << log) + 1;
2451                   break;
2452
2453                 case alg_sub_t2_m:
2454                   accum = expand_shift (LSHIFT_EXPR, mode, accum,
2455                                         build_int_2 (log, 0), shift_subtarget,
2456                                         0);
2457                   accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2458                                          add_target
2459                                          ? add_target : accum_target);
2460                   val_so_far = (val_so_far << log) - 1;
2461                   break;
2462
2463                 case alg_add_factor:
2464                   tem = expand_shift (LSHIFT_EXPR, mode, accum,
2465                                       build_int_2 (log, 0), NULL_RTX, 0);
2466                   accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2467                                          add_target
2468                                          ? add_target : accum_target);
2469                   val_so_far += val_so_far << log;
2470                   break;
2471
2472                 case alg_sub_factor:
2473                   tem = expand_shift (LSHIFT_EXPR, mode, accum,
2474                                       build_int_2 (log, 0), NULL_RTX, 0);
2475                   accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2476                                          (add_target ? add_target
2477                                           : preserve ? 0 : tem));
2478                   val_so_far = (val_so_far << log) - val_so_far;
2479                   break;
2480
2481                 default:
2482                   abort ();
2483                 }
2484
2485               /* Write a REG_EQUAL note on the last insn so that we can cse
2486                  multiplication sequences.  Note that if ACCUM is a SUBREG,
2487                  we've set the inner register and must properly indicate
2488                  that.  */
2489
2490               tem = op0, nmode = mode;
2491               if (GET_CODE (accum) == SUBREG)
2492                 {
2493                   nmode = GET_MODE (SUBREG_REG (accum));
2494                   tem = gen_lowpart (nmode, op0);
2495                 }
2496
2497               insn = get_last_insn ();
2498               set_unique_reg_note (insn,
2499                                    REG_EQUAL,
2500                                    gen_rtx_MULT (nmode, tem,
2501                                                  GEN_INT (val_so_far)));
2502             }
2503
2504           if (variant == negate_variant)
2505             {
2506               val_so_far = - val_so_far;
2507               accum = expand_unop (mode, neg_optab, accum, target, 0);
2508             }
2509           else if (variant == add_variant)
2510             {
2511               val_so_far = val_so_far + 1;
2512               accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2513             }
2514
2515           if (val != val_so_far)
2516             abort ();
2517
2518           return accum;
2519         }
2520     }
2521
2522   if (GET_CODE (op0) == CONST_DOUBLE)
2523     {
2524       rtx temp = op0;
2525       op0 = op1;
2526       op1 = temp;
2527     }
2528
2529   /* Expand x*2.0 as x+x.  */
2530   if (GET_CODE (op1) == CONST_DOUBLE
2531       && GET_MODE_CLASS (mode) == MODE_FLOAT)
2532     {
2533       REAL_VALUE_TYPE d;
2534       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
2535
2536       if (REAL_VALUES_EQUAL (d, dconst2))
2537         {
2538           op0 = force_reg (GET_MODE (op0), op0);
2539           return expand_binop (mode, add_optab, op0, op0,
2540                                target, unsignedp, OPTAB_LIB_WIDEN);
2541         }
2542     }
2543
2544   /* This used to use umul_optab if unsigned, but for non-widening multiply
2545      there is no difference between signed and unsigned.  */
2546   op0 = expand_binop (mode,
2547                       ! unsignedp
2548                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
2549                       ? smulv_optab : smul_optab,
2550                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2551   if (op0 == 0)
2552     abort ();
2553   return op0;
2554 }
2555 \f
2556 /* Return the smallest n such that 2**n >= X.  */
2557
2558 int
2559 ceil_log2 (unsigned HOST_WIDE_INT x)
2560 {
2561   return floor_log2 (x - 1) + 1;
2562 }
2563
2564 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2565    replace division by D, and put the least significant N bits of the result
2566    in *MULTIPLIER_PTR and return the most significant bit.
2567
2568    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
2569    needed precision is in PRECISION (should be <= N).
2570
2571    PRECISION should be as small as possible so this function can choose
2572    multiplier more freely.
2573
2574    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
2575    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2576
2577    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2578    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
2579
2580 static
2581 unsigned HOST_WIDE_INT
2582 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
2583                    unsigned HOST_WIDE_INT *multiplier_ptr,
2584                    int *post_shift_ptr, int *lgup_ptr)
2585 {
2586   HOST_WIDE_INT mhigh_hi, mlow_hi;
2587   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
2588   int lgup, post_shift;
2589   int pow, pow2;
2590   unsigned HOST_WIDE_INT nl, dummy1;
2591   HOST_WIDE_INT nh, dummy2;
2592
2593   /* lgup = ceil(log2(divisor)); */
2594   lgup = ceil_log2 (d);
2595
2596   if (lgup > n)
2597     abort ();
2598
2599   pow = n + lgup;
2600   pow2 = n + lgup - precision;
2601
2602   if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2603     {
2604       /* We could handle this with some effort, but this case is much better
2605          handled directly with a scc insn, so rely on caller using that.  */
2606       abort ();
2607     }
2608
2609   /* mlow = 2^(N + lgup)/d */
2610  if (pow >= HOST_BITS_PER_WIDE_INT)
2611     {
2612       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2613       nl = 0;
2614     }
2615   else
2616     {
2617       nh = 0;
2618       nl = (unsigned HOST_WIDE_INT) 1 << pow;
2619     }
2620   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2621                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2622
2623   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
2624   if (pow2 >= HOST_BITS_PER_WIDE_INT)
2625     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2626   else
2627     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2628   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2629                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2630
2631   if (mhigh_hi && nh - d >= d)
2632     abort ();
2633   if (mhigh_hi > 1 || mlow_hi > 1)
2634     abort ();
2635   /* Assert that mlow < mhigh.  */
2636   if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2637     abort ();
2638
2639   /* If precision == N, then mlow, mhigh exceed 2^N
2640      (but they do not exceed 2^(N+1)).  */
2641
2642   /* Reduce to lowest terms.  */
2643   for (post_shift = lgup; post_shift > 0; post_shift--)
2644     {
2645       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2646       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2647       if (ml_lo >= mh_lo)
2648         break;
2649
2650       mlow_hi = 0;
2651       mlow_lo = ml_lo;
2652       mhigh_hi = 0;
2653       mhigh_lo = mh_lo;
2654     }
2655
2656   *post_shift_ptr = post_shift;
2657   *lgup_ptr = lgup;
2658   if (n < HOST_BITS_PER_WIDE_INT)
2659     {
2660       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2661       *multiplier_ptr = mhigh_lo & mask;
2662       return mhigh_lo >= mask;
2663     }
2664   else
2665     {
2666       *multiplier_ptr = mhigh_lo;
2667       return mhigh_hi;
2668     }
2669 }
2670
2671 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
2672    congruent to 1 (mod 2**N).  */
2673
2674 static unsigned HOST_WIDE_INT
2675 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
2676 {
2677   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
2678
2679   /* The algorithm notes that the choice y = x satisfies
2680      x*y == 1 mod 2^3, since x is assumed odd.
2681      Each iteration doubles the number of bits of significance in y.  */
2682
2683   unsigned HOST_WIDE_INT mask;
2684   unsigned HOST_WIDE_INT y = x;
2685   int nbit = 3;
2686
2687   mask = (n == HOST_BITS_PER_WIDE_INT
2688           ? ~(unsigned HOST_WIDE_INT) 0
2689           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2690
2691   while (nbit < n)
2692     {
2693       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
2694       nbit *= 2;
2695     }
2696   return y;
2697 }
2698
2699 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
2700    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
2701    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
2702    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
2703    become signed.
2704
2705    The result is put in TARGET if that is convenient.
2706
2707    MODE is the mode of operation.  */
2708
2709 rtx
2710 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
2711                              rtx op1, rtx target, int unsignedp)
2712 {
2713   rtx tem;
2714   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
2715
2716   tem = expand_shift (RSHIFT_EXPR, mode, op0,
2717                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2718                       NULL_RTX, 0);
2719   tem = expand_and (mode, tem, op1, NULL_RTX);
2720   adj_operand
2721     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2722                      adj_operand);
2723
2724   tem = expand_shift (RSHIFT_EXPR, mode, op1,
2725                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2726                       NULL_RTX, 0);
2727   tem = expand_and (mode, tem, op0, NULL_RTX);
2728   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2729                           target);
2730
2731   return target;
2732 }
2733
2734 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
2735    in TARGET if that is convenient, and return where the result is.  If the
2736    operation can not be performed, 0 is returned.
2737
2738    MODE is the mode of operation and result.
2739
2740    UNSIGNEDP nonzero means unsigned multiply.
2741
2742    MAX_COST is the total allowed cost for the expanded RTL.  */
2743
2744 rtx
2745 expand_mult_highpart (enum machine_mode mode, rtx op0,
2746                       unsigned HOST_WIDE_INT cnst1, rtx target,
2747                       int unsignedp, int max_cost)
2748 {
2749   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
2750   optab mul_highpart_optab;
2751   optab moptab;
2752   rtx tem;
2753   int size = GET_MODE_BITSIZE (mode);
2754   rtx op1, wide_op1;
2755
2756   /* We can't support modes wider than HOST_BITS_PER_INT.  */
2757   if (size > HOST_BITS_PER_WIDE_INT)
2758     abort ();
2759
2760   op1 = gen_int_mode (cnst1, mode);
2761
2762   wide_op1
2763     = immed_double_const (cnst1,
2764                           (unsignedp
2765                            ? (HOST_WIDE_INT) 0
2766                            : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))),
2767                           wider_mode);
2768
2769   /* expand_mult handles constant multiplication of word_mode
2770      or narrower.  It does a poor job for large modes.  */
2771   if (size < BITS_PER_WORD
2772       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2773     {
2774       /* We have to do this, since expand_binop doesn't do conversion for
2775          multiply.  Maybe change expand_binop to handle widening multiply?  */
2776       op0 = convert_to_mode (wider_mode, op0, unsignedp);
2777
2778       /* We know that this can't have signed overflow, so pretend this is
2779          an unsigned multiply.  */
2780       tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, 0);
2781       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2782                           build_int_2 (size, 0), NULL_RTX, 1);
2783       return convert_modes (mode, wider_mode, tem, unsignedp);
2784     }
2785
2786   if (target == 0)
2787     target = gen_reg_rtx (mode);
2788
2789   /* Firstly, try using a multiplication insn that only generates the needed
2790      high part of the product, and in the sign flavor of unsignedp.  */
2791   if (mul_highpart_cost[(int) mode] < max_cost)
2792     {
2793       mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
2794       target = expand_binop (mode, mul_highpart_optab,
2795                              op0, op1, target, unsignedp, OPTAB_DIRECT);
2796       if (target)
2797         return target;
2798     }
2799
2800   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
2801      Need to adjust the result after the multiplication.  */
2802   if (size - 1 < BITS_PER_WORD
2803       && (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost
2804           < max_cost))
2805     {
2806       mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
2807       target = expand_binop (mode, mul_highpart_optab,
2808                              op0, op1, target, unsignedp, OPTAB_DIRECT);
2809       if (target)
2810         /* We used the wrong signedness.  Adjust the result.  */
2811         return expand_mult_highpart_adjust (mode, target, op0,
2812                                             op1, target, unsignedp);
2813     }
2814
2815   /* Try widening multiplication.  */
2816   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
2817   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2818       && mul_widen_cost[(int) wider_mode] < max_cost)
2819     {
2820       op1 = force_reg (mode, op1);
2821       goto try;
2822     }
2823
2824   /* Try widening the mode and perform a non-widening multiplication.  */
2825   moptab = smul_optab;
2826   if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2827       && size - 1 < BITS_PER_WORD
2828       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2829     {
2830       op1 = wide_op1;
2831       goto try;
2832     }
2833
2834   /* Try widening multiplication of opposite signedness, and adjust.  */
2835   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
2836   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2837       && size - 1 < BITS_PER_WORD
2838       && (mul_widen_cost[(int) wider_mode]
2839           + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
2840     {
2841       rtx regop1 = force_reg (mode, op1);
2842       tem = expand_binop (wider_mode, moptab, op0, regop1,
2843                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
2844       if (tem != 0)
2845         {
2846           /* Extract the high half of the just generated product.  */
2847           tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2848                               build_int_2 (size, 0), NULL_RTX, 1);
2849           tem = convert_modes (mode, wider_mode, tem, unsignedp);
2850           /* We used the wrong signedness.  Adjust the result.  */
2851           return expand_mult_highpart_adjust (mode, tem, op0, op1,
2852                                               target, unsignedp);
2853         }
2854     }
2855
2856   return 0;
2857
2858  try:
2859   /* Pass NULL_RTX as target since TARGET has wrong mode.  */
2860   tem = expand_binop (wider_mode, moptab, op0, op1,
2861                       NULL_RTX, unsignedp, OPTAB_WIDEN);
2862   if (tem == 0)
2863     return 0;
2864
2865   /* Extract the high half of the just generated product.  */
2866   if (mode == word_mode)
2867     {
2868       return gen_highpart (mode, tem);
2869     }
2870   else
2871     {
2872       tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2873                           build_int_2 (size, 0), NULL_RTX, 1);
2874       return convert_modes (mode, wider_mode, tem, unsignedp);
2875     }
2876 }
2877 \f
2878 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
2879    if that is convenient, and returning where the result is.
2880    You may request either the quotient or the remainder as the result;
2881    specify REM_FLAG nonzero to get the remainder.
2882
2883    CODE is the expression code for which kind of division this is;
2884    it controls how rounding is done.  MODE is the machine mode to use.
2885    UNSIGNEDP nonzero means do unsigned division.  */
2886
2887 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
2888    and then correct it by or'ing in missing high bits
2889    if result of ANDI is nonzero.
2890    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
2891    This could optimize to a bfexts instruction.
2892    But C doesn't use these operations, so their optimizations are
2893    left for later.  */
2894 /* ??? For modulo, we don't actually need the highpart of the first product,
2895    the low part will do nicely.  And for small divisors, the second multiply
2896    can also be a low-part only multiply or even be completely left out.
2897    E.g. to calculate the remainder of a division by 3 with a 32 bit
2898    multiply, multiply with 0x55555556 and extract the upper two bits;
2899    the result is exact for inputs up to 0x1fffffff.
2900    The input range can be reduced by using cross-sum rules.
2901    For odd divisors >= 3, the following table gives right shift counts
2902    so that if a number is shifted by an integer multiple of the given
2903    amount, the remainder stays the same:
2904    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
2905    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
2906    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
2907    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
2908    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
2909
2910    Cross-sum rules for even numbers can be derived by leaving as many bits
2911    to the right alone as the divisor has zeros to the right.
2912    E.g. if x is an unsigned 32 bit number:
2913    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
2914    */
2915
2916 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
2917
2918 rtx
2919 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
2920                rtx op0, rtx op1, rtx target, int unsignedp)
2921 {
2922   enum machine_mode compute_mode;
2923   rtx tquotient;
2924   rtx quotient = 0, remainder = 0;
2925   rtx last;
2926   int size;
2927   rtx insn, set;
2928   optab optab1, optab2;
2929   int op1_is_constant, op1_is_pow2 = 0;
2930   int max_cost, extra_cost;
2931   static HOST_WIDE_INT last_div_const = 0;
2932   static HOST_WIDE_INT ext_op1;
2933
2934   op1_is_constant = GET_CODE (op1) == CONST_INT;
2935   if (op1_is_constant)
2936     {
2937       ext_op1 = INTVAL (op1);
2938       if (unsignedp)
2939         ext_op1 &= GET_MODE_MASK (mode);
2940       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
2941                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
2942     }
2943
2944   /*
2945      This is the structure of expand_divmod:
2946
2947      First comes code to fix up the operands so we can perform the operations
2948      correctly and efficiently.
2949
2950      Second comes a switch statement with code specific for each rounding mode.
2951      For some special operands this code emits all RTL for the desired
2952      operation, for other cases, it generates only a quotient and stores it in
2953      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
2954      to indicate that it has not done anything.
2955
2956      Last comes code that finishes the operation.  If QUOTIENT is set and
2957      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
2958      QUOTIENT is not set, it is computed using trunc rounding.
2959
2960      We try to generate special code for division and remainder when OP1 is a
2961      constant.  If |OP1| = 2**n we can use shifts and some other fast
2962      operations.  For other values of OP1, we compute a carefully selected
2963      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
2964      by m.
2965
2966      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
2967      half of the product.  Different strategies for generating the product are
2968      implemented in expand_mult_highpart.
2969
2970      If what we actually want is the remainder, we generate that by another
2971      by-constant multiplication and a subtraction.  */
2972
2973   /* We shouldn't be called with OP1 == const1_rtx, but some of the
2974      code below will malfunction if we are, so check here and handle
2975      the special case if so.  */
2976   if (op1 == const1_rtx)
2977     return rem_flag ? const0_rtx : op0;
2978
2979     /* When dividing by -1, we could get an overflow.
2980      negv_optab can handle overflows.  */
2981   if (! unsignedp && op1 == constm1_rtx)
2982     {
2983       if (rem_flag)
2984         return const0_rtx;
2985       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
2986                           ? negv_optab : neg_optab, op0, target, 0);
2987     }
2988
2989   if (target
2990       /* Don't use the function value register as a target
2991          since we have to read it as well as write it,
2992          and function-inlining gets confused by this.  */
2993       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
2994           /* Don't clobber an operand while doing a multi-step calculation.  */
2995           || ((rem_flag || op1_is_constant)
2996               && (reg_mentioned_p (target, op0)
2997                   || (GET_CODE (op0) == MEM && GET_CODE (target) == MEM)))
2998           || reg_mentioned_p (target, op1)
2999           || (GET_CODE (op1) == MEM && GET_CODE (target) == MEM)))
3000     target = 0;
3001
3002   /* Get the mode in which to perform this computation.  Normally it will
3003      be MODE, but sometimes we can't do the desired operation in MODE.
3004      If so, pick a wider mode in which we can do the operation.  Convert
3005      to that mode at the start to avoid repeated conversions.
3006
3007      First see what operations we need.  These depend on the expression
3008      we are evaluating.  (We assume that divxx3 insns exist under the
3009      same conditions that modxx3 insns and that these insns don't normally
3010      fail.  If these assumptions are not correct, we may generate less
3011      efficient code in some cases.)
3012
3013      Then see if we find a mode in which we can open-code that operation
3014      (either a division, modulus, or shift).  Finally, check for the smallest
3015      mode for which we can do the operation with a library call.  */
3016
3017   /* We might want to refine this now that we have division-by-constant
3018      optimization.  Since expand_mult_highpart tries so many variants, it is
3019      not straightforward to generalize this.  Maybe we should make an array
3020      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3021
3022   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3023             ? (unsignedp ? lshr_optab : ashr_optab)
3024             : (unsignedp ? udiv_optab : sdiv_optab));
3025   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3026             ? optab1
3027             : (unsignedp ? udivmod_optab : sdivmod_optab));
3028
3029   for (compute_mode = mode; compute_mode != VOIDmode;
3030        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3031     if (optab1->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing
3032         || optab2->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing)
3033       break;
3034
3035   if (compute_mode == VOIDmode)
3036     for (compute_mode = mode; compute_mode != VOIDmode;
3037          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3038       if (optab1->handlers[(int) compute_mode].libfunc
3039           || optab2->handlers[(int) compute_mode].libfunc)
3040         break;
3041
3042   /* If we still couldn't find a mode, use MODE, but we'll probably abort
3043      in expand_binop.  */
3044   if (compute_mode == VOIDmode)
3045     compute_mode = mode;
3046
3047   if (target && GET_MODE (target) == compute_mode)
3048     tquotient = target;
3049   else
3050     tquotient = gen_reg_rtx (compute_mode);
3051
3052   size = GET_MODE_BITSIZE (compute_mode);
3053 #if 0
3054   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3055      (mode), and thereby get better code when OP1 is a constant.  Do that
3056      later.  It will require going over all usages of SIZE below.  */
3057   size = GET_MODE_BITSIZE (mode);
3058 #endif
3059
3060   /* Only deduct something for a REM if the last divide done was
3061      for a different constant.   Then set the constant of the last
3062      divide.  */
3063   max_cost = div_cost[(int) compute_mode]
3064     - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3065                       && INTVAL (op1) == last_div_const)
3066        ? mul_cost[(int) compute_mode] + add_cost : 0);
3067
3068   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3069
3070   /* Now convert to the best mode to use.  */
3071   if (compute_mode != mode)
3072     {
3073       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3074       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3075
3076       /* convert_modes may have placed op1 into a register, so we
3077          must recompute the following.  */
3078       op1_is_constant = GET_CODE (op1) == CONST_INT;
3079       op1_is_pow2 = (op1_is_constant
3080                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3081                           || (! unsignedp
3082                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3083     }
3084
3085   /* If one of the operands is a volatile MEM, copy it into a register.  */
3086
3087   if (GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0))
3088     op0 = force_reg (compute_mode, op0);
3089   if (GET_CODE (op1) == MEM && MEM_VOLATILE_P (op1))
3090     op1 = force_reg (compute_mode, op1);
3091
3092   /* If we need the remainder or if OP1 is constant, we need to
3093      put OP0 in a register in case it has any queued subexpressions.  */
3094   if (rem_flag || op1_is_constant)
3095     op0 = force_reg (compute_mode, op0);
3096
3097   last = get_last_insn ();
3098
3099   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3100   if (unsignedp)
3101     {
3102       if (code == FLOOR_DIV_EXPR)
3103         code = TRUNC_DIV_EXPR;
3104       if (code == FLOOR_MOD_EXPR)
3105         code = TRUNC_MOD_EXPR;
3106       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3107         code = TRUNC_DIV_EXPR;
3108     }
3109
3110   if (op1 != const0_rtx)
3111     switch (code)
3112       {
3113       case TRUNC_MOD_EXPR:
3114       case TRUNC_DIV_EXPR:
3115         if (op1_is_constant)
3116           {
3117             if (unsignedp)
3118               {
3119                 unsigned HOST_WIDE_INT mh, ml;
3120                 int pre_shift, post_shift;
3121                 int dummy;
3122                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3123                                             & GET_MODE_MASK (compute_mode));
3124
3125                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3126                   {
3127                     pre_shift = floor_log2 (d);
3128                     if (rem_flag)
3129                       {
3130                         remainder
3131                           = expand_binop (compute_mode, and_optab, op0,
3132                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3133                                           remainder, 1,
3134                                           OPTAB_LIB_WIDEN);
3135                         if (remainder)
3136                           return gen_lowpart (mode, remainder);
3137                       }
3138                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3139                                              build_int_2 (pre_shift, 0),
3140                                              tquotient, 1);
3141                   }
3142                 else if (size <= HOST_BITS_PER_WIDE_INT)
3143                   {
3144                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3145                       {
3146                         /* Most significant bit of divisor is set; emit an scc
3147                            insn.  */
3148                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3149                                                     compute_mode, 1, 1);
3150                         if (quotient == 0)
3151                           goto fail1;
3152                       }
3153                     else
3154                       {
3155                         /* Find a suitable multiplier and right shift count
3156                            instead of multiplying with D.  */
3157
3158                         mh = choose_multiplier (d, size, size,
3159                                                 &ml, &post_shift, &dummy);
3160
3161                         /* If the suggested multiplier is more than SIZE bits,
3162                            we can do better for even divisors, using an
3163                            initial right shift.  */
3164                         if (mh != 0 && (d & 1) == 0)
3165                           {
3166                             pre_shift = floor_log2 (d & -d);
3167                             mh = choose_multiplier (d >> pre_shift, size,
3168                                                     size - pre_shift,
3169                                                     &ml, &post_shift, &dummy);
3170                             if (mh)
3171                               abort ();
3172                           }
3173                         else
3174                           pre_shift = 0;
3175
3176                         if (mh != 0)
3177                           {
3178                             rtx t1, t2, t3, t4;
3179
3180                             if (post_shift - 1 >= BITS_PER_WORD)
3181                               goto fail1;
3182
3183                             extra_cost = (shift_cost[post_shift - 1]
3184                                           + shift_cost[1] + 2 * add_cost);
3185                             t1 = expand_mult_highpart (compute_mode, op0, ml,
3186                                                        NULL_RTX, 1,
3187                                                        max_cost - extra_cost);
3188                             if (t1 == 0)
3189                               goto fail1;
3190                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
3191                                                                op0, t1),
3192                                                 NULL_RTX);
3193                             t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3194                                                build_int_2 (1, 0), NULL_RTX,1);
3195                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
3196                                                               t1, t3),
3197                                                 NULL_RTX);
3198                             quotient
3199                               = expand_shift (RSHIFT_EXPR, compute_mode, t4,
3200                                               build_int_2 (post_shift - 1, 0),
3201                                               tquotient, 1);
3202                           }
3203                         else
3204                           {
3205                             rtx t1, t2;
3206
3207                             if (pre_shift >= BITS_PER_WORD
3208                                 || post_shift >= BITS_PER_WORD)
3209                               goto fail1;
3210
3211                             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3212                                                build_int_2 (pre_shift, 0),
3213                                                NULL_RTX, 1);
3214                             extra_cost = (shift_cost[pre_shift]
3215                                           + shift_cost[post_shift]);
3216                             t2 = expand_mult_highpart (compute_mode, t1, ml,
3217                                                        NULL_RTX, 1,
3218                                                        max_cost - extra_cost);
3219                             if (t2 == 0)
3220                               goto fail1;
3221                             quotient
3222                               = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3223                                               build_int_2 (post_shift, 0),
3224                                               tquotient, 1);
3225                           }
3226                       }
3227                   }
3228                 else            /* Too wide mode to use tricky code */
3229                   break;
3230
3231                 insn = get_last_insn ();
3232                 if (insn != last
3233                     && (set = single_set (insn)) != 0
3234                     && SET_DEST (set) == quotient)
3235                   set_unique_reg_note (insn,
3236                                        REG_EQUAL,
3237                                        gen_rtx_UDIV (compute_mode, op0, op1));
3238               }
3239             else                /* TRUNC_DIV, signed */
3240               {
3241                 unsigned HOST_WIDE_INT ml;
3242                 int lgup, post_shift;
3243                 HOST_WIDE_INT d = INTVAL (op1);
3244                 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3245
3246                 /* n rem d = n rem -d */
3247                 if (rem_flag && d < 0)
3248                   {
3249                     d = abs_d;
3250                     op1 = gen_int_mode (abs_d, compute_mode);
3251                   }
3252
3253                 if (d == 1)
3254                   quotient = op0;
3255                 else if (d == -1)
3256                   quotient = expand_unop (compute_mode, neg_optab, op0,
3257                                           tquotient, 0);
3258                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3259                   {
3260                     /* This case is not handled correctly below.  */
3261                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
3262                                                 compute_mode, 1, 1);
3263                     if (quotient == 0)
3264                       goto fail1;
3265                   }
3266                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3267                          && (rem_flag ? smod_pow2_cheap : sdiv_pow2_cheap)
3268                          /* ??? The cheap metric is computed only for
3269                             word_mode.  If this operation is wider, this may
3270                             not be so.  Assume true if the optab has an
3271                             expander for this mode.  */
3272                          && (((rem_flag ? smod_optab : sdiv_optab)
3273                               ->handlers[(int) compute_mode].insn_code
3274                               != CODE_FOR_nothing)
3275                              || (sdivmod_optab->handlers[(int) compute_mode]
3276                                  .insn_code != CODE_FOR_nothing)))
3277                   ;
3278                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3279                   {
3280                     lgup = floor_log2 (abs_d);
3281                     if (BRANCH_COST < 1 || (abs_d != 2 && BRANCH_COST < 3))
3282                       {
3283                         rtx label = gen_label_rtx ();
3284                         rtx t1;
3285
3286                         t1 = copy_to_mode_reg (compute_mode, op0);
3287                         do_cmp_and_jump (t1, const0_rtx, GE,
3288                                          compute_mode, label);
3289                         expand_inc (t1, gen_int_mode (abs_d - 1,
3290                                                       compute_mode));
3291                         emit_label (label);
3292                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3293                                                  build_int_2 (lgup, 0),
3294                                                  tquotient, 0);
3295                       }
3296                     else
3297                       {
3298                         rtx t1, t2, t3;
3299                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3300                                            build_int_2 (size - 1, 0),
3301                                            NULL_RTX, 0);
3302                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3303                                            build_int_2 (size - lgup, 0),
3304                                            NULL_RTX, 1);
3305                         t3 = force_operand (gen_rtx_PLUS (compute_mode,
3306                                                           op0, t2),
3307                                             NULL_RTX);
3308                         quotient = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3309                                                  build_int_2 (lgup, 0),
3310                                                  tquotient, 0);
3311                       }
3312
3313                     /* We have computed OP0 / abs(OP1).  If OP1 is negative, negate
3314                        the quotient.  */
3315                     if (d < 0)
3316                       {
3317                         insn = get_last_insn ();
3318                         if (insn != last
3319                             && (set = single_set (insn)) != 0
3320                             && SET_DEST (set) == quotient
3321                             && abs_d < ((unsigned HOST_WIDE_INT) 1
3322                                         << (HOST_BITS_PER_WIDE_INT - 1)))
3323                           set_unique_reg_note (insn,
3324                                                REG_EQUAL,
3325                                                gen_rtx_DIV (compute_mode,
3326                                                             op0,
3327                                                             GEN_INT
3328                                                             (trunc_int_for_mode
3329                                                              (abs_d,
3330                                                               compute_mode))));
3331
3332                         quotient = expand_unop (compute_mode, neg_optab,
3333                                                 quotient, quotient, 0);
3334                       }
3335                   }
3336                 else if (size <= HOST_BITS_PER_WIDE_INT)
3337                   {
3338                     choose_multiplier (abs_d, size, size - 1,
3339                                        &ml, &post_shift, &lgup);
3340                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3341                       {
3342                         rtx t1, t2, t3;
3343
3344                         if (post_shift >= BITS_PER_WORD
3345                             || size - 1 >= BITS_PER_WORD)
3346                           goto fail1;
3347
3348                         extra_cost = (shift_cost[post_shift]
3349                                       + shift_cost[size - 1] + add_cost);
3350                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3351                                                    NULL_RTX, 0,
3352                                                    max_cost - extra_cost);
3353                         if (t1 == 0)
3354                           goto fail1;
3355                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3356                                            build_int_2 (post_shift, 0), NULL_RTX, 0);
3357                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3358                                            build_int_2 (size - 1, 0), NULL_RTX, 0);
3359                         if (d < 0)
3360                           quotient
3361                             = force_operand (gen_rtx_MINUS (compute_mode,
3362                                                             t3, t2),
3363                                              tquotient);
3364                         else
3365                           quotient
3366                             = force_operand (gen_rtx_MINUS (compute_mode,
3367                                                             t2, t3),
3368                                              tquotient);
3369                       }
3370                     else
3371                       {
3372                         rtx t1, t2, t3, t4;
3373
3374                         if (post_shift >= BITS_PER_WORD
3375                             || size - 1 >= BITS_PER_WORD)
3376                           goto fail1;
3377
3378                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3379                         extra_cost = (shift_cost[post_shift]
3380                                       + shift_cost[size - 1] + 2 * add_cost);
3381                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3382                                                    NULL_RTX, 0,
3383                                                    max_cost - extra_cost);
3384                         if (t1 == 0)
3385                           goto fail1;
3386                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
3387                                                           t1, op0),
3388                                             NULL_RTX);
3389                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3390                                            build_int_2 (post_shift, 0),
3391                                            NULL_RTX, 0);
3392                         t4 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3393                                            build_int_2 (size - 1, 0),
3394                                            NULL_RTX, 0);
3395                         if (d < 0)
3396                           quotient
3397                             = force_operand (gen_rtx_MINUS (compute_mode,
3398                                                             t4, t3),
3399                                              tquotient);
3400                         else
3401                           quotient
3402                             = force_operand (gen_rtx_MINUS (compute_mode,
3403                                                             t3, t4),
3404                                              tquotient);
3405                       }
3406                   }
3407                 else            /* Too wide mode to use tricky code */
3408                   break;
3409
3410                 insn = get_last_insn ();
3411                 if (insn != last
3412                     && (set = single_set (insn)) != 0
3413                     && SET_DEST (set) == quotient)
3414                   set_unique_reg_note (insn,
3415                                        REG_EQUAL,
3416                                        gen_rtx_DIV (compute_mode, op0, op1));
3417               }
3418             break;
3419           }
3420       fail1:
3421         delete_insns_since (last);
3422         break;
3423
3424       case FLOOR_DIV_EXPR:
3425       case FLOOR_MOD_EXPR:
3426       /* We will come here only for signed operations.  */
3427         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3428           {
3429             unsigned HOST_WIDE_INT mh, ml;
3430             int pre_shift, lgup, post_shift;
3431             HOST_WIDE_INT d = INTVAL (op1);
3432
3433             if (d > 0)
3434               {
3435                 /* We could just as easily deal with negative constants here,
3436                    but it does not seem worth the trouble for GCC 2.6.  */
3437                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3438                   {
3439                     pre_shift = floor_log2 (d);
3440                     if (rem_flag)
3441                       {
3442                         remainder = expand_binop (compute_mode, and_optab, op0,
3443                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3444                                                   remainder, 0, OPTAB_LIB_WIDEN);
3445                         if (remainder)
3446                           return gen_lowpart (mode, remainder);
3447                       }
3448                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3449                                              build_int_2 (pre_shift, 0),
3450                                              tquotient, 0);
3451                   }
3452                 else
3453                   {
3454                     rtx t1, t2, t3, t4;
3455
3456                     mh = choose_multiplier (d, size, size - 1,
3457                                             &ml, &post_shift, &lgup);
3458                     if (mh)
3459                       abort ();
3460
3461                     if (post_shift < BITS_PER_WORD
3462                         && size - 1 < BITS_PER_WORD)
3463                       {
3464                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3465                                            build_int_2 (size - 1, 0),
3466                                            NULL_RTX, 0);
3467                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3468                                            NULL_RTX, 0, OPTAB_WIDEN);
3469                         extra_cost = (shift_cost[post_shift]
3470                                       + shift_cost[size - 1] + 2 * add_cost);
3471                         t3 = expand_mult_highpart (compute_mode, t2, ml,
3472                                                    NULL_RTX, 1,
3473                                                    max_cost - extra_cost);
3474                         if (t3 != 0)
3475                           {
3476                             t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3477                                                build_int_2 (post_shift, 0),
3478                                                NULL_RTX, 1);
3479                             quotient = expand_binop (compute_mode, xor_optab,
3480                                                      t4, t1, tquotient, 0,
3481                                                      OPTAB_WIDEN);
3482                           }
3483                       }
3484                   }
3485               }
3486             else
3487               {
3488                 rtx nsign, t1, t2, t3, t4;
3489                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
3490                                                   op0, constm1_rtx), NULL_RTX);
3491                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3492                                    0, OPTAB_WIDEN);
3493                 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3494                                       build_int_2 (size - 1, 0), NULL_RTX, 0);
3495                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
3496                                     NULL_RTX);
3497                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3498                                     NULL_RTX, 0);
3499                 if (t4)
3500                   {
3501                     rtx t5;
3502                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3503                                       NULL_RTX, 0);
3504                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
3505                                                             t4, t5),
3506                                               tquotient);
3507                   }
3508               }
3509           }
3510
3511         if (quotient != 0)
3512           break;
3513         delete_insns_since (last);
3514
3515         /* Try using an instruction that produces both the quotient and
3516            remainder, using truncation.  We can easily compensate the quotient
3517            or remainder to get floor rounding, once we have the remainder.
3518            Notice that we compute also the final remainder value here,
3519            and return the result right away.  */
3520         if (target == 0 || GET_MODE (target) != compute_mode)
3521           target = gen_reg_rtx (compute_mode);
3522
3523         if (rem_flag)
3524           {
3525             remainder
3526               = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3527             quotient = gen_reg_rtx (compute_mode);
3528           }
3529         else
3530           {
3531             quotient
3532               = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3533             remainder = gen_reg_rtx (compute_mode);
3534           }
3535
3536         if (expand_twoval_binop (sdivmod_optab, op0, op1,
3537                                  quotient, remainder, 0))
3538           {
3539             /* This could be computed with a branch-less sequence.
3540                Save that for later.  */
3541             rtx tem;
3542             rtx label = gen_label_rtx ();
3543             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
3544             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3545                                 NULL_RTX, 0, OPTAB_WIDEN);
3546             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
3547             expand_dec (quotient, const1_rtx);
3548             expand_inc (remainder, op1);
3549             emit_label (label);
3550             return gen_lowpart (mode, rem_flag ? remainder : quotient);
3551           }
3552
3553         /* No luck with division elimination or divmod.  Have to do it
3554            by conditionally adjusting op0 *and* the result.  */
3555         {
3556           rtx label1, label2, label3, label4, label5;
3557           rtx adjusted_op0;
3558           rtx tem;
3559
3560           quotient = gen_reg_rtx (compute_mode);
3561           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3562           label1 = gen_label_rtx ();
3563           label2 = gen_label_rtx ();
3564           label3 = gen_label_rtx ();
3565           label4 = gen_label_rtx ();
3566           label5 = gen_label_rtx ();
3567           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3568           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
3569           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3570                               quotient, 0, OPTAB_LIB_WIDEN);
3571           if (tem != quotient)
3572             emit_move_insn (quotient, tem);
3573           emit_jump_insn (gen_jump (label5));
3574           emit_barrier ();
3575           emit_label (label1);
3576           expand_inc (adjusted_op0, const1_rtx);
3577           emit_jump_insn (gen_jump (label4));
3578           emit_barrier ();
3579           emit_label (label2);
3580           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
3581           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3582                               quotient, 0, OPTAB_LIB_WIDEN);
3583           if (tem != quotient)
3584             emit_move_insn (quotient, tem);
3585           emit_jump_insn (gen_jump (label5));
3586           emit_barrier ();
3587           emit_label (label3);
3588           expand_dec (adjusted_op0, const1_rtx);
3589           emit_label (label4);
3590           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3591                               quotient, 0, OPTAB_LIB_WIDEN);
3592           if (tem != quotient)
3593             emit_move_insn (quotient, tem);
3594           expand_dec (quotient, const1_rtx);
3595           emit_label (label5);
3596         }
3597         break;
3598
3599       case CEIL_DIV_EXPR:
3600       case CEIL_MOD_EXPR:
3601         if (unsignedp)
3602           {
3603             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
3604               {
3605                 rtx t1, t2, t3;
3606                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3607                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3608                                    build_int_2 (floor_log2 (d), 0),
3609                                    tquotient, 1);
3610                 t2 = expand_binop (compute_mode, and_optab, op0,
3611                                    GEN_INT (d - 1),
3612                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3613                 t3 = gen_reg_rtx (compute_mode);
3614                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3615                                       compute_mode, 1, 1);
3616                 if (t3 == 0)
3617                   {
3618                     rtx lab;
3619                     lab = gen_label_rtx ();
3620                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3621                     expand_inc (t1, const1_rtx);
3622                     emit_label (lab);
3623                     quotient = t1;
3624                   }
3625                 else
3626                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3627                                                           t1, t3),
3628                                             tquotient);
3629                 break;
3630               }
3631
3632             /* Try using an instruction that produces both the quotient and
3633                remainder, using truncation.  We can easily compensate the
3634                quotient or remainder to get ceiling rounding, once we have the
3635                remainder.  Notice that we compute also the final remainder
3636                value here, and return the result right away.  */
3637             if (target == 0 || GET_MODE (target) != compute_mode)
3638               target = gen_reg_rtx (compute_mode);
3639
3640             if (rem_flag)
3641               {
3642                 remainder = (GET_CODE (target) == REG
3643                              ? target : gen_reg_rtx (compute_mode));
3644                 quotient = gen_reg_rtx (compute_mode);
3645               }
3646             else
3647               {
3648                 quotient = (GET_CODE (target) == REG
3649                             ? target : gen_reg_rtx (compute_mode));
3650                 remainder = gen_reg_rtx (compute_mode);
3651               }
3652
3653             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
3654                                      remainder, 1))
3655               {
3656                 /* This could be computed with a branch-less sequence.
3657                    Save that for later.  */
3658                 rtx label = gen_label_rtx ();
3659                 do_cmp_and_jump (remainder, const0_rtx, EQ,
3660                                  compute_mode, label);
3661                 expand_inc (quotient, const1_rtx);
3662                 expand_dec (remainder, op1);
3663                 emit_label (label);
3664                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3665               }
3666
3667             /* No luck with division elimination or divmod.  Have to do it
3668                by conditionally adjusting op0 *and* the result.  */
3669             {
3670               rtx label1, label2;
3671               rtx adjusted_op0, tem;
3672
3673               quotient = gen_reg_rtx (compute_mode);
3674               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3675               label1 = gen_label_rtx ();
3676               label2 = gen_label_rtx ();
3677               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
3678                                compute_mode, label1);
3679               emit_move_insn  (quotient, const0_rtx);
3680               emit_jump_insn (gen_jump (label2));
3681               emit_barrier ();
3682               emit_label (label1);
3683               expand_dec (adjusted_op0, const1_rtx);
3684               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
3685                                   quotient, 1, OPTAB_LIB_WIDEN);
3686               if (tem != quotient)
3687                 emit_move_insn (quotient, tem);
3688               expand_inc (quotient, const1_rtx);
3689               emit_label (label2);
3690             }
3691           }
3692         else /* signed */
3693           {
3694             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3695                 && INTVAL (op1) >= 0)
3696               {
3697                 /* This is extremely similar to the code for the unsigned case
3698                    above.  For 2.7 we should merge these variants, but for
3699                    2.6.1 I don't want to touch the code for unsigned since that
3700                    get used in C.  The signed case will only be used by other
3701                    languages (Ada).  */
3702
3703                 rtx t1, t2, t3;
3704                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3705                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3706                                    build_int_2 (floor_log2 (d), 0),
3707                                    tquotient, 0);
3708                 t2 = expand_binop (compute_mode, and_optab, op0,
3709                                    GEN_INT (d - 1),
3710                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3711                 t3 = gen_reg_rtx (compute_mode);
3712                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3713                                       compute_mode, 1, 1);
3714                 if (t3 == 0)
3715                   {
3716                     rtx lab;
3717                     lab = gen_label_rtx ();
3718                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3719                     expand_inc (t1, const1_rtx);
3720                     emit_label (lab);
3721                     quotient = t1;
3722                   }
3723                 else
3724                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3725                                                           t1, t3),
3726                                             tquotient);
3727                 break;
3728               }
3729
3730             /* Try using an instruction that produces both the quotient and
3731                remainder, using truncation.  We can easily compensate the
3732                quotient or remainder to get ceiling rounding, once we have the
3733                remainder.  Notice that we compute also the final remainder
3734                value here, and return the result right away.  */
3735             if (target == 0 || GET_MODE (target) != compute_mode)
3736               target = gen_reg_rtx (compute_mode);
3737             if (rem_flag)
3738               {
3739                 remainder= (GET_CODE (target) == REG
3740                             ? target : gen_reg_rtx (compute_mode));
3741                 quotient = gen_reg_rtx (compute_mode);
3742               }
3743             else
3744               {
3745                 quotient = (GET_CODE (target) == REG
3746                             ? target : gen_reg_rtx (compute_mode));
3747                 remainder = gen_reg_rtx (compute_mode);
3748               }
3749
3750             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
3751                                      remainder, 0))
3752               {
3753                 /* This could be computed with a branch-less sequence.
3754                    Save that for later.  */
3755                 rtx tem;
3756                 rtx label = gen_label_rtx ();
3757                 do_cmp_and_jump (remainder, const0_rtx, EQ,
3758                                  compute_mode, label);
3759                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3760                                     NULL_RTX, 0, OPTAB_WIDEN);
3761                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
3762                 expand_inc (quotient, const1_rtx);
3763                 expand_dec (remainder, op1);
3764                 emit_label (label);
3765                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3766               }
3767
3768             /* No luck with division elimination or divmod.  Have to do it
3769                by conditionally adjusting op0 *and* the result.  */
3770             {
3771               rtx label1, label2, label3, label4, label5;
3772               rtx adjusted_op0;
3773               rtx tem;
3774
3775               quotient = gen_reg_rtx (compute_mode);
3776               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3777               label1 = gen_label_rtx ();
3778               label2 = gen_label_rtx ();
3779               label3 = gen_label_rtx ();
3780               label4 = gen_label_rtx ();
3781               label5 = gen_label_rtx ();
3782               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3783               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
3784                                compute_mode, label1);
3785               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3786                                   quotient, 0, OPTAB_LIB_WIDEN);
3787               if (tem != quotient)
3788                 emit_move_insn (quotient, tem);
3789               emit_jump_insn (gen_jump (label5));
3790               emit_barrier ();
3791               emit_label (label1);
3792               expand_dec (adjusted_op0, const1_rtx);
3793               emit_jump_insn (gen_jump (label4));
3794               emit_barrier ();
3795               emit_label (label2);
3796               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
3797                                compute_mode, label3);
3798               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3799                                   quotient, 0, OPTAB_LIB_WIDEN);
3800               if (tem != quotient)
3801                 emit_move_insn (quotient, tem);
3802               emit_jump_insn (gen_jump (label5));
3803               emit_barrier ();
3804               emit_label (label3);
3805               expand_inc (adjusted_op0, const1_rtx);
3806               emit_label (label4);
3807               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3808                                   quotient, 0, OPTAB_LIB_WIDEN);
3809               if (tem != quotient)
3810                 emit_move_insn (quotient, tem);
3811               expand_inc (quotient, const1_rtx);
3812               emit_label (label5);
3813             }
3814           }
3815         break;
3816
3817       case EXACT_DIV_EXPR:
3818         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3819           {
3820             HOST_WIDE_INT d = INTVAL (op1);
3821             unsigned HOST_WIDE_INT ml;
3822             int pre_shift;
3823             rtx t1;
3824
3825             pre_shift = floor_log2 (d & -d);
3826             ml = invert_mod2n (d >> pre_shift, size);
3827             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3828                                build_int_2 (pre_shift, 0), NULL_RTX, unsignedp);
3829             quotient = expand_mult (compute_mode, t1,
3830                                     gen_int_mode (ml, compute_mode),
3831                                     NULL_RTX, 1);
3832
3833             insn = get_last_insn ();
3834             set_unique_reg_note (insn,
3835                                  REG_EQUAL,
3836                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
3837                                                  compute_mode,
3838                                                  op0, op1));
3839           }
3840         break;
3841
3842       case ROUND_DIV_EXPR:
3843       case ROUND_MOD_EXPR:
3844         if (unsignedp)
3845           {
3846             rtx tem;
3847             rtx label;
3848             label = gen_label_rtx ();
3849             quotient = gen_reg_rtx (compute_mode);
3850             remainder = gen_reg_rtx (compute_mode);
3851             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
3852               {
3853                 rtx tem;
3854                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
3855                                          quotient, 1, OPTAB_LIB_WIDEN);
3856                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
3857                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3858                                           remainder, 1, OPTAB_LIB_WIDEN);
3859               }
3860             tem = plus_constant (op1, -1);
3861             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3862                                 build_int_2 (1, 0), NULL_RTX, 1);
3863             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
3864             expand_inc (quotient, const1_rtx);
3865             expand_dec (remainder, op1);
3866             emit_label (label);
3867           }
3868         else
3869           {
3870             rtx abs_rem, abs_op1, tem, mask;
3871             rtx label;
3872             label = gen_label_rtx ();
3873             quotient = gen_reg_rtx (compute_mode);
3874             remainder = gen_reg_rtx (compute_mode);
3875             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
3876               {
3877                 rtx tem;
3878                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
3879                                          quotient, 0, OPTAB_LIB_WIDEN);
3880                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
3881                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3882                                           remainder, 0, OPTAB_LIB_WIDEN);
3883               }
3884             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
3885             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
3886             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
3887                                 build_int_2 (1, 0), NULL_RTX, 1);
3888             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
3889             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3890                                 NULL_RTX, 0, OPTAB_WIDEN);
3891             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3892                                 build_int_2 (size - 1, 0), NULL_RTX, 0);
3893             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
3894                                 NULL_RTX, 0, OPTAB_WIDEN);
3895             tem = expand_binop (compute_mode, sub_optab, tem, mask,
3896                                 NULL_RTX, 0, OPTAB_WIDEN);
3897             expand_inc (quotient, tem);
3898             tem = expand_binop (compute_mode, xor_optab, mask, op1,
3899                                 NULL_RTX, 0, OPTAB_WIDEN);
3900             tem = expand_binop (compute_mode, sub_optab, tem, mask,
3901                                 NULL_RTX, 0, OPTAB_WIDEN);
3902             expand_dec (remainder, tem);
3903             emit_label (label);
3904           }
3905         return gen_lowpart (mode, rem_flag ? remainder : quotient);
3906
3907       default:
3908         abort ();
3909       }
3910
3911   if (quotient == 0)
3912     {
3913       if (target && GET_MODE (target) != compute_mode)
3914         target = 0;
3915
3916       if (rem_flag)
3917         {
3918           /* Try to produce the remainder without producing the quotient.
3919              If we seem to have a divmod pattern that does not require widening,
3920              don't try widening here.  We should really have a WIDEN argument
3921              to expand_twoval_binop, since what we'd really like to do here is
3922              1) try a mod insn in compute_mode
3923              2) try a divmod insn in compute_mode
3924              3) try a div insn in compute_mode and multiply-subtract to get
3925                 remainder
3926              4) try the same things with widening allowed.  */
3927           remainder
3928             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3929                                  op0, op1, target,
3930                                  unsignedp,
3931                                  ((optab2->handlers[(int) compute_mode].insn_code
3932                                    != CODE_FOR_nothing)
3933                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
3934           if (remainder == 0)
3935             {
3936               /* No luck there.  Can we do remainder and divide at once
3937                  without a library call?  */
3938               remainder = gen_reg_rtx (compute_mode);
3939               if (! expand_twoval_binop ((unsignedp
3940                                           ? udivmod_optab
3941                                           : sdivmod_optab),
3942                                          op0, op1,
3943                                          NULL_RTX, remainder, unsignedp))
3944                 remainder = 0;
3945             }
3946
3947           if (remainder)
3948             return gen_lowpart (mode, remainder);
3949         }
3950
3951       /* Produce the quotient.  Try a quotient insn, but not a library call.
3952          If we have a divmod in this mode, use it in preference to widening
3953          the div (for this test we assume it will not fail). Note that optab2
3954          is set to the one of the two optabs that the call below will use.  */
3955       quotient
3956         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
3957                              op0, op1, rem_flag ? NULL_RTX : target,
3958                              unsignedp,
3959                              ((optab2->handlers[(int) compute_mode].insn_code
3960                                != CODE_FOR_nothing)
3961                               ? OPTAB_DIRECT : OPTAB_WIDEN));
3962
3963       if (quotient == 0)
3964         {
3965           /* No luck there.  Try a quotient-and-remainder insn,
3966              keeping the quotient alone.  */
3967           quotient = gen_reg_rtx (compute_mode);
3968           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
3969                                      op0, op1,
3970                                      quotient, NULL_RTX, unsignedp))
3971             {
3972               quotient = 0;
3973               if (! rem_flag)
3974                 /* Still no luck.  If we are not computing the remainder,
3975                    use a library call for the quotient.  */
3976                 quotient = sign_expand_binop (compute_mode,
3977                                               udiv_optab, sdiv_optab,
3978                                               op0, op1, target,
3979                                               unsignedp, OPTAB_LIB_WIDEN);
3980             }
3981         }
3982     }
3983
3984   if (rem_flag)
3985     {
3986       if (target && GET_MODE (target) != compute_mode)
3987         target = 0;
3988
3989       if (quotient == 0)
3990         /* No divide instruction either.  Use library for remainder.  */
3991         remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3992                                        op0, op1, target,
3993                                        unsignedp, OPTAB_LIB_WIDEN);
3994       else
3995         {
3996           /* We divided.  Now finish doing X - Y * (X / Y).  */
3997           remainder = expand_mult (compute_mode, quotient, op1,
3998                                    NULL_RTX, unsignedp);
3999           remainder = expand_binop (compute_mode, sub_optab, op0,
4000                                     remainder, target, unsignedp,
4001                                     OPTAB_LIB_WIDEN);
4002         }
4003     }
4004
4005   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4006 }
4007 \f
4008 /* Return a tree node with data type TYPE, describing the value of X.
4009    Usually this is an RTL_EXPR, if there is no obvious better choice.
4010    X may be an expression, however we only support those expressions
4011    generated by loop.c.  */
4012
4013 tree
4014 make_tree (tree type, rtx x)
4015 {
4016   tree t;
4017
4018   switch (GET_CODE (x))
4019     {
4020     case CONST_INT:
4021       t = build_int_2 (INTVAL (x),
4022                        (TREE_UNSIGNED (type)
4023                         && (GET_MODE_BITSIZE (TYPE_MODE (type)) < HOST_BITS_PER_WIDE_INT))
4024                        || INTVAL (x) >= 0 ? 0 : -1);
4025       TREE_TYPE (t) = type;
4026       return t;
4027
4028     case CONST_DOUBLE:
4029       if (GET_MODE (x) == VOIDmode)
4030         {
4031           t = build_int_2 (CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4032           TREE_TYPE (t) = type;
4033         }
4034       else
4035         {
4036           REAL_VALUE_TYPE d;
4037
4038           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4039           t = build_real (type, d);
4040         }
4041
4042       return t;
4043
4044     case CONST_VECTOR:
4045       {
4046         int i, units;
4047         rtx elt;
4048         tree t = NULL_TREE;
4049
4050         units = CONST_VECTOR_NUNITS (x);
4051
4052         /* Build a tree with vector elements.  */
4053         for (i = units - 1; i >= 0; --i)
4054           {
4055             elt = CONST_VECTOR_ELT (x, i);
4056             t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4057           }
4058
4059         return build_vector (type, t);
4060       }
4061
4062     case PLUS:
4063       return fold (build (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4064                           make_tree (type, XEXP (x, 1))));
4065
4066     case MINUS:
4067       return fold (build (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4068                           make_tree (type, XEXP (x, 1))));
4069
4070     case NEG:
4071       return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
4072
4073     case MULT:
4074       return fold (build (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4075                           make_tree (type, XEXP (x, 1))));
4076
4077     case ASHIFT:
4078       return fold (build (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4079                           make_tree (type, XEXP (x, 1))));
4080
4081     case LSHIFTRT:
4082       t = (*lang_hooks.types.unsigned_type) (type);
4083       return fold (convert (type,
4084                             build (RSHIFT_EXPR, t,
4085                                    make_tree (t, XEXP (x, 0)),
4086                                    make_tree (type, XEXP (x, 1)))));
4087
4088     case ASHIFTRT:
4089       t = (*lang_hooks.types.signed_type) (type);
4090       return fold (convert (type,
4091                             build (RSHIFT_EXPR, t,
4092                                    make_tree (t, XEXP (x, 0)),
4093                                    make_tree (type, XEXP (x, 1)))));
4094
4095     case DIV:
4096       if (TREE_CODE (type) != REAL_TYPE)
4097         t = (*lang_hooks.types.signed_type) (type);
4098       else
4099         t = type;
4100
4101       return fold (convert (type,
4102                             build (TRUNC_DIV_EXPR, t,
4103                                    make_tree (t, XEXP (x, 0)),
4104                                    make_tree (t, XEXP (x, 1)))));
4105     case UDIV:
4106       t = (*lang_hooks.types.unsigned_type) (type);
4107       return fold (convert (type,
4108                             build (TRUNC_DIV_EXPR, t,
4109                                    make_tree (t, XEXP (x, 0)),
4110                                    make_tree (t, XEXP (x, 1)))));
4111
4112     case SIGN_EXTEND:
4113     case ZERO_EXTEND:
4114       t = (*lang_hooks.types.type_for_mode) (GET_MODE (XEXP (x, 0)),
4115                                              GET_CODE (x) == ZERO_EXTEND);
4116       return fold (convert (type, make_tree (t, XEXP (x, 0))));
4117
4118    default:
4119       t = make_node (RTL_EXPR);
4120       TREE_TYPE (t) = type;
4121
4122       /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4123          ptr_mode.  So convert.  */
4124       if (POINTER_TYPE_P (type))
4125         x = convert_memory_address (TYPE_MODE (type), x);
4126
4127       RTL_EXPR_RTL (t) = x;
4128       /* There are no insns to be output
4129          when this rtl_expr is used.  */
4130       RTL_EXPR_SEQUENCE (t) = 0;
4131       return t;
4132     }
4133 }
4134
4135 /* Check whether the multiplication X * MULT + ADD overflows.
4136    X, MULT and ADD must be CONST_*.
4137    MODE is the machine mode for the computation.
4138    X and MULT must have mode MODE.  ADD may have a different mode.
4139    So can X (defaults to same as MODE).
4140    UNSIGNEDP is nonzero to do unsigned multiplication.  */
4141
4142 bool
4143 const_mult_add_overflow_p (rtx x, rtx mult, rtx add, enum machine_mode mode, int unsignedp)
4144 {
4145   tree type, mult_type, add_type, result;
4146
4147   type = (*lang_hooks.types.type_for_mode) (mode, unsignedp);
4148
4149   /* In order to get a proper overflow indication from an unsigned
4150      type, we have to pretend that it's a sizetype.  */
4151   mult_type = type;
4152   if (unsignedp)
4153     {
4154       mult_type = copy_node (type);
4155       TYPE_IS_SIZETYPE (mult_type) = 1;
4156     }
4157
4158   add_type = (GET_MODE (add) == VOIDmode ? mult_type
4159               : (*lang_hooks.types.type_for_mode) (GET_MODE (add), unsignedp));
4160
4161   result = fold (build (PLUS_EXPR, mult_type,
4162                         fold (build (MULT_EXPR, mult_type,
4163                                      make_tree (mult_type, x),
4164                                      make_tree (mult_type, mult))),
4165                         make_tree (add_type, add)));
4166
4167   return TREE_CONSTANT_OVERFLOW (result);
4168 }
4169
4170 /* Return an rtx representing the value of X * MULT + ADD.
4171    TARGET is a suggestion for where to store the result (an rtx).
4172    MODE is the machine mode for the computation.
4173    X and MULT must have mode MODE.  ADD may have a different mode.
4174    So can X (defaults to same as MODE).
4175    UNSIGNEDP is nonzero to do unsigned multiplication.
4176    This may emit insns.  */
4177
4178 rtx
4179 expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
4180                  int unsignedp)
4181 {
4182   tree type = (*lang_hooks.types.type_for_mode) (mode, unsignedp);
4183   tree add_type = (GET_MODE (add) == VOIDmode
4184                    ? type: (*lang_hooks.types.type_for_mode) (GET_MODE (add),
4185                                                               unsignedp));
4186   tree result =  fold (build (PLUS_EXPR, type,
4187                               fold (build (MULT_EXPR, type,
4188                                            make_tree (type, x),
4189                                            make_tree (type, mult))),
4190                               make_tree (add_type, add)));
4191
4192   return expand_expr (result, target, VOIDmode, 0);
4193 }
4194 \f
4195 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4196    and returning TARGET.
4197
4198    If TARGET is 0, a pseudo-register or constant is returned.  */
4199
4200 rtx
4201 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
4202 {
4203   rtx tem = 0;
4204
4205   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4206     tem = simplify_binary_operation (AND, mode, op0, op1);
4207   if (tem == 0)
4208     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4209
4210   if (target == 0)
4211     target = tem;
4212   else if (tem != target)
4213     emit_move_insn (target, tem);
4214   return target;
4215 }
4216 \f
4217 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
4218    and storing in TARGET.  Normally return TARGET.
4219    Return 0 if that cannot be done.
4220
4221    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
4222    it is VOIDmode, they cannot both be CONST_INT.
4223
4224    UNSIGNEDP is for the case where we have to widen the operands
4225    to perform the operation.  It says to use zero-extension.
4226
4227    NORMALIZEP is 1 if we should convert the result to be either zero
4228    or one.  Normalize is -1 if we should convert the result to be
4229    either zero or -1.  If NORMALIZEP is zero, the result will be left
4230    "raw" out of the scc insn.  */
4231
4232 rtx
4233 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
4234                  enum machine_mode mode, int unsignedp, int normalizep)
4235 {
4236   rtx subtarget;
4237   enum insn_code icode;
4238   enum machine_mode compare_mode;
4239   enum machine_mode target_mode = GET_MODE (target);
4240   rtx tem;
4241   rtx last = get_last_insn ();
4242   rtx pattern, comparison;
4243
4244   /* ??? Ok to do this and then fail? */
4245   op0 = protect_from_queue (op0, 0);
4246   op1 = protect_from_queue (op1, 0);
4247
4248   if (unsignedp)
4249     code = unsigned_condition (code);
4250
4251   /* If one operand is constant, make it the second one.  Only do this
4252      if the other operand is not constant as well.  */
4253
4254   if (swap_commutative_operands_p (op0, op1))
4255     {
4256       tem = op0;
4257       op0 = op1;
4258       op1 = tem;
4259       code = swap_condition (code);
4260     }
4261
4262   if (mode == VOIDmode)
4263     mode = GET_MODE (op0);
4264
4265   /* For some comparisons with 1 and -1, we can convert this to
4266      comparisons with zero.  This will often produce more opportunities for
4267      store-flag insns.  */
4268
4269   switch (code)
4270     {
4271     case LT:
4272       if (op1 == const1_rtx)
4273         op1 = const0_rtx, code = LE;
4274       break;
4275     case LE:
4276       if (op1 == constm1_rtx)
4277         op1 = const0_rtx, code = LT;
4278       break;
4279     case GE:
4280       if (op1 == const1_rtx)
4281         op1 = const0_rtx, code = GT;
4282       break;
4283     case GT:
4284       if (op1 == constm1_rtx)
4285         op1 = const0_rtx, code = GE;
4286       break;
4287     case GEU:
4288       if (op1 == const1_rtx)
4289         op1 = const0_rtx, code = NE;
4290       break;
4291     case LTU:
4292       if (op1 == const1_rtx)
4293         op1 = const0_rtx, code = EQ;
4294       break;
4295     default:
4296       break;
4297     }
4298
4299   /* If we are comparing a double-word integer with zero, we can convert
4300      the comparison into one involving a single word.  */
4301   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
4302       && GET_MODE_CLASS (mode) == MODE_INT
4303       && op1 == const0_rtx
4304       && (GET_CODE (op0) != MEM || ! MEM_VOLATILE_P (op0)))
4305     {
4306       if (code == EQ || code == NE)
4307         {
4308           rtx op00, op01, op0both;
4309
4310           /* Do a logical OR of the two words and compare the result.  */
4311           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
4312           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
4313           op0both = expand_binop (word_mode, ior_optab, op00, op01,
4314                                   NULL_RTX, unsignedp, OPTAB_DIRECT);
4315           if (op0both != 0)
4316             return emit_store_flag (target, code, op0both, op1, word_mode,
4317                                     unsignedp, normalizep);
4318         }
4319       else if (code == LT || code == GE)
4320         {
4321           rtx op0h;
4322
4323           /* If testing the sign bit, can just test on high word.  */
4324           op0h = simplify_gen_subreg (word_mode, op0, mode,
4325                                       subreg_highpart_offset (word_mode, mode));
4326           return emit_store_flag (target, code, op0h, op1, word_mode,
4327                                   unsignedp, normalizep);
4328         }
4329     }
4330
4331   /* From now on, we won't change CODE, so set ICODE now.  */
4332   icode = setcc_gen_code[(int) code];
4333
4334   /* If this is A < 0 or A >= 0, we can do this by taking the ones
4335      complement of A (for GE) and shifting the sign bit to the low bit.  */
4336   if (op1 == const0_rtx && (code == LT || code == GE)
4337       && GET_MODE_CLASS (mode) == MODE_INT
4338       && (normalizep || STORE_FLAG_VALUE == 1
4339           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4340               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4341                   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
4342     {
4343       subtarget = target;
4344
4345       /* If the result is to be wider than OP0, it is best to convert it
4346          first.  If it is to be narrower, it is *incorrect* to convert it
4347          first.  */
4348       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
4349         {
4350           op0 = protect_from_queue (op0, 0);
4351           op0 = convert_modes (target_mode, mode, op0, 0);
4352           mode = target_mode;
4353         }
4354
4355       if (target_mode != mode)
4356         subtarget = 0;
4357
4358       if (code == GE)
4359         op0 = expand_unop (mode, one_cmpl_optab, op0,
4360                            ((STORE_FLAG_VALUE == 1 || normalizep)
4361                             ? 0 : subtarget), 0);
4362
4363       if (STORE_FLAG_VALUE == 1 || normalizep)
4364         /* If we are supposed to produce a 0/1 value, we want to do
4365            a logical shift from the sign bit to the low-order bit; for
4366            a -1/0 value, we do an arithmetic shift.  */
4367         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
4368                             size_int (GET_MODE_BITSIZE (mode) - 1),
4369                             subtarget, normalizep != -1);
4370
4371       if (mode != target_mode)
4372         op0 = convert_modes (target_mode, mode, op0, 0);
4373
4374       return op0;
4375     }
4376
4377   if (icode != CODE_FOR_nothing)
4378     {
4379       insn_operand_predicate_fn pred;
4380
4381       /* We think we may be able to do this with a scc insn.  Emit the
4382          comparison and then the scc insn.
4383
4384          compare_from_rtx may call emit_queue, which would be deleted below
4385          if the scc insn fails.  So call it ourselves before setting LAST.
4386          Likewise for do_pending_stack_adjust.  */
4387
4388       emit_queue ();
4389       do_pending_stack_adjust ();
4390       last = get_last_insn ();
4391
4392       comparison
4393         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
4394       if (GET_CODE (comparison) == CONST_INT)
4395         return (comparison == const0_rtx ? const0_rtx
4396                 : normalizep == 1 ? const1_rtx
4397                 : normalizep == -1 ? constm1_rtx
4398                 : const_true_rtx);
4399
4400       /* The code of COMPARISON may not match CODE if compare_from_rtx
4401          decided to swap its operands and reverse the original code.
4402
4403          We know that compare_from_rtx returns either a CONST_INT or
4404          a new comparison code, so it is safe to just extract the
4405          code from COMPARISON.  */
4406       code = GET_CODE (comparison);
4407
4408       /* Get a reference to the target in the proper mode for this insn.  */
4409       compare_mode = insn_data[(int) icode].operand[0].mode;
4410       subtarget = target;
4411       pred = insn_data[(int) icode].operand[0].predicate;
4412       if (preserve_subexpressions_p ()
4413           || ! (*pred) (subtarget, compare_mode))
4414         subtarget = gen_reg_rtx (compare_mode);
4415
4416       pattern = GEN_FCN (icode) (subtarget);
4417       if (pattern)
4418         {
4419           emit_insn (pattern);
4420
4421           /* If we are converting to a wider mode, first convert to
4422              TARGET_MODE, then normalize.  This produces better combining
4423              opportunities on machines that have a SIGN_EXTRACT when we are
4424              testing a single bit.  This mostly benefits the 68k.
4425
4426              If STORE_FLAG_VALUE does not have the sign bit set when
4427              interpreted in COMPARE_MODE, we can do this conversion as
4428              unsigned, which is usually more efficient.  */
4429           if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4430             {
4431               convert_move (target, subtarget,
4432                             (GET_MODE_BITSIZE (compare_mode)
4433                              <= HOST_BITS_PER_WIDE_INT)
4434                             && 0 == (STORE_FLAG_VALUE
4435                                      & ((HOST_WIDE_INT) 1
4436                                         << (GET_MODE_BITSIZE (compare_mode) -1))));
4437               op0 = target;
4438               compare_mode = target_mode;
4439             }
4440           else
4441             op0 = subtarget;
4442
4443           /* If we want to keep subexpressions around, don't reuse our
4444              last target.  */
4445
4446           if (preserve_subexpressions_p ())
4447             subtarget = 0;
4448
4449           /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
4450              we don't have to do anything.  */
4451           if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4452             ;
4453           /* STORE_FLAG_VALUE might be the most negative number, so write
4454              the comparison this way to avoid a compiler-time warning.  */
4455           else if (- normalizep == STORE_FLAG_VALUE)
4456             op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
4457
4458           /* We don't want to use STORE_FLAG_VALUE < 0 below since this
4459              makes it hard to use a value of just the sign bit due to
4460              ANSI integer constant typing rules.  */
4461           else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
4462                    && (STORE_FLAG_VALUE
4463                        & ((HOST_WIDE_INT) 1
4464                           << (GET_MODE_BITSIZE (compare_mode) - 1))))
4465             op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
4466                                 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
4467                                 subtarget, normalizep == 1);
4468           else if (STORE_FLAG_VALUE & 1)
4469             {
4470               op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
4471               if (normalizep == -1)
4472                 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
4473             }
4474           else
4475             abort ();
4476
4477           /* If we were converting to a smaller mode, do the
4478              conversion now.  */
4479           if (target_mode != compare_mode)
4480             {
4481               convert_move (target, op0, 0);
4482               return target;
4483             }
4484           else
4485             return op0;
4486         }
4487     }
4488
4489   delete_insns_since (last);
4490
4491   /* If expensive optimizations, use different pseudo registers for each
4492      insn, instead of reusing the same pseudo.  This leads to better CSE,
4493      but slows down the compiler, since there are more pseudos */
4494   subtarget = (!flag_expensive_optimizations
4495                && (target_mode == mode)) ? target : NULL_RTX;
4496
4497   /* If we reached here, we can't do this with a scc insn.  However, there
4498      are some comparisons that can be done directly.  For example, if
4499      this is an equality comparison of integers, we can try to exclusive-or
4500      (or subtract) the two operands and use a recursive call to try the
4501      comparison with zero.  Don't do any of these cases if branches are
4502      very cheap.  */
4503
4504   if (BRANCH_COST > 0
4505       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
4506       && op1 != const0_rtx)
4507     {
4508       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
4509                           OPTAB_WIDEN);
4510
4511       if (tem == 0)
4512         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
4513                             OPTAB_WIDEN);
4514       if (tem != 0)
4515         tem = emit_store_flag (target, code, tem, const0_rtx,
4516                                mode, unsignedp, normalizep);
4517       if (tem == 0)
4518         delete_insns_since (last);
4519       return tem;
4520     }
4521
4522   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
4523      the constant zero.  Reject all other comparisons at this point.  Only
4524      do LE and GT if branches are expensive since they are expensive on
4525      2-operand machines.  */
4526
4527   if (BRANCH_COST == 0
4528       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
4529       || (code != EQ && code != NE
4530           && (BRANCH_COST <= 1 || (code != LE && code != GT))))
4531     return 0;
4532
4533   /* See what we need to return.  We can only return a 1, -1, or the
4534      sign bit.  */
4535
4536   if (normalizep == 0)
4537     {
4538       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
4539         normalizep = STORE_FLAG_VALUE;
4540
4541       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4542                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4543                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
4544         ;
4545       else
4546         return 0;
4547     }
4548
4549   /* Try to put the result of the comparison in the sign bit.  Assume we can't
4550      do the necessary operation below.  */
4551
4552   tem = 0;
4553
4554   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
4555      the sign bit set.  */
4556
4557   if (code == LE)
4558     {
4559       /* This is destructive, so SUBTARGET can't be OP0.  */
4560       if (rtx_equal_p (subtarget, op0))
4561         subtarget = 0;
4562
4563       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
4564                           OPTAB_WIDEN);
4565       if (tem)
4566         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
4567                             OPTAB_WIDEN);
4568     }
4569
4570   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
4571      number of bits in the mode of OP0, minus one.  */
4572
4573   if (code == GT)
4574     {
4575       if (rtx_equal_p (subtarget, op0))
4576         subtarget = 0;
4577
4578       tem = expand_shift (RSHIFT_EXPR, mode, op0,
4579                           size_int (GET_MODE_BITSIZE (mode) - 1),
4580                           subtarget, 0);
4581       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
4582                           OPTAB_WIDEN);
4583     }
4584
4585   if (code == EQ || code == NE)
4586     {
4587       /* For EQ or NE, one way to do the comparison is to apply an operation
4588          that converts the operand into a positive number if it is nonzero
4589          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
4590          for NE we negate.  This puts the result in the sign bit.  Then we
4591          normalize with a shift, if needed.
4592
4593          Two operations that can do the above actions are ABS and FFS, so try
4594          them.  If that doesn't work, and MODE is smaller than a full word,
4595          we can use zero-extension to the wider mode (an unsigned conversion)
4596          as the operation.  */
4597
4598       /* Note that ABS doesn't yield a positive number for INT_MIN, but
4599          that is compensated by the subsequent overflow when subtracting
4600          one / negating.  */
4601
4602       if (abs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4603         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
4604       else if (ffs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4605         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
4606       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
4607         {
4608           op0 = protect_from_queue (op0, 0);
4609           tem = convert_modes (word_mode, mode, op0, 1);
4610           mode = word_mode;
4611         }
4612
4613       if (tem != 0)
4614         {
4615           if (code == EQ)
4616             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
4617                                 0, OPTAB_WIDEN);
4618           else
4619             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
4620         }
4621
4622       /* If we couldn't do it that way, for NE we can "or" the two's complement
4623          of the value with itself.  For EQ, we take the one's complement of
4624          that "or", which is an extra insn, so we only handle EQ if branches
4625          are expensive.  */
4626
4627       if (tem == 0 && (code == NE || BRANCH_COST > 1))
4628         {
4629           if (rtx_equal_p (subtarget, op0))
4630             subtarget = 0;
4631
4632           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
4633           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
4634                               OPTAB_WIDEN);
4635
4636           if (tem && code == EQ)
4637             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
4638         }
4639     }
4640
4641   if (tem && normalizep)
4642     tem = expand_shift (RSHIFT_EXPR, mode, tem,
4643                         size_int (GET_MODE_BITSIZE (mode) - 1),
4644                         subtarget, normalizep == 1);
4645
4646   if (tem)
4647     {
4648       if (GET_MODE (tem) != target_mode)
4649         {
4650           convert_move (target, tem, 0);
4651           tem = target;
4652         }
4653       else if (!subtarget)
4654         {
4655           emit_move_insn (target, tem);
4656           tem = target;
4657         }
4658     }
4659   else
4660     delete_insns_since (last);
4661
4662   return tem;
4663 }
4664
4665 /* Like emit_store_flag, but always succeeds.  */
4666
4667 rtx
4668 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
4669                        enum machine_mode mode, int unsignedp, int normalizep)
4670 {
4671   rtx tem, label;
4672
4673   /* First see if emit_store_flag can do the job.  */
4674   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
4675   if (tem != 0)
4676     return tem;
4677
4678   if (normalizep == 0)
4679     normalizep = 1;
4680
4681   /* If this failed, we have to do this with set/compare/jump/set code.  */
4682
4683   if (GET_CODE (target) != REG
4684       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
4685     target = gen_reg_rtx (GET_MODE (target));
4686
4687   emit_move_insn (target, const1_rtx);
4688   label = gen_label_rtx ();
4689   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
4690                            NULL_RTX, label);
4691
4692   emit_move_insn (target, const0_rtx);
4693   emit_label (label);
4694
4695   return target;
4696 }
4697 \f
4698 /* Perform possibly multi-word comparison and conditional jump to LABEL
4699    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
4700
4701    The algorithm is based on the code in expr.c:do_jump.
4702
4703    Note that this does not perform a general comparison.  Only variants
4704    generated within expmed.c are correctly handled, others abort (but could
4705    be handled if needed).  */
4706
4707 static void
4708 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
4709                  rtx label)
4710 {
4711   /* If this mode is an integer too wide to compare properly,
4712      compare word by word.  Rely on cse to optimize constant cases.  */
4713
4714   if (GET_MODE_CLASS (mode) == MODE_INT
4715       && ! can_compare_p (op, mode, ccp_jump))
4716     {
4717       rtx label2 = gen_label_rtx ();
4718
4719       switch (op)
4720         {
4721         case LTU:
4722           do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
4723           break;
4724
4725         case LEU:
4726           do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
4727           break;
4728
4729         case LT:
4730           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
4731           break;
4732
4733         case GT:
4734           do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
4735           break;
4736
4737         case GE:
4738           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
4739           break;
4740
4741           /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
4742              that's the only equality operations we do */
4743         case EQ:
4744           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
4745             abort ();
4746           do_jump_by_parts_equality_rtx (arg1, label2, label);
4747           break;
4748
4749         case NE:
4750           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
4751             abort ();
4752           do_jump_by_parts_equality_rtx (arg1, label, label2);
4753           break;
4754
4755         default:
4756           abort ();
4757         }
4758
4759       emit_label (label2);
4760     }
4761   else
4762     emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
4763 }