gcc/expmed.c
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to the Free
20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 02111-1307, USA. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "toplev.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "tm_p.h"
32 #include "flags.h"
33 #include "insn-config.h"
34 #include "expr.h"
35 #include "optabs.h"
36 #include "real.h"
37 #include "recog.h"
38 #include "langhooks.h"
40 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
41 unsigned HOST_WIDE_INT,
42 unsigned HOST_WIDE_INT, rtx);
43 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
44 unsigned HOST_WIDE_INT, rtx);
45 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
46 unsigned HOST_WIDE_INT,
47 unsigned HOST_WIDE_INT,
48 unsigned HOST_WIDE_INT, rtx, int);
49 static rtx mask_rtx (enum machine_mode, int, int, int);
50 static rtx lshift_value (enum machine_mode, rtx, int, int);
51 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT, int);
53 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
54 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
55 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
57 /* Nonzero means divides or modulus operations are relatively cheap for
58 powers of two, so don't use branches; emit the operation instead.
59 Usually, this will mean that the MD file will emit non-branch
60 sequences. */
62 static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
63 static bool smod_pow2_cheap[NUM_MACHINE_MODES];
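/* For illustration: init_expmed sets these flags by cost comparison;
   sdiv_pow2_cheap[mode] becomes true when the cost of
   (div:MODE (reg) (const_int 32)) is at most 2 * add_cost[mode], and
   smod_pow2_cheap[mode] when the corresponding MOD costs at most
   4 * add_cost[mode].  */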
65 #ifndef SLOW_UNALIGNED_ACCESS
66 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
67 #endif
69 /* For compilers that support multiple targets with different word sizes,
70 MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD. An example
71 is the H8/300(H) compiler. */
73 #ifndef MAX_BITS_PER_WORD
74 #define MAX_BITS_PER_WORD BITS_PER_WORD
75 #endif
77 /* Reduce conditional compilation elsewhere. */
78 #ifndef HAVE_insv
79 #define HAVE_insv 0
80 #define CODE_FOR_insv CODE_FOR_nothing
81 #define gen_insv(a,b,c,d) NULL_RTX
82 #endif
83 #ifndef HAVE_extv
84 #define HAVE_extv 0
85 #define CODE_FOR_extv CODE_FOR_nothing
86 #define gen_extv(a,b,c,d) NULL_RTX
87 #endif
88 #ifndef HAVE_extzv
89 #define HAVE_extzv 0
90 #define CODE_FOR_extzv CODE_FOR_nothing
91 #define gen_extzv(a,b,c,d) NULL_RTX
92 #endif
94 /* Cost of various pieces of RTL. Note that some of these are indexed by
95 shift count and some by mode. */
96 static int zero_cost;
97 static int add_cost[NUM_MACHINE_MODES];
98 static int neg_cost[NUM_MACHINE_MODES];
99 static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
100 static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
101 static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
102 static int mul_cost[NUM_MACHINE_MODES];
103 static int div_cost[NUM_MACHINE_MODES];
104 static int mul_widen_cost[NUM_MACHINE_MODES];
105 static int mul_highpart_cost[NUM_MACHINE_MODES];
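/* For illustration: init_expmed fills these tables by pricing a small
   synthetic RTL skeleton with rtx_cost.  Assuming a target with SImode,
   shift_cost[SImode][3] is the cost of
   (ashift:SI (reg:SI 10000) (const_int 3)), while shiftadd_cost[SImode][3]
   prices the equivalent (plus (mult (reg) (const_int 8)) (reg)) form.  */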
107 void
108 init_expmed (void)
110 struct
112 struct rtx_def reg; rtunion reg_fld[2];
113 struct rtx_def plus; rtunion plus_fld1;
114 struct rtx_def neg;
115 struct rtx_def udiv; rtunion udiv_fld1;
116 struct rtx_def mult; rtunion mult_fld1;
117 struct rtx_def div; rtunion div_fld1;
118 struct rtx_def mod; rtunion mod_fld1;
119 struct rtx_def zext;
120 struct rtx_def wide_mult; rtunion wide_mult_fld1;
121 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
122 struct rtx_def wide_trunc;
123 struct rtx_def shift; rtunion shift_fld1;
124 struct rtx_def shift_mult; rtunion shift_mult_fld1;
125 struct rtx_def shift_add; rtunion shift_add_fld1;
126 struct rtx_def shift_sub; rtunion shift_sub_fld1;
127 } all;
129 rtx pow2[MAX_BITS_PER_WORD];
130 rtx cint[MAX_BITS_PER_WORD];
131 int m, n;
132 enum machine_mode mode, wider_mode;
134 zero_cost = rtx_cost (const0_rtx, 0);
136 for (m = 1; m < MAX_BITS_PER_WORD; m++)
138 pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
139 cint[m] = GEN_INT (m);
142 memset (&all, 0, sizeof all);
144 PUT_CODE (&all.reg, REG);
145 REGNO (&all.reg) = 10000;
147 PUT_CODE (&all.plus, PLUS);
148 XEXP (&all.plus, 0) = &all.reg;
149 XEXP (&all.plus, 1) = &all.reg;
151 PUT_CODE (&all.neg, NEG);
152 XEXP (&all.neg, 0) = &all.reg;
154 PUT_CODE (&all.udiv, UDIV);
155 XEXP (&all.udiv, 0) = &all.reg;
156 XEXP (&all.udiv, 1) = &all.reg;
158 PUT_CODE (&all.mult, MULT);
159 XEXP (&all.mult, 0) = &all.reg;
160 XEXP (&all.mult, 1) = &all.reg;
162 PUT_CODE (&all.div, DIV);
163 XEXP (&all.div, 0) = &all.reg;
164 XEXP (&all.div, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
166 PUT_CODE (&all.mod, MOD);
167 XEXP (&all.mod, 0) = &all.reg;
168 XEXP (&all.mod, 1) = XEXP (&all.div, 1);
170 PUT_CODE (&all.zext, ZERO_EXTEND);
171 XEXP (&all.zext, 0) = &all.reg;
173 PUT_CODE (&all.wide_mult, MULT);
174 XEXP (&all.wide_mult, 0) = &all.zext;
175 XEXP (&all.wide_mult, 1) = &all.zext;
177 PUT_CODE (&all.wide_lshr, LSHIFTRT);
178 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
180 PUT_CODE (&all.wide_trunc, TRUNCATE);
181 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
183 PUT_CODE (&all.shift, ASHIFT);
184 XEXP (&all.shift, 0) = &all.reg;
186 PUT_CODE (&all.shift_mult, MULT);
187 XEXP (&all.shift_mult, 0) = &all.reg;
189 PUT_CODE (&all.shift_add, PLUS);
190 XEXP (&all.shift_add, 0) = &all.shift_mult;
191 XEXP (&all.shift_add, 1) = &all.reg;
193 PUT_CODE (&all.shift_sub, MINUS);
194 XEXP (&all.shift_sub, 0) = &all.shift_mult;
195 XEXP (&all.shift_sub, 1) = &all.reg;
197 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
198 mode != VOIDmode;
199 mode = GET_MODE_WIDER_MODE (mode))
201 PUT_MODE (&all.reg, mode);
202 PUT_MODE (&all.plus, mode);
203 PUT_MODE (&all.neg, mode);
204 PUT_MODE (&all.udiv, mode);
205 PUT_MODE (&all.mult, mode);
206 PUT_MODE (&all.div, mode);
207 PUT_MODE (&all.mod, mode);
208 PUT_MODE (&all.wide_trunc, mode);
209 PUT_MODE (&all.shift, mode);
210 PUT_MODE (&all.shift_mult, mode);
211 PUT_MODE (&all.shift_add, mode);
212 PUT_MODE (&all.shift_sub, mode);
214 add_cost[mode] = rtx_cost (&all.plus, SET);
215 neg_cost[mode] = rtx_cost (&all.neg, SET);
216 div_cost[mode] = rtx_cost (&all.udiv, SET);
217 mul_cost[mode] = rtx_cost (&all.mult, SET);
219 sdiv_pow2_cheap[mode] = (rtx_cost (&all.div, SET) <= 2 * add_cost[mode]);
220 smod_pow2_cheap[mode] = (rtx_cost (&all.mod, SET) <= 4 * add_cost[mode]);
222 wider_mode = GET_MODE_WIDER_MODE (mode);
223 if (wider_mode != VOIDmode)
225 PUT_MODE (&all.zext, wider_mode);
226 PUT_MODE (&all.wide_mult, wider_mode);
227 PUT_MODE (&all.wide_lshr, wider_mode);
228 XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
230 mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
231 mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
234 shift_cost[mode][0] = 0;
235 shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
237 n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
238 for (m = 1; m < n; m++)
240 XEXP (&all.shift, 1) = cint[m];
241 XEXP (&all.shift_mult, 1) = pow2[m];
243 shift_cost[mode][m] = rtx_cost (&all.shift, SET);
244 shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
245 shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
250 /* Return an rtx representing minus the value of X.
251 MODE is the intended mode of the result,
252 useful if X is a CONST_INT. */
254 rtx
255 negate_rtx (enum machine_mode mode, rtx x)
257 rtx result = simplify_unary_operation (NEG, mode, x, mode);
259 if (result == 0)
260 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
262 return result;
265 /* Report on the availability of insv/extv/extzv and the desired mode
266 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
267 is false; else the mode of the specified operand. If OPNO is -1,
268 all the caller cares about is whether the insn is available. */
269 enum machine_mode
270 mode_for_extraction (enum extraction_pattern pattern, int opno)
272 const struct insn_data *data;
274 switch (pattern)
276 case EP_insv:
277 if (HAVE_insv)
279 data = &insn_data[CODE_FOR_insv];
280 break;
282 return MAX_MACHINE_MODE;
284 case EP_extv:
285 if (HAVE_extv)
287 data = &insn_data[CODE_FOR_extv];
288 break;
290 return MAX_MACHINE_MODE;
292 case EP_extzv:
293 if (HAVE_extzv)
295 data = &insn_data[CODE_FOR_extzv];
296 break;
298 return MAX_MACHINE_MODE;
300 default:
301 abort ();
304 if (opno == -1)
305 return VOIDmode;
307 /* Everyone who uses this function used to follow it with
308 if (result == VOIDmode) result = word_mode; */
309 if (data->operand[opno].mode == VOIDmode)
310 return word_mode;
311 return data->operand[opno].mode;
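/* For illustration: store_bit_field below calls
   mode_for_extraction (EP_insv, 3) to learn which mode the insv pattern
   wants for its value operand; if that operand is declared VOIDmode the
   answer is word_mode, and if the target has no insv at all the answer
   is MAX_MACHINE_MODE.  */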
315 /* Generate code to store value from rtx VALUE
316 into a bit-field within structure STR_RTX
317 containing BITSIZE bits starting at bit BITNUM.
318 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
319 ALIGN is the alignment that STR_RTX is known to have.
320 TOTAL_SIZE is the size of the structure in bytes, or -1 if varying. */
322 /* ??? Note that there are two different ideas here for how
323 to determine the size to count bits within, for a register.
324 One is BITS_PER_WORD, and the other is the size of operand 3
325 of the insv pattern.
327 If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
328 else, we use the mode of operand 3. */
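/* Worked example (assuming BITS_PER_WORD == 32 and BITS_PER_UNIT == 8):
   for a REG destination UNIT is 32, so BITNUM == 37 splits into
   OFFSET == 1 and BITPOS == 5; for a MEM destination UNIT is 8, so the
   same BITNUM gives OFFSET == 4 and BITPOS == 5.  */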
330 void
331 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
332 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
333 rtx value)
335 unsigned int unit
336 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
337 unsigned HOST_WIDE_INT offset = bitnum / unit;
338 unsigned HOST_WIDE_INT bitpos = bitnum % unit;
339 rtx op0 = str_rtx;
340 int byte_offset;
342 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
344 while (GET_CODE (op0) == SUBREG)
346 /* The following line once was done only if WORDS_BIG_ENDIAN,
347 but I think that is a mistake. WORDS_BIG_ENDIAN is
348 meaningful at a much higher level; when structures are copied
349 between memory and regs, the higher-numbered regs
350 always get higher addresses. */
351 offset += (SUBREG_BYTE (op0) / UNITS_PER_WORD);
352 /* We used to adjust BITPOS here, but now we do the whole adjustment
353 right after the loop. */
354 op0 = SUBREG_REG (op0);
357 /* Use vec_set patterns for inserting parts of vectors whenever
358 available. */
359 if (VECTOR_MODE_P (GET_MODE (op0))
360 && !MEM_P (op0)
361 && (vec_set_optab->handlers[GET_MODE (op0)].insn_code
362 != CODE_FOR_nothing)
363 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
364 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
365 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
367 enum machine_mode outermode = GET_MODE (op0);
368 enum machine_mode innermode = GET_MODE_INNER (outermode);
369 int icode = (int) vec_set_optab->handlers[outermode].insn_code;
370 int pos = bitnum / GET_MODE_BITSIZE (innermode);
371 rtx rtxpos = GEN_INT (pos);
372 rtx src = value;
373 rtx dest = op0;
374 rtx pat, seq;
375 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
376 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
377 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
379 start_sequence ();
381 if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
382 src = copy_to_mode_reg (mode1, src);
384 if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
385 rtxpos = copy_to_mode_reg (mode1, rtxpos);
387 /* We could handle this, but we should always be called with a pseudo
388 for our targets and all insns should take them as outputs. */
389 if (! (*insn_data[icode].operand[0].predicate) (dest, mode0)
390 || ! (*insn_data[icode].operand[1].predicate) (src, mode1)
391 || ! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
392 abort ();
393 pat = GEN_FCN (icode) (dest, src, rtxpos);
394 seq = get_insns ();
395 end_sequence ();
396 if (pat)
398 emit_insn (seq);
399 emit_insn (pat);
400 return dest;
404 if (flag_force_mem)
406 int old_generating_concat_p = generating_concat_p;
407 generating_concat_p = 0;
408 value = force_not_mem (value);
409 generating_concat_p = old_generating_concat_p;
412 /* If the target is a register, overwriting the entire object, or storing
413 a full-word or multi-word field can be done with just a SUBREG.
415 If the target is memory, storing any naturally aligned field can be
416 done with a simple store. For targets that support fast unaligned
417 memory, any naturally sized, unit aligned field can be done directly. */
419 byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
420 + (offset * UNITS_PER_WORD);
422 if (bitpos == 0
423 && bitsize == GET_MODE_BITSIZE (fieldmode)
424 && (!MEM_P (op0)
425 ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
426 || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
427 && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
428 : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
429 || (offset * BITS_PER_UNIT % bitsize == 0
430 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
432 if (GET_MODE (op0) != fieldmode)
434 if (GET_CODE (op0) == SUBREG)
436 if (GET_MODE (SUBREG_REG (op0)) == fieldmode
437 || GET_MODE_CLASS (fieldmode) == MODE_INT
438 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
439 op0 = SUBREG_REG (op0);
440 else
441 /* Else we've got some float mode source being extracted into
442 a different float mode destination -- this combination of
443 subregs results in Severe Tire Damage. */
444 abort ();
446 if (REG_P (op0))
447 op0 = gen_rtx_SUBREG (fieldmode, op0, byte_offset);
448 else
449 op0 = adjust_address (op0, fieldmode, offset);
451 emit_move_insn (op0, value);
452 return value;
455 /* Make sure we are playing with integral modes. Pun with subregs
456 if we aren't. This must come after the entire register case above,
457 since that case is valid for any mode. The following cases are only
458 valid for integral modes. */
460 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
461 if (imode != GET_MODE (op0))
463 if (MEM_P (op0))
464 op0 = adjust_address (op0, imode, 0);
465 else if (imode != BLKmode)
466 op0 = gen_lowpart (imode, op0);
467 else
468 abort ();
472 /* We may be accessing data outside the field, which means
473 we can alias adjacent data. */
474 if (MEM_P (op0))
476 op0 = shallow_copy_rtx (op0);
477 set_mem_alias_set (op0, 0);
478 set_mem_expr (op0, 0);
481 /* If OP0 is a register, BITPOS must count within a word.
482 But as we have it, it counts within whatever size OP0 now has.
483 On a bigendian machine, these are not the same, so convert. */
484 if (BYTES_BIG_ENDIAN
485 && !MEM_P (op0)
486 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
487 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
489 /* Storing an lsb-aligned field in a register
490 can be done with a movestrict instruction. */
492 if (!MEM_P (op0)
493 && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
494 && bitsize == GET_MODE_BITSIZE (fieldmode)
495 && (movstrict_optab->handlers[fieldmode].insn_code
496 != CODE_FOR_nothing))
498 int icode = movstrict_optab->handlers[fieldmode].insn_code;
500 /* Get appropriate low part of the value being stored. */
501 if (GET_CODE (value) == CONST_INT || REG_P (value))
502 value = gen_lowpart (fieldmode, value);
503 else if (!(GET_CODE (value) == SYMBOL_REF
504 || GET_CODE (value) == LABEL_REF
505 || GET_CODE (value) == CONST))
506 value = convert_to_mode (fieldmode, value, 0);
508 if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
509 value = copy_to_mode_reg (fieldmode, value);
511 if (GET_CODE (op0) == SUBREG)
513 if (GET_MODE (SUBREG_REG (op0)) == fieldmode
514 || GET_MODE_CLASS (fieldmode) == MODE_INT
515 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
516 op0 = SUBREG_REG (op0);
517 else
518 /* Else we've got some float mode source being extracted into
519 a different float mode destination -- this combination of
520 subregs results in Severe Tire Damage. */
521 abort ();
524 emit_insn (GEN_FCN (icode)
525 (gen_rtx_SUBREG (fieldmode, op0,
526 (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
527 + (offset * UNITS_PER_WORD)),
528 value));
530 return value;
533 /* Handle fields bigger than a word. */
535 if (bitsize > BITS_PER_WORD)
537 /* Here we transfer the words of the field
538 in the order least significant first.
539 This is because the most significant word is the one which may
540 be less than full.
541 However, only do that if the value is not BLKmode. */
543 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
544 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
545 unsigned int i;
547 /* This is the mode we must force value to, so that there will be enough
548 subwords to extract. Note that fieldmode will often (always?) be
549 VOIDmode, because that is what store_field uses to indicate that this
550 is a bit field, but passing VOIDmode to operand_subword_force will
551 result in an abort. */
552 fieldmode = GET_MODE (value);
553 if (fieldmode == VOIDmode)
554 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
556 for (i = 0; i < nwords; i++)
558 /* If I is 0, use the low-order word in both field and target;
559 if I is 1, use the next to lowest word; and so on. */
560 unsigned int wordnum = (backwards ? nwords - i - 1 : i);
561 unsigned int bit_offset = (backwards
562 ? MAX ((int) bitsize - ((int) i + 1)
563 * BITS_PER_WORD,
564 0)
565 : (int) i * BITS_PER_WORD);
567 store_bit_field (op0, MIN (BITS_PER_WORD,
568 bitsize - i * BITS_PER_WORD),
569 bitnum + bit_offset, word_mode,
570 operand_subword_force (value, wordnum, fieldmode));
572 return value;
575 /* From here on we can assume that the field to be stored in is
576 a full-word (whatever type that is), since it is shorter than a word. */
578 /* OFFSET is the number of words or bytes (UNIT says which)
579 from STR_RTX to the first word or byte containing part of the field. */
581 if (!MEM_P (op0))
583 if (offset != 0
584 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
586 if (!REG_P (op0))
588 /* Since this is a destination (lvalue), we can't copy it to a
589 pseudo. We can trivially remove a SUBREG that does not
590 change the size of the operand. Such a SUBREG may have been
591 added above. Otherwise, abort. */
592 if (GET_CODE (op0) == SUBREG
593 && (GET_MODE_SIZE (GET_MODE (op0))
594 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))))
595 op0 = SUBREG_REG (op0);
596 else
597 abort ();
599 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
600 op0, (offset * UNITS_PER_WORD));
602 offset = 0;
605 /* If VALUE is a floating-point mode, access it as an integer of the
606 corresponding size. This can occur on a machine with 64 bit registers
607 that uses SFmode for float. This can also occur for unaligned float
608 structure fields. */
609 if (GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
610 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
611 value = gen_lowpart ((GET_MODE (value) == VOIDmode
612 ? word_mode : int_mode_for_mode (GET_MODE (value))),
613 value);
615 /* Now OFFSET is nonzero only if OP0 is memory
616 and is therefore always measured in bytes. */
618 if (HAVE_insv
619 && GET_MODE (value) != BLKmode
620 && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
621 /* Ensure insv's size is wide enough for this field. */
622 && (GET_MODE_BITSIZE (op_mode) >= bitsize)
623 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
624 && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
626 int xbitpos = bitpos;
627 rtx value1;
628 rtx xop0 = op0;
629 rtx last = get_last_insn ();
630 rtx pat;
631 enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
632 int save_volatile_ok = volatile_ok;
634 volatile_ok = 1;
636 /* If this machine's insv can only insert into a register, copy OP0
637 into a register and save it back later. */
638 /* This used to check flag_force_mem, but that was a serious
639 de-optimization now that flag_force_mem is enabled by -O2. */
640 if (MEM_P (op0)
641 && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
642 (op0, VOIDmode)))
644 rtx tempreg;
645 enum machine_mode bestmode;
647 /* Get the mode to use for inserting into this field. If OP0 is
648 BLKmode, get the smallest mode consistent with the alignment. If
649 OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
650 mode. Otherwise, use the smallest mode containing the field. */
652 if (GET_MODE (op0) == BLKmode
653 || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
654 bestmode
655 = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
656 MEM_VOLATILE_P (op0));
657 else
658 bestmode = GET_MODE (op0);
660 if (bestmode == VOIDmode
661 || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
662 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
663 goto insv_loses;
665 /* Adjust address to point to the containing unit of that mode.
666 Compute offset as multiple of this unit, counting in bytes. */
667 unit = GET_MODE_BITSIZE (bestmode);
668 offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
669 bitpos = bitnum % unit;
670 op0 = adjust_address (op0, bestmode, offset);
672 /* Fetch that unit, store the bitfield in it, then store
673 the unit. */
674 tempreg = copy_to_reg (op0);
675 store_bit_field (tempreg, bitsize, bitpos, fieldmode, value);
676 emit_move_insn (op0, tempreg);
677 return value;
679 volatile_ok = save_volatile_ok;
681 /* Add OFFSET into OP0's address. */
682 if (MEM_P (xop0))
683 xop0 = adjust_address (xop0, byte_mode, offset);
685 /* If xop0 is a register, we need it in MAXMODE
686 to make it acceptable to the format of insv. */
687 if (GET_CODE (xop0) == SUBREG)
688 /* We can't just change the mode, because this might clobber op0,
689 and we will need the original value of op0 if insv fails. */
690 xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
691 if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
692 xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
694 /* On big-endian machines, we count bits from the most significant.
695 If the bit field insn does not, we must invert. */
697 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
698 xbitpos = unit - bitsize - xbitpos;
700 /* We have been counting XBITPOS within UNIT.
701 Count instead within the size of the register. */
702 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
703 xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
705 unit = GET_MODE_BITSIZE (maxmode);
707 /* Convert VALUE to maxmode (which insv insn wants) in VALUE1. */
708 value1 = value;
709 if (GET_MODE (value) != maxmode)
711 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
713 /* Optimization: Don't bother really extending VALUE
714 if it has all the bits we will actually use. However,
715 if we must narrow it, be sure we do it correctly. */
717 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
719 rtx tmp;
721 tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
722 if (! tmp)
723 tmp = simplify_gen_subreg (maxmode,
724 force_reg (GET_MODE (value),
725 value1),
726 GET_MODE (value), 0);
727 value1 = tmp;
729 else
730 value1 = gen_lowpart (maxmode, value1);
732 else if (GET_CODE (value) == CONST_INT)
733 value1 = gen_int_mode (INTVAL (value), maxmode);
734 else if (!CONSTANT_P (value))
735 /* Parse phase is supposed to make VALUE's data type
736 match that of the component reference, which is a type
737 at least as wide as the field; so VALUE should have
738 a mode that corresponds to that type. */
739 abort ();
742 /* If this machine's insv insists on a register,
743 get VALUE1 into a register. */
744 if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
745 (value1, maxmode)))
746 value1 = force_reg (maxmode, value1);
748 pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
749 if (pat)
750 emit_insn (pat);
751 else
753 delete_insns_since (last);
754 store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
757 else
758 insv_loses:
759 /* Insv is not available; store using shifts and boolean ops. */
760 store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
761 return value;
764 /* Use shifts and boolean operations to store VALUE
765 into a bit field of width BITSIZE
766 in a memory location specified by OP0 except offset by OFFSET bytes.
767 (OFFSET must be 0 if OP0 is a register.)
768 The field starts at position BITPOS within the byte.
769 (If OP0 is a register, it may be a full word or a narrower mode,
770 but BITPOS still counts within a full word,
771 which is significant on bigendian machines.) */
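/* Worked example (assuming a little-endian 32-bit word, BITSIZE == 4,
   BITPOS == 8): the code below effectively computes
   op0 = (op0 & ~0x00000F00) | ((value & 0xF) << 8),
   using mask_rtx for both masks and lshift_value or expand_shift for the
   shifted value.  */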
773 static void
774 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
775 unsigned HOST_WIDE_INT bitsize,
776 unsigned HOST_WIDE_INT bitpos, rtx value)
778 enum machine_mode mode;
779 unsigned int total_bits = BITS_PER_WORD;
780 rtx subtarget, temp;
781 int all_zero = 0;
782 int all_one = 0;
784 /* There is a case not handled here:
785 a structure with a known alignment of just a halfword
786 and a field split across two aligned halfwords within the structure.
787 Or likewise a structure with a known alignment of just a byte
788 and a field split across two bytes.
789 Such cases are not supposed to be able to occur. */
791 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
793 if (offset != 0)
794 abort ();
795 /* Special treatment for a bit field split across two registers. */
796 if (bitsize + bitpos > BITS_PER_WORD)
798 store_split_bit_field (op0, bitsize, bitpos, value);
799 return;
802 else
804 /* Get the proper mode to use for this field. We want a mode that
805 includes the entire field. If such a mode would be larger than
806 a word, we won't be doing the extraction the normal way.
807 We don't want a mode bigger than the destination. */
809 mode = GET_MODE (op0);
810 if (GET_MODE_BITSIZE (mode) == 0
811 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
812 mode = word_mode;
813 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
814 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
816 if (mode == VOIDmode)
818 /* The only way this should occur is if the field spans word
819 boundaries. */
820 store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
821 value);
822 return;
825 total_bits = GET_MODE_BITSIZE (mode);
827 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
828 be in the range 0 to total_bits-1, and put any excess bytes in
829 OFFSET. */
830 if (bitpos >= total_bits)
832 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
833 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
834 * BITS_PER_UNIT);
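/* For example, with total_bits == 32 and BITS_PER_UNIT == 8, a BITPOS of
   37 adds 4 bytes to OFFSET and leaves BITPOS == 5.  */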
837 /* Get ref to an aligned byte, halfword, or word containing the field.
838 Adjust BITPOS to be position within a word,
839 and OFFSET to be the offset of that word.
840 Then alter OP0 to refer to that word. */
841 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
842 offset -= (offset % (total_bits / BITS_PER_UNIT));
843 op0 = adjust_address (op0, mode, offset);
846 mode = GET_MODE (op0);
848 /* Now MODE is either some integral mode for a MEM as OP0,
849 or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
850 The bit field is contained entirely within OP0.
851 BITPOS is the starting bit number within OP0.
852 (OP0's mode may actually be narrower than MODE.) */
854 if (BYTES_BIG_ENDIAN)
855 /* BITPOS is the distance between our msb
856 and that of the containing datum.
857 Convert it to the distance from the lsb. */
858 bitpos = total_bits - bitsize - bitpos;
860 /* Now BITPOS is always the distance between our lsb
861 and that of OP0. */
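/* For example, with total_bits == 32, BITSIZE == 4 and a big-endian
   BITPOS of 8 (counted from the msb), the adjusted BITPOS is
   32 - 4 - 8 == 20 bits up from the lsb.  */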
863 /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
864 we must first convert its mode to MODE. */
866 if (GET_CODE (value) == CONST_INT)
868 HOST_WIDE_INT v = INTVAL (value);
870 if (bitsize < HOST_BITS_PER_WIDE_INT)
871 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
873 if (v == 0)
874 all_zero = 1;
875 else if ((bitsize < HOST_BITS_PER_WIDE_INT
876 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
877 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
878 all_one = 1;
880 value = lshift_value (mode, value, bitpos, bitsize);
882 else
884 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
885 && bitpos + bitsize != GET_MODE_BITSIZE (mode));
887 if (GET_MODE (value) != mode)
889 if ((REG_P (value) || GET_CODE (value) == SUBREG)
890 && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
891 value = gen_lowpart (mode, value);
892 else
893 value = convert_to_mode (mode, value, 1);
896 if (must_and)
897 value = expand_binop (mode, and_optab, value,
898 mask_rtx (mode, 0, bitsize, 0),
899 NULL_RTX, 1, OPTAB_LIB_WIDEN);
900 if (bitpos > 0)
901 value = expand_shift (LSHIFT_EXPR, mode, value,
902 build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
905 /* Now clear the chosen bits in OP0,
906 except that if VALUE is -1 we need not bother. */
908 subtarget = (REG_P (op0) || ! flag_force_mem) ? op0 : 0;
910 if (! all_one)
912 temp = expand_binop (mode, and_optab, op0,
913 mask_rtx (mode, bitpos, bitsize, 1),
914 subtarget, 1, OPTAB_LIB_WIDEN);
915 subtarget = temp;
917 else
918 temp = op0;
920 /* Now logical-or VALUE into OP0, unless it is zero. */
922 if (! all_zero)
923 temp = expand_binop (mode, ior_optab, temp, value,
924 subtarget, 1, OPTAB_LIB_WIDEN);
925 if (op0 != temp)
926 emit_move_insn (op0, temp);
929 /* Store a bit field that is split across multiple accessible memory objects.
931 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
932 BITSIZE is the field width; BITPOS the position of its first bit
933 (within the word).
934 VALUE is the value to store.
936 This does not yet handle fields wider than BITS_PER_WORD. */
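/* Worked example (assuming BITS_PER_WORD == 32): storing a 12-bit field
   at BITPOS == 26 of a REG takes two passes through the loop below; the
   first stores thissize == 32 - 26 == 6 bits at thispos == 26 of word 0,
   the second stores the remaining 6 bits at thispos == 0 of word 1.  */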
938 static void
939 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
940 unsigned HOST_WIDE_INT bitpos, rtx value)
942 unsigned int unit;
943 unsigned int bitsdone = 0;
945 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
946 much at a time. */
947 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
948 unit = BITS_PER_WORD;
949 else
950 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
952 /* If VALUE is a constant other than a CONST_INT, get it into a register in
953 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
954 that VALUE might be a floating-point constant. */
955 if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
957 rtx word = gen_lowpart_common (word_mode, value);
959 if (word && (value != word))
960 value = word;
961 else
962 value = gen_lowpart_common (word_mode,
963 force_reg (GET_MODE (value) != VOIDmode
964 ? GET_MODE (value)
965 : word_mode, value));
968 while (bitsdone < bitsize)
970 unsigned HOST_WIDE_INT thissize;
971 rtx part, word;
972 unsigned HOST_WIDE_INT thispos;
973 unsigned HOST_WIDE_INT offset;
975 offset = (bitpos + bitsdone) / unit;
976 thispos = (bitpos + bitsdone) % unit;
978 /* THISSIZE must not overrun a word boundary. Otherwise,
979 store_fixed_bit_field will call us again, and we will mutually
980 recurse forever. */
981 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
982 thissize = MIN (thissize, unit - thispos);
984 if (BYTES_BIG_ENDIAN)
986 int total_bits;
988 /* We must do an endian conversion exactly the same way as it is
989 done in extract_bit_field, so that the two calls to
990 extract_fixed_bit_field will have comparable arguments. */
991 if (!MEM_P (value) || GET_MODE (value) == BLKmode)
992 total_bits = BITS_PER_WORD;
993 else
994 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
996 /* Fetch successively less significant portions. */
997 if (GET_CODE (value) == CONST_INT)
998 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
999 >> (bitsize - bitsdone - thissize))
1000 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1001 else
1002 /* The args are chosen so that the last part includes the
1003 lsb. Give extract_bit_field the value it needs (with
1004 endianness compensation) to fetch the piece we want. */
1005 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1006 total_bits - bitsize + bitsdone,
1007 NULL_RTX, 1);
1009 else
1011 /* Fetch successively more significant portions. */
1012 if (GET_CODE (value) == CONST_INT)
1013 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1014 >> bitsdone)
1015 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1016 else
1017 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1018 bitsdone, NULL_RTX, 1);
1021 /* If OP0 is a register, then handle OFFSET here.
1023 When handling multiword bitfields, extract_bit_field may pass
1024 down a word_mode SUBREG of a larger REG for a bitfield that actually
1025 crosses a word boundary. Thus, for a SUBREG, we must find
1026 the current word starting from the base register. */
1027 if (GET_CODE (op0) == SUBREG)
1029 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1030 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1031 GET_MODE (SUBREG_REG (op0)));
1032 offset = 0;
1034 else if (REG_P (op0))
1036 word = operand_subword_force (op0, offset, GET_MODE (op0));
1037 offset = 0;
1039 else
1040 word = op0;
1042 /* OFFSET is in UNITs, and UNIT is in bits.
1043 store_fixed_bit_field wants offset in bytes. */
1044 store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1045 thispos, part);
1046 bitsdone += thissize;
1050 /* Generate code to extract a byte-field from STR_RTX
1051 containing BITSIZE bits, starting at BITNUM,
1052 and put it in TARGET if possible (if TARGET is nonzero).
1053 Regardless of TARGET, we return the rtx for where the value is placed.
1055 STR_RTX is the structure containing the byte (a REG or MEM).
1056 UNSIGNEDP is nonzero if this is an unsigned bit field.
1057 MODE is the natural mode of the field value once extracted.
1058 TMODE is the mode the caller would like the value to have;
1059 but the value may be returned with type MODE instead.
1061 TOTAL_SIZE is the size in bytes of the containing structure,
1062 or -1 if varying.
1064 If a TARGET is specified and we can store in it at no extra cost,
1065 we do so, and return TARGET.
1066 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1067 if they are equally easy. */
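/* Worked example (assuming a little-endian 32-bit target): extracting an
   unsigned 16-bit field starting at bit 8 of an SImode REG, with
   MODE == SImode and TMODE == HImode, produces the value
   (reg >> 8) & 0xffff, returned in TMODE or MODE as described above.  */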
1069 rtx
1070 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1071 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1072 enum machine_mode mode, enum machine_mode tmode)
1074 unsigned int unit
1075 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1076 unsigned HOST_WIDE_INT offset = bitnum / unit;
1077 unsigned HOST_WIDE_INT bitpos = bitnum % unit;
1078 rtx op0 = str_rtx;
1079 rtx spec_target = target;
1080 rtx spec_target_subreg = 0;
1081 enum machine_mode int_mode;
1082 enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1083 enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1084 enum machine_mode mode1;
1085 int byte_offset;
1087 if (tmode == VOIDmode)
1088 tmode = mode;
1090 while (GET_CODE (op0) == SUBREG)
1092 bitpos += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1093 if (bitpos > unit)
1095 offset += (bitpos / unit);
1096 bitpos %= unit;
1098 op0 = SUBREG_REG (op0);
1101 if (REG_P (op0)
1102 && mode == GET_MODE (op0)
1103 && bitnum == 0
1104 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1106 /* We're trying to extract a full register from itself. */
1107 return op0;
1110 /* Use vec_extract patterns for extracting parts of vectors whenever
1111 available. */
1112 if (VECTOR_MODE_P (GET_MODE (op0))
1113 && !MEM_P (op0)
1114 && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
1115 != CODE_FOR_nothing)
1116 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1117 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1119 enum machine_mode outermode = GET_MODE (op0);
1120 enum machine_mode innermode = GET_MODE_INNER (outermode);
1121 int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
1122 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1123 rtx rtxpos = GEN_INT (pos);
1124 rtx src = op0;
1125 rtx dest = NULL, pat, seq;
1126 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1127 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1128 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1130 if (innermode == tmode || innermode == mode)
1131 dest = target;
1133 if (!dest)
1134 dest = gen_reg_rtx (innermode);
1136 start_sequence ();
1138 if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1139 dest = copy_to_mode_reg (mode0, dest);
1141 if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1142 src = copy_to_mode_reg (mode1, src);
1144 if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1145 rtxpos = copy_to_mode_reg (mode1, rtxpos);
1147 /* We could handle this, but we should always be called with a pseudo
1148 for our targets and all insns should take them as outputs. */
1149 if (! (*insn_data[icode].operand[0].predicate) (dest, mode0)
1150 || ! (*insn_data[icode].operand[1].predicate) (src, mode1)
1151 || ! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1152 abort ();
1154 pat = GEN_FCN (icode) (dest, src, rtxpos);
1155 seq = get_insns ();
1156 end_sequence ();
1157 if (pat)
1159 emit_insn (seq);
1160 emit_insn (pat);
1161 return dest;
1165 /* Make sure we are playing with integral modes. Pun with subregs
1166 if we aren't. */
1168 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1169 if (imode != GET_MODE (op0))
1171 if (MEM_P (op0))
1172 op0 = adjust_address (op0, imode, 0);
1173 else if (imode != BLKmode)
1174 op0 = gen_lowpart (imode, op0);
1175 else
1176 abort ();
1180 /* We may be accessing data outside the field, which means
1181 we can alias adjacent data. */
1182 if (MEM_P (op0))
1184 op0 = shallow_copy_rtx (op0);
1185 set_mem_alias_set (op0, 0);
1186 set_mem_expr (op0, 0);
1189 /* Extraction of a full-word or multi-word value from a structure
1190 in a register or aligned memory can be done with just a SUBREG.
1191 A subword value in the least significant part of a register
1192 can also be extracted with a SUBREG. For this, we need the
1193 byte offset of the value in op0. */
1195 byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1197 /* If OP0 is a register, BITPOS must count within a word.
1198 But as we have it, it counts within whatever size OP0 now has.
1199 On a bigendian machine, these are not the same, so convert. */
1200 if (BYTES_BIG_ENDIAN
1201 && !MEM_P (op0)
1202 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1203 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1205 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1206 If that's wrong, the solution is to test for it and set TARGET to 0
1207 if needed. */
1209 /* Only scalar integer modes can be converted via subregs. There is an
1210 additional problem for FP modes here in that they can have a precision
1211 which is different from the size. mode_for_size uses precision, but
1212 we want a mode based on the size, so we must avoid calling it for FP
1213 modes. */
1214 mode1 = (SCALAR_INT_MODE_P (tmode)
1215 ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1216 : mode);
1218 if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1219 && bitpos % BITS_PER_WORD == 0)
1220 || (mode1 != BLKmode
1221 /* ??? The big endian test here is wrong. This is correct
1222 if the value is in a register, and if mode_for_size is not
1223 the same mode as op0. This causes us to get unnecessarily
1224 inefficient code from the Thumb port when -mbig-endian. */
1225 && (BYTES_BIG_ENDIAN
1226 ? bitpos + bitsize == BITS_PER_WORD
1227 : bitpos == 0)))
1228 && ((!MEM_P (op0)
1229 && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1230 GET_MODE_BITSIZE (GET_MODE (op0)))
1231 && GET_MODE_SIZE (mode1) != 0
1232 && byte_offset % GET_MODE_SIZE (mode1) == 0)
1233 || (MEM_P (op0)
1234 && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1235 || (offset * BITS_PER_UNIT % bitsize == 0
1236 && MEM_ALIGN (op0) % bitsize == 0)))))
1238 if (mode1 != GET_MODE (op0))
1240 if (GET_CODE (op0) == SUBREG)
1242 if (GET_MODE (SUBREG_REG (op0)) == mode1
1243 || GET_MODE_CLASS (mode1) == MODE_INT
1244 || GET_MODE_CLASS (mode1) == MODE_PARTIAL_INT)
1245 op0 = SUBREG_REG (op0);
1246 else
1247 /* Else we've got some float mode source being extracted into
1248 a different float mode destination -- this combination of
1249 subregs results in Severe Tire Damage. */
1250 goto no_subreg_mode_swap;
1252 if (REG_P (op0))
1253 op0 = gen_rtx_SUBREG (mode1, op0, byte_offset);
1254 else
1255 op0 = adjust_address (op0, mode1, offset);
1257 if (mode1 != mode)
1258 return convert_to_mode (tmode, op0, unsignedp);
1259 return op0;
1261 no_subreg_mode_swap:
1263 /* Handle fields bigger than a word. */
1265 if (bitsize > BITS_PER_WORD)
1267 /* Here we transfer the words of the field
1268 in the order least significant first.
1269 This is because the most significant word is the one which may
1270 be less than full. */
1272 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1273 unsigned int i;
1275 if (target == 0 || !REG_P (target))
1276 target = gen_reg_rtx (mode);
1278 /* Indicate for flow that the entire target reg is being set. */
1279 emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1281 for (i = 0; i < nwords; i++)
1283 /* If I is 0, use the low-order word in both field and target;
1284 if I is 1, use the next to lowest word; and so on. */
1285 /* Word number in TARGET to use. */
1286 unsigned int wordnum
1287 = (WORDS_BIG_ENDIAN
1288 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1289 : i);
1290 /* Offset from start of field in OP0. */
1291 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1292 ? MAX (0, ((int) bitsize - ((int) i + 1)
1293 * (int) BITS_PER_WORD))
1294 : (int) i * BITS_PER_WORD);
1295 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1296 rtx result_part
1297 = extract_bit_field (op0, MIN (BITS_PER_WORD,
1298 bitsize - i * BITS_PER_WORD),
1299 bitnum + bit_offset, 1, target_part, mode,
1300 word_mode);
1302 if (target_part == 0)
1303 abort ();
1305 if (result_part != target_part)
1306 emit_move_insn (target_part, result_part);
1309 if (unsignedp)
1311 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1312 need to be zero'd out. */
1313 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1315 unsigned int i, total_words;
1317 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1318 for (i = nwords; i < total_words; i++)
1319 emit_move_insn
1320 (operand_subword (target,
1321 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1322 1, VOIDmode),
1323 const0_rtx);
1325 return target;
1328 /* Signed bit field: sign-extend with two arithmetic shifts. */
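/* For example, assuming 32-bit words and a signed 40-bit field extracted
   into a DImode TARGET, this shifts left by 64 - 40 == 24 bits and then
   arithmetic-shifts right by 24 bits so the sign bit is replicated.  */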
1329 target = expand_shift (LSHIFT_EXPR, mode, target,
1330 build_int_cst (NULL_TREE,
1331 GET_MODE_BITSIZE (mode) - bitsize),
1332 NULL_RTX, 0);
1333 return expand_shift (RSHIFT_EXPR, mode, target,
1334 build_int_cst (NULL_TREE,
1335 GET_MODE_BITSIZE (mode) - bitsize),
1336 NULL_RTX, 0);
1339 /* From here on we know the desired field is smaller than a word. */
1341 /* Check if there is a correspondingly-sized integer field, so we can
1342 safely extract it as one size of integer, if necessary; then
1343 truncate or extend to the size that is wanted; then use SUBREGs or
1344 convert_to_mode to get one of the modes we really wanted. */
1346 int_mode = int_mode_for_mode (tmode);
1347 if (int_mode == BLKmode)
1348 int_mode = int_mode_for_mode (mode);
1349 if (int_mode == BLKmode)
1350 abort (); /* Should probably push op0 out to memory and then
1351 do a load. */
1353 /* OFFSET is the number of words or bytes (UNIT says which)
1354 from STR_RTX to the first word or byte containing part of the field. */
1356 if (!MEM_P (op0))
1358 if (offset != 0
1359 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1361 if (!REG_P (op0))
1362 op0 = copy_to_reg (op0);
1363 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1364 op0, (offset * UNITS_PER_WORD));
1366 offset = 0;
1369 /* Now OFFSET is nonzero only for memory operands. */
1371 if (unsignedp)
1373 if (HAVE_extzv
1374 && (GET_MODE_BITSIZE (extzv_mode) >= bitsize)
1375 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1376 && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1378 unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1379 rtx bitsize_rtx, bitpos_rtx;
1380 rtx last = get_last_insn ();
1381 rtx xop0 = op0;
1382 rtx xtarget = target;
1383 rtx xspec_target = spec_target;
1384 rtx xspec_target_subreg = spec_target_subreg;
1385 rtx pat;
1386 enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1388 if (MEM_P (xop0))
1390 int save_volatile_ok = volatile_ok;
1391 volatile_ok = 1;
1393 /* Is the memory operand acceptable? */
1394 if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1395 (xop0, GET_MODE (xop0))))
1397 /* No, load into a reg and extract from there. */
1398 enum machine_mode bestmode;
1400 /* Get the mode to use for inserting into this field. If
1401 OP0 is BLKmode, get the smallest mode consistent with the
1402 alignment. If OP0 is a non-BLKmode object that is no
1403 wider than MAXMODE, use its mode. Otherwise, use the
1404 smallest mode containing the field. */
1406 if (GET_MODE (xop0) == BLKmode
1407 || (GET_MODE_SIZE (GET_MODE (op0))
1408 > GET_MODE_SIZE (maxmode)))
1409 bestmode = get_best_mode (bitsize, bitnum,
1410 MEM_ALIGN (xop0), maxmode,
1411 MEM_VOLATILE_P (xop0));
1412 else
1413 bestmode = GET_MODE (xop0);
1415 if (bestmode == VOIDmode
1416 || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1417 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1418 goto extzv_loses;
1420 /* Compute offset as multiple of this unit,
1421 counting in bytes. */
1422 unit = GET_MODE_BITSIZE (bestmode);
1423 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1424 xbitpos = bitnum % unit;
1425 xop0 = adjust_address (xop0, bestmode, xoffset);
1427 /* Fetch it to a register in that size. */
1428 xop0 = force_reg (bestmode, xop0);
1430 /* XBITPOS counts within UNIT, which is what is expected. */
1432 else
1433 /* Get ref to first byte containing part of the field. */
1434 xop0 = adjust_address (xop0, byte_mode, xoffset);
1436 volatile_ok = save_volatile_ok;
1439 /* If op0 is a register, we need it in MAXMODE (which is usually
1440 SImode) to make it acceptable to the format of extzv. */
1441 if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1442 goto extzv_loses;
1443 if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1444 xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1446 /* On big-endian machines, we count bits from the most significant.
1447 If the bit field insn does not, we must invert. */
1448 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1449 xbitpos = unit - bitsize - xbitpos;
1451 /* Now convert from counting within UNIT to counting in MAXMODE. */
1452 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1453 xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1455 unit = GET_MODE_BITSIZE (maxmode);
1457 if (xtarget == 0
1458 || (flag_force_mem && MEM_P (xtarget)))
1459 xtarget = xspec_target = gen_reg_rtx (tmode);
1461 if (GET_MODE (xtarget) != maxmode)
1463 if (REG_P (xtarget))
1465 int wider = (GET_MODE_SIZE (maxmode)
1466 > GET_MODE_SIZE (GET_MODE (xtarget)));
1467 xtarget = gen_lowpart (maxmode, xtarget);
1468 if (wider)
1469 xspec_target_subreg = xtarget;
1471 else
1472 xtarget = gen_reg_rtx (maxmode);
1475 /* If this machine's extzv insists on a register target,
1476 make sure we have one. */
1477 if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1478 (xtarget, maxmode)))
1479 xtarget = gen_reg_rtx (maxmode);
1481 bitsize_rtx = GEN_INT (bitsize);
1482 bitpos_rtx = GEN_INT (xbitpos);
1484 pat = gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1485 if (pat)
1487 emit_insn (pat);
1488 target = xtarget;
1489 spec_target = xspec_target;
1490 spec_target_subreg = xspec_target_subreg;
1492 else
1494 delete_insns_since (last);
1495 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1496 bitpos, target, 1);
1499 else
1500 extzv_loses:
1501 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1502 bitpos, target, 1);
1504 else
1506 if (HAVE_extv
1507 && (GET_MODE_BITSIZE (extv_mode) >= bitsize)
1508 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1509 && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1511 int xbitpos = bitpos, xoffset = offset;
1512 rtx bitsize_rtx, bitpos_rtx;
1513 rtx last = get_last_insn ();
1514 rtx xop0 = op0, xtarget = target;
1515 rtx xspec_target = spec_target;
1516 rtx xspec_target_subreg = spec_target_subreg;
1517 rtx pat;
1518 enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1520 if (MEM_P (xop0))
1522 /* Is the memory operand acceptable? */
1523 if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1524 (xop0, GET_MODE (xop0))))
1526 /* No, load into a reg and extract from there. */
1527 enum machine_mode bestmode;
1529 /* Get the mode to use for inserting into this field. If
1530 OP0 is BLKmode, get the smallest mode consistent with the
1531 alignment. If OP0 is a non-BLKmode object that is no
1532 wider than MAXMODE, use its mode. Otherwise, use the
1533 smallest mode containing the field. */
1535 if (GET_MODE (xop0) == BLKmode
1536 || (GET_MODE_SIZE (GET_MODE (op0))
1537 > GET_MODE_SIZE (maxmode)))
1538 bestmode = get_best_mode (bitsize, bitnum,
1539 MEM_ALIGN (xop0), maxmode,
1540 MEM_VOLATILE_P (xop0));
1541 else
1542 bestmode = GET_MODE (xop0);
1544 if (bestmode == VOIDmode
1545 || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1546 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1547 goto extv_loses;
1549 /* Compute offset as multiple of this unit,
1550 counting in bytes. */
1551 unit = GET_MODE_BITSIZE (bestmode);
1552 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1553 xbitpos = bitnum % unit;
1554 xop0 = adjust_address (xop0, bestmode, xoffset);
1556 /* Fetch it to a register in that size. */
1557 xop0 = force_reg (bestmode, xop0);
1559 /* XBITPOS counts within UNIT, which is what is expected. */
1561 else
1562 /* Get ref to first byte containing part of the field. */
1563 xop0 = adjust_address (xop0, byte_mode, xoffset);
1566 /* If op0 is a register, we need it in MAXMODE (which is usually
1567 SImode) to make it acceptable to the format of extv. */
1568 if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1569 goto extv_loses;
1570 if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1571 xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1573 /* On big-endian machines, we count bits from the most significant.
1574 If the bit field insn does not, we must invert. */
1575 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1576 xbitpos = unit - bitsize - xbitpos;
1578 /* XBITPOS counts within a size of UNIT.
1579 Adjust to count within a size of MAXMODE. */
1580 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1581 xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1583 unit = GET_MODE_BITSIZE (maxmode);
1585 if (xtarget == 0
1586 || (flag_force_mem && MEM_P (xtarget)))
1587 xtarget = xspec_target = gen_reg_rtx (tmode);
1589 if (GET_MODE (xtarget) != maxmode)
1591 if (REG_P (xtarget))
1593 int wider = (GET_MODE_SIZE (maxmode)
1594 > GET_MODE_SIZE (GET_MODE (xtarget)));
1595 xtarget = gen_lowpart (maxmode, xtarget);
1596 if (wider)
1597 xspec_target_subreg = xtarget;
1599 else
1600 xtarget = gen_reg_rtx (maxmode);
1603 /* If this machine's extv insists on a register target,
1604 make sure we have one. */
1605 if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1606 (xtarget, maxmode)))
1607 xtarget = gen_reg_rtx (maxmode);
1609 bitsize_rtx = GEN_INT (bitsize);
1610 bitpos_rtx = GEN_INT (xbitpos);
1612 pat = gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1613 if (pat)
1615 emit_insn (pat);
1616 target = xtarget;
1617 spec_target = xspec_target;
1618 spec_target_subreg = xspec_target_subreg;
1620 else
1622 delete_insns_since (last);
1623 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1624 bitpos, target, 0);
1627 else
1628 extv_loses:
1629 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1630 bitpos, target, 0);
1632 if (target == spec_target)
1633 return target;
1634 if (target == spec_target_subreg)
1635 return spec_target;
1636 if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1638 /* If the target mode is floating-point, first convert to the
1639 integer mode of that size and then access it as a floating-point
1640 value via a SUBREG. */
1641 if (GET_MODE_CLASS (tmode) != MODE_INT
1642 && GET_MODE_CLASS (tmode) != MODE_PARTIAL_INT)
1644 target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
1645 MODE_INT, 0),
1646 target, unsignedp);
1647 return gen_lowpart (tmode, target);
1649 else
1650 return convert_to_mode (tmode, target, unsignedp);
1652 return target;
1655 /* Extract a bit field using shifts and boolean operations.
1656 Returns an rtx to represent the value.
1657 OP0 addresses a register (word) or memory (byte).
1658 BITPOS says which bit within the word or byte the bit field starts in.
1659 OFFSET says how many bytes farther the bit field starts;
1660 it is 0 if OP0 is a register.
1661 BITSIZE says how many bits long the bit field is.
1662 (If OP0 is a register, it may be narrower than a full word,
1663 but BITPOS still counts within a full word,
1664 which is significant on bigendian machines.)
1666 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1667 If TARGET is nonzero, attempts to store the value there
1668 and return TARGET, but this is not guaranteed.
1669 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
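/* Worked example (assuming a little-endian 32-bit word, BITPOS == 5,
   BITSIZE == 3): for UNSIGNEDP the code below computes (op0 >> 5) & 7;
   for a signed field it instead shifts the field's msb up to the word's
   msb and arithmetic-shifts back down so the sign is extended.  */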
1671 static rtx
1672 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1673 unsigned HOST_WIDE_INT offset,
1674 unsigned HOST_WIDE_INT bitsize,
1675 unsigned HOST_WIDE_INT bitpos, rtx target,
1676 int unsignedp)
1678 unsigned int total_bits = BITS_PER_WORD;
1679 enum machine_mode mode;
1681 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1683 /* Special treatment for a bit field split across two registers. */
1684 if (bitsize + bitpos > BITS_PER_WORD)
1685 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1687 else
1689 /* Get the proper mode to use for this field. We want a mode that
1690 includes the entire field. If such a mode would be larger than
1691 a word, we won't be doing the extraction the normal way. */
1693 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1694 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1696 if (mode == VOIDmode)
1697 /* The only way this should occur is if the field spans word
1698 boundaries. */
1699 return extract_split_bit_field (op0, bitsize,
1700 bitpos + offset * BITS_PER_UNIT,
1701 unsignedp);
1703 total_bits = GET_MODE_BITSIZE (mode);
1705 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1706 be in the range 0 to total_bits-1, and put any excess bytes in
1707 OFFSET. */
1708 if (bitpos >= total_bits)
1710 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1711 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1712 * BITS_PER_UNIT);
1715 /* Get ref to an aligned byte, halfword, or word containing the field.
1716 Adjust BITPOS to be position within a word,
1717 and OFFSET to be the offset of that word.
1718 Then alter OP0 to refer to that word. */
1719 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1720 offset -= (offset % (total_bits / BITS_PER_UNIT));
1721 op0 = adjust_address (op0, mode, offset);
1724 mode = GET_MODE (op0);
1726 if (BYTES_BIG_ENDIAN)
1727 /* BITPOS is the distance between our msb and that of OP0.
1728 Convert it to the distance from the lsb. */
1729 bitpos = total_bits - bitsize - bitpos;
1731 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1732 We have reduced the big-endian case to the little-endian case. */
1734 if (unsignedp)
1736 if (bitpos)
1738 /* If the field does not already start at the lsb,
1739 shift it so it does. */
1740 tree amount = build_int_cst (NULL_TREE, bitpos);
1741 /* Maybe propagate the target for the shift. */
1742 /* But not if we will return it--could confuse integrate.c. */
1743 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1744 if (tmode != mode) subtarget = 0;
1745 op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1747 /* Convert the value to the desired mode. */
1748 if (mode != tmode)
1749 op0 = convert_to_mode (tmode, op0, 1);
1751 /* Unless the msb of the field used to be the msb when we shifted,
1752 mask out the upper bits. */
1754 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1755 return expand_binop (GET_MODE (op0), and_optab, op0,
1756 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1757 target, 1, OPTAB_LIB_WIDEN);
1758 return op0;
1761 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1762 then arithmetic-shift its lsb to the lsb of the word. */
1763 op0 = force_reg (mode, op0);
1764 if (mode != tmode)
1765 target = 0;
1767 /* Find the narrowest integer mode that contains the field. */
1769 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1770 mode = GET_MODE_WIDER_MODE (mode))
1771 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1773 op0 = convert_to_mode (mode, op0, 0);
1774 break;
1777 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1779 tree amount
1780 = build_int_cst (NULL_TREE,
1781 GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1782 /* Maybe propagate the target for the shift. */
1783 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1784 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1787 return expand_shift (RSHIFT_EXPR, mode, op0,
1788 build_int_cst (NULL_TREE,
1789 GET_MODE_BITSIZE (mode) - bitsize),
1790 target, 0);
1793 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1794 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1795 complement of that if COMPLEMENT. The mask is truncated if
1796 necessary to the width of mode MODE. The mask is zero-extended if
1797 BITSIZE+BITPOS is too small for MODE. */
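/* As an illustration of the rule above (an assumed example, not part of
   the surrounding code): in SImode, BITPOS == 4 and BITSIZE == 8 give
   the mask 0x00000ff0, and with COMPLEMENT nonzero the value
   0xfffff00f instead.  */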
1799 static rtx
1800 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1802 HOST_WIDE_INT masklow, maskhigh;
1804 if (bitsize == 0)
1805 masklow = 0;
1806 else if (bitpos < HOST_BITS_PER_WIDE_INT)
1807 masklow = (HOST_WIDE_INT) -1 << bitpos;
1808 else
1809 masklow = 0;
1811 if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1812 masklow &= ((unsigned HOST_WIDE_INT) -1
1813 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1815 if (bitpos <= HOST_BITS_PER_WIDE_INT)
1816 maskhigh = -1;
1817 else
1818 maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1820 if (bitsize == 0)
1821 maskhigh = 0;
1822 else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1823 maskhigh &= ((unsigned HOST_WIDE_INT) -1
1824 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1825 else
1826 maskhigh = 0;
1828 if (complement)
1830 maskhigh = ~maskhigh;
1831 masklow = ~masklow;
1834 return immed_double_const (masklow, maskhigh, mode);
1837 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1838 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1840 static rtx
1841 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1843 unsigned HOST_WIDE_INT v = INTVAL (value);
1844 HOST_WIDE_INT low, high;
1846 if (bitsize < HOST_BITS_PER_WIDE_INT)
1847 v &= ~((HOST_WIDE_INT) -1 << bitsize);
1849 if (bitpos < HOST_BITS_PER_WIDE_INT)
1851 low = v << bitpos;
1852 high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1854 else
1856 low = 0;
1857 high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1860 return immed_double_const (low, high, mode);
1863 /* Extract a bit field that is split across two words
1864 and return an RTX for the result.
1866 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1867 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1868 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
1870 static rtx
1871 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1872 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1874 unsigned int unit;
1875 unsigned int bitsdone = 0;
1876 rtx result = NULL_RTX;
1877 int first = 1;
1879 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1880 much at a time. */
1881 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1882 unit = BITS_PER_WORD;
1883 else
1884 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1886 while (bitsdone < bitsize)
1888 unsigned HOST_WIDE_INT thissize;
1889 rtx part, word;
1890 unsigned HOST_WIDE_INT thispos;
1891 unsigned HOST_WIDE_INT offset;
1893 offset = (bitpos + bitsdone) / unit;
1894 thispos = (bitpos + bitsdone) % unit;
1896 /* THISSIZE must not overrun a word boundary. Otherwise,
1897 extract_fixed_bit_field will call us again, and we will mutually
1898 recurse forever. */
1899 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1900 thissize = MIN (thissize, unit - thispos);
1902 /* If OP0 is a register, then handle OFFSET here.
1904 When handling multiword bitfields, extract_bit_field may pass
1905 down a word_mode SUBREG of a larger REG for a bitfield that actually
1906 crosses a word boundary. Thus, for a SUBREG, we must find
1907 the current word starting from the base register. */
1908 if (GET_CODE (op0) == SUBREG)
1910 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1911 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1912 GET_MODE (SUBREG_REG (op0)));
1913 offset = 0;
1915 else if (REG_P (op0))
1917 word = operand_subword_force (op0, offset, GET_MODE (op0));
1918 offset = 0;
1920 else
1921 word = op0;
1923 /* Extract the parts in bit-counting order,
1924 whose meaning is determined by BYTES_PER_UNIT.
1925 OFFSET is in UNITs, and UNIT is in bits.
1926 extract_fixed_bit_field wants offset in bytes. */
1927 part = extract_fixed_bit_field (word_mode, word,
1928 offset * unit / BITS_PER_UNIT,
1929 thissize, thispos, 0, 1);
1930 bitsdone += thissize;
1932 /* Shift this part into place for the result. */
1933 if (BYTES_BIG_ENDIAN)
1935 if (bitsize != bitsdone)
1936 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1937 build_int_cst (NULL_TREE, bitsize - bitsdone),
1938 0, 1);
1940 else
1942 if (bitsdone != thissize)
1943 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1944 build_int_cst (NULL_TREE,
1945 bitsdone - thissize), 0, 1);
1948 if (first)
1949 result = part;
1950 else
1951 /* Combine the parts with bitwise or. This works
1952 because we extracted each part as an unsigned bit field. */
1953 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1954 OPTAB_LIB_WIDEN);
1956 first = 0;
1959 /* Unsigned bit field: we are done. */
1960 if (unsignedp)
1961 return result;
1962 /* Signed bit field: sign-extend with two arithmetic shifts. */
1963 result = expand_shift (LSHIFT_EXPR, word_mode, result,
1964 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1965 NULL_RTX, 0);
1966 return expand_shift (RSHIFT_EXPR, word_mode, result,
1967 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1968 NULL_RTX, 0);
1971 /* Add INC into TARGET. */
1973 void
1974 expand_inc (rtx target, rtx inc)
1976 rtx value = expand_binop (GET_MODE (target), add_optab,
1977 target, inc,
1978 target, 0, OPTAB_LIB_WIDEN);
1979 if (value != target)
1980 emit_move_insn (target, value);
1983 /* Subtract DEC from TARGET. */
1985 void
1986 expand_dec (rtx target, rtx dec)
1988 rtx value = expand_binop (GET_MODE (target), sub_optab,
1989 target, dec,
1990 target, 0, OPTAB_LIB_WIDEN);
1991 if (value != target)
1992 emit_move_insn (target, value);
1995 /* Output a shift instruction for expression code CODE,
1996 with SHIFTED being the rtx for the value to shift,
1997 and AMOUNT the tree for the amount to shift by.
1998 Store the result in the rtx TARGET, if that is convenient.
1999 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2000 Return the rtx for where the value is. */
2003 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2004 tree amount, rtx target, int unsignedp)
2006 rtx op1, temp = 0;
2007 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2008 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2009 int try;
2011 /* We used to detect shift counts computed by NEGATE_EXPR
2012 and shift in the other direction, but that does not work
2013 on all machines. */
2015 op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
2017 if (SHIFT_COUNT_TRUNCATED)
2019 if (GET_CODE (op1) == CONST_INT
2020 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2021 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2022 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2023 % GET_MODE_BITSIZE (mode));
2024 else if (GET_CODE (op1) == SUBREG
2025 && subreg_lowpart_p (op1))
2026 op1 = SUBREG_REG (op1);
2029 if (op1 == const0_rtx)
2030 return shifted;
2032 /* Check whether it's cheaper to implement a left shift by a constant
2033 bit count by a sequence of additions. */
2034 if (code == LSHIFT_EXPR
2035 && GET_CODE (op1) == CONST_INT
2036 && INTVAL (op1) > 0
2037 && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2038 && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode])
2040 int i;
2041 for (i = 0; i < INTVAL (op1); i++)
2043 temp = force_reg (mode, shifted);
2044 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2045 unsignedp, OPTAB_LIB_WIDEN);
2047 return shifted;
2050 for (try = 0; temp == 0 && try < 3; try++)
2052 enum optab_methods methods;
2054 if (try == 0)
2055 methods = OPTAB_DIRECT;
2056 else if (try == 1)
2057 methods = OPTAB_WIDEN;
2058 else
2059 methods = OPTAB_LIB_WIDEN;
2061 if (rotate)
2063 /* Widening does not work for rotation. */
2064 if (methods == OPTAB_WIDEN)
2065 continue;
2066 else if (methods == OPTAB_LIB_WIDEN)
2068 /* If we have been unable to open-code this by a rotation,
2069 do it as the IOR of two shifts. I.e., to rotate A
2070 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2071 where C is the bitsize of A.
2073 It is theoretically possible that the target machine might
2074 not be able to perform either shift and hence we would
2075 be making two libcalls rather than just the one for the
2076 shift (similarly if IOR could not be done). We will allow
2077 this extremely unlikely lossage to avoid complicating the
2078 code below. */
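/* For example (illustrative only), rotating a 32-bit value A left by 8
   becomes (A << 8) | ((unsigned) A >> 24).  */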
2080 rtx subtarget = target == shifted ? 0 : target;
2081 rtx temp1;
2082 tree type = TREE_TYPE (amount);
2083 tree new_amount = make_tree (type, op1);
2084 tree other_amount
2085 = fold (build2 (MINUS_EXPR, type, convert
2086 (type, build_int_cst
2087 (NULL_TREE, GET_MODE_BITSIZE (mode))),
2088 amount));
2090 shifted = force_reg (mode, shifted);
2092 temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2093 mode, shifted, new_amount, subtarget, 1);
2094 temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2095 mode, shifted, other_amount, 0, 1);
2096 return expand_binop (mode, ior_optab, temp, temp1, target,
2097 unsignedp, methods);
2100 temp = expand_binop (mode,
2101 left ? rotl_optab : rotr_optab,
2102 shifted, op1, target, unsignedp, methods);
2104 /* If we don't have the rotate, but we are rotating by a constant
2105 that is in range, try a rotate in the opposite direction. */
2107 if (temp == 0 && GET_CODE (op1) == CONST_INT
2108 && INTVAL (op1) > 0
2109 && (unsigned int) INTVAL (op1) < GET_MODE_BITSIZE (mode))
2110 temp = expand_binop (mode,
2111 left ? rotr_optab : rotl_optab,
2112 shifted,
2113 GEN_INT (GET_MODE_BITSIZE (mode)
2114 - INTVAL (op1)),
2115 target, unsignedp, methods);
2117 else if (unsignedp)
2118 temp = expand_binop (mode,
2119 left ? ashl_optab : lshr_optab,
2120 shifted, op1, target, unsignedp, methods);
2122 /* Do arithmetic shifts.
2123 Also, if we are going to widen the operand, we can just as well
2124 use an arithmetic right-shift instead of a logical one. */
2125 if (temp == 0 && ! rotate
2126 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2128 enum optab_methods methods1 = methods;
2130 /* If trying to widen a log shift to an arithmetic shift,
2131 don't accept an arithmetic shift of the same size. */
2132 if (unsignedp)
2133 methods1 = OPTAB_MUST_WIDEN;
2135 /* Arithmetic shift */
2137 temp = expand_binop (mode,
2138 left ? ashl_optab : ashr_optab,
2139 shifted, op1, target, unsignedp, methods1);
2142 /* We used to try extzv here for logical right shifts, but that was
2143 only useful for one machine, the VAX, and caused poor code
2144 generation there for lshrdi3, so the code was deleted and a
2145 define_expand for lshrsi3 was added to vax.md. */
2148 if (temp == 0)
2149 abort ();
2150 return temp;
2153 enum alg_code { alg_zero, alg_m, alg_shift,
2154 alg_add_t_m2, alg_sub_t_m2,
2155 alg_add_factor, alg_sub_factor,
2156 alg_add_t2_m, alg_sub_t2_m };
2158 /* This structure holds the "cost" of a multiply sequence. The
2159 "cost" field holds the total rtx_cost of every operator in the
2160 synthetic multiplication sequence, hence cost(a op b) is defined
2161 as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2162 The "latency" field holds the minimum possible latency of the
2163 synthetic multiply, on a hypothetical infinitely parallel CPU.
2164 This is the critical path, or the maximum height, of the expression
2165 tree which is the sum of rtx_costs on the most expensive path from
2166 any leaf to the root. Hence latency(a op b) is defined as zero for
2167 leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise. */
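/* As a hypothetical example of the distinction: a sequence computing
   (a + b) + (c + d) has cost 3 * add but latency only 2 * add, since the
   two inner additions can execute in parallel.  */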
2169 struct mult_cost {
2170 short cost; /* Total rtx_cost of the multiplication sequence. */
2171 short latency; /* The latency of the multiplication sequence. */
2174 /* This macro is used to compare a pointer to a mult_cost against a
2175 single integer "rtx_cost" value. This is equivalent to the macro
2176 CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}. */
2177 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y) \
2178 || ((X)->cost == (Y) && (X)->latency < (Y)))
2180 /* This macro is used to compare two pointers to mult_costs against
2181 each other. The macro returns true if X is cheaper than Y.
2182 Currently, the cheaper of two mult_costs is the one with the
2183 lower "cost". If "cost"s are tied, the lower latency is cheaper. */
2184 #define CHEAPER_MULT_COST(X,Y) ((X)->cost < (Y)->cost \
2185 || ((X)->cost == (Y)->cost \
2186 && (X)->latency < (Y)->latency))
2188 /* This structure records a sequence of operations.
2189 `ops' is the number of operations recorded.
2190 `cost' is their total cost.
2191 The operations are stored in `op' and the corresponding
2192 logarithms of the integer coefficients in `log'.
2194 These are the operations:
2195 alg_zero total := 0;
2196 alg_m total := multiplicand;
2197 alg_shift total := total * coeff
2198 alg_add_t_m2 total := total + multiplicand * coeff;
2199 alg_sub_t_m2 total := total - multiplicand * coeff;
2200 alg_add_factor total := total * coeff + total;
2201 alg_sub_factor total := total * coeff - total;
2202 alg_add_t2_m total := total * coeff + multiplicand;
2203 alg_sub_t2_m total := total * coeff - multiplicand;
2205 The first operand must be either alg_zero or alg_m. */
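/* For illustration (a hypothetical entry, shown only to make the encoding
   concrete): multiplying by 10 might be recorded as three operations
   op[] = { alg_m, alg_add_t2_m, alg_shift } with log[] = { 0, 2, 1 }
   (the first log is unused), i.e. total := x; total := total * 4 + x;
   total := total * 2.  */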
2207 struct algorithm
2209 struct mult_cost cost;
2210 short ops;
2211 /* The sizes of the OP and LOG fields are not directly related to the
2212 word size, but the worst case arises when the multiplicand has few
2213 consecutive ones or zeros, i.e., a value like 10101010101...
2214 In that case we will generate shift-by-2, add, shift-by-2, add, ...,
2215 in total wordsize operations. */
2216 enum alg_code op[MAX_BITS_PER_WORD];
2217 char log[MAX_BITS_PER_WORD];
2220 /* Indicates the type of fixup needed after a constant multiplication.
2221 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2222 the result should be negated, and ADD_VARIANT means that the
2223 multiplicand should be added to the result. */
2224 enum mult_variant {basic_variant, negate_variant, add_variant};
2226 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2227 const struct mult_cost *, enum machine_mode mode);
2228 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2229 struct algorithm *, enum mult_variant *, int);
2230 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2231 const struct algorithm *, enum mult_variant);
2232 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2233 int, unsigned HOST_WIDE_INT *,
2234 int *, int *);
2235 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2236 static rtx extract_high_half (enum machine_mode, rtx);
2237 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2238 int, int);
2239 /* Compute and return the best algorithm for multiplying by T.
2240 The algorithm must cost less than COST_LIMIT.
2241 If retval.cost >= COST_LIMIT, no algorithm was found and all
2242 other fields of the returned struct are undefined.
2243 MODE is the machine mode of the multiplication. */
2245 static void
2246 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2247 const struct mult_cost *cost_limit, enum machine_mode mode)
2249 int m;
2250 struct algorithm *alg_in, *best_alg;
2251 struct mult_cost best_cost;
2252 struct mult_cost new_limit;
2253 int op_cost, op_latency;
2254 unsigned HOST_WIDE_INT q;
2255 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2257 /* Indicate that no algorithm is yet found. If no algorithm
2258 is found, this value will be returned and indicate failure. */
2259 alg_out->cost.cost = cost_limit->cost + 1;
2261 if (cost_limit->cost < 0
2262 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2263 return;
2265 /* Restrict the bits of "t" to the multiplication's mode. */
2266 t &= GET_MODE_MASK (mode);
2268 /* t == 1 can be done in zero cost. */
2269 if (t == 1)
2271 alg_out->ops = 1;
2272 alg_out->cost.cost = 0;
2273 alg_out->cost.latency = 0;
2274 alg_out->op[0] = alg_m;
2275 return;
2278 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2279 fail now. */
2280 if (t == 0)
2282 if (MULT_COST_LESS (cost_limit, zero_cost))
2283 return;
2284 else
2286 alg_out->ops = 1;
2287 alg_out->cost.cost = zero_cost;
2288 alg_out->cost.latency = zero_cost;
2289 alg_out->op[0] = alg_zero;
2290 return;
2294 /* We'll be needing a couple extra algorithm structures now. */
2296 alg_in = alloca (sizeof (struct algorithm));
2297 best_alg = alloca (sizeof (struct algorithm));
2298 best_cost = *cost_limit;
2300 /* If we have a group of zero bits at the low-order part of T, try
2301 multiplying by the remaining bits and then doing a shift. */
2303 if ((t & 1) == 0)
2305 m = floor_log2 (t & -t); /* m = number of low zero bits */
2306 if (m < maxm)
2308 q = t >> m;
2309 /* The function expand_shift will choose between a shift and
2310 a sequence of additions, so the observed cost is given as
2311 MIN (m * add_cost[mode], shift_cost[mode][m]). */
2312 op_cost = m * add_cost[mode];
2313 if (shift_cost[mode][m] < op_cost)
2314 op_cost = shift_cost[mode][m];
2315 new_limit.cost = best_cost.cost - op_cost;
2316 new_limit.latency = best_cost.latency - op_cost;
2317 synth_mult (alg_in, q, &new_limit, mode);
2319 alg_in->cost.cost += op_cost;
2320 alg_in->cost.latency += op_cost;
2321 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2323 struct algorithm *x;
2324 best_cost = alg_in->cost;
2325 x = alg_in, alg_in = best_alg, best_alg = x;
2326 best_alg->log[best_alg->ops] = m;
2327 best_alg->op[best_alg->ops] = alg_shift;
2332 /* If we have an odd number, add or subtract one. */
2333 if ((t & 1) != 0)
2335 unsigned HOST_WIDE_INT w;
2337 for (w = 1; (w & t) != 0; w <<= 1)
2339 /* If T was -1, then W will be zero after the loop. This is another
2340 case where T ends with ...111. Handling this with (T + 1) and
2341 subtract 1 produces slightly better code and results in algorithm
2342 selection much faster than treating it like the ...0111 case
2343 below. */
2344 if (w == 0
2345 || (w > 2
2346 /* Reject the case where t is 3.
2347 Thus we prefer addition in that case. */
2348 && t != 3))
2350 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2352 op_cost = add_cost[mode];
2353 new_limit.cost = best_cost.cost - op_cost;
2354 new_limit.latency = best_cost.latency - op_cost;
2355 synth_mult (alg_in, t + 1, &new_limit, mode);
2357 alg_in->cost.cost += op_cost;
2358 alg_in->cost.latency += op_cost;
2359 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2361 struct algorithm *x;
2362 best_cost = alg_in->cost;
2363 x = alg_in, alg_in = best_alg, best_alg = x;
2364 best_alg->log[best_alg->ops] = 0;
2365 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2368 else
2370 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2372 op_cost = add_cost[mode];
2373 new_limit.cost = best_cost.cost - op_cost;
2374 new_limit.latency = best_cost.latency - op_cost;
2375 synth_mult (alg_in, t - 1, &new_limit, mode);
2377 alg_in->cost.cost += op_cost;
2378 alg_in->cost.latency += op_cost;
2379 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2381 struct algorithm *x;
2382 best_cost = alg_in->cost;
2383 x = alg_in, alg_in = best_alg, best_alg = x;
2384 best_alg->log[best_alg->ops] = 0;
2385 best_alg->op[best_alg->ops] = alg_add_t_m2;
2390 /* Look for factors of t of the form
2391 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2392 If we find such a factor, we can multiply by t using an algorithm that
2393 multiplies by q, shift the result by m and add/subtract it to itself.
2395 We search for large factors first and loop down, even if large factors
2396 are less probable than small; if we find a large factor we will find a
2397 good sequence quickly, and therefore be able to prune (by decreasing
2398 COST_LIMIT) the search. */
2400 for (m = floor_log2 (t - 1); m >= 2; m--)
2402 unsigned HOST_WIDE_INT d;
2404 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2405 if (t % d == 0 && t > d && m < maxm)
2407 /* If the target has a cheap shift-and-add instruction use
2408 that in preference to a shift insn followed by an add insn.
2409 Assume that the shift-and-add is "atomic" with a latency
2410 equal to its cost, otherwise assume that on superscalar
2411 hardware the shift may be executed concurrently with the
2412 earlier steps in the algorithm. */
2413 op_cost = add_cost[mode] + shift_cost[mode][m];
2414 if (shiftadd_cost[mode][m] < op_cost)
2416 op_cost = shiftadd_cost[mode][m];
2417 op_latency = op_cost;
2419 else
2420 op_latency = add_cost[mode];
2422 new_limit.cost = best_cost.cost - op_cost;
2423 new_limit.latency = best_cost.latency - op_latency;
2424 synth_mult (alg_in, t / d, &new_limit, mode);
2426 alg_in->cost.cost += op_cost;
2427 alg_in->cost.latency += op_latency;
2428 if (alg_in->cost.latency < op_cost)
2429 alg_in->cost.latency = op_cost;
2430 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2432 struct algorithm *x;
2433 best_cost = alg_in->cost;
2434 x = alg_in, alg_in = best_alg, best_alg = x;
2435 best_alg->log[best_alg->ops] = m;
2436 best_alg->op[best_alg->ops] = alg_add_factor;
2438 /* Other factors will have been taken care of in the recursion. */
2439 break;
2442 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2443 if (t % d == 0 && t > d && m < maxm)
2445 /* If the target has a cheap shift-and-subtract insn use
2446 that in preference to a shift insn followed by a sub insn.
2447 Assume that the shift-and-sub is "atomic" with a latency
2448 equal to its cost, otherwise assume that on superscalar
2449 hardware the shift may be executed concurrently with the
2450 earlier steps in the algorithm. */
2451 op_cost = add_cost[mode] + shift_cost[mode][m];
2452 if (shiftsub_cost[mode][m] < op_cost)
2454 op_cost = shiftsub_cost[mode][m];
2455 op_latency = op_cost;
2457 else
2458 op_latency = add_cost[mode];
2460 new_limit.cost = best_cost.cost - op_cost;
2461 new_limit.latency = best_cost.latency - op_latency;
2462 synth_mult (alg_in, t / d, &new_limit, mode);
2464 alg_in->cost.cost += op_cost;
2465 alg_in->cost.latency += op_latency;
2466 if (alg_in->cost.latency < op_cost)
2467 alg_in->cost.latency = op_cost;
2468 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2470 struct algorithm *x;
2471 best_cost = alg_in->cost;
2472 x = alg_in, alg_in = best_alg, best_alg = x;
2473 best_alg->log[best_alg->ops] = m;
2474 best_alg->op[best_alg->ops] = alg_sub_factor;
2476 break;
2480 /* Try shift-and-add (load effective address) instructions,
2481 i.e. do a*3, a*5, a*9. */
2482 if ((t & 1) != 0)
2484 q = t - 1;
2485 q = q & -q;
2486 m = exact_log2 (q);
2487 if (m >= 0 && m < maxm)
2489 op_cost = shiftadd_cost[mode][m];
2490 new_limit.cost = best_cost.cost - op_cost;
2491 new_limit.latency = best_cost.latency - op_cost;
2492 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2494 alg_in->cost.cost += op_cost;
2495 alg_in->cost.latency += op_cost;
2496 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2498 struct algorithm *x;
2499 best_cost = alg_in->cost;
2500 x = alg_in, alg_in = best_alg, best_alg = x;
2501 best_alg->log[best_alg->ops] = m;
2502 best_alg->op[best_alg->ops] = alg_add_t2_m;
2506 q = t + 1;
2507 q = q & -q;
2508 m = exact_log2 (q);
2509 if (m >= 0 && m < maxm)
2511 op_cost = shiftsub_cost[mode][m];
2512 new_limit.cost = best_cost.cost - op_cost;
2513 new_limit.latency = best_cost.latency - op_cost;
2514 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2516 alg_in->cost.cost += op_cost;
2517 alg_in->cost.latency += op_cost;
2518 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2520 struct algorithm *x;
2521 best_cost = alg_in->cost;
2522 x = alg_in, alg_in = best_alg, best_alg = x;
2523 best_alg->log[best_alg->ops] = m;
2524 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2529 /* If we are getting a too long sequence for `struct algorithm'
2530 to record, make this search fail. */
2531 if (best_alg->ops == MAX_BITS_PER_WORD)
2532 return;
2534 /* If best_cost has not decreased, we have not found any algorithm. */
2535 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2536 return;
2538 /* Copy the algorithm from temporary space to the space at alg_out.
2539 We avoid using structure assignment because the majority of
2540 best_alg is normally undefined, and this is a critical function. */
2541 alg_out->ops = best_alg->ops + 1;
2542 alg_out->cost = best_cost;
2543 memcpy (alg_out->op, best_alg->op,
2544 alg_out->ops * sizeof *alg_out->op);
2545 memcpy (alg_out->log, best_alg->log,
2546 alg_out->ops * sizeof *alg_out->log);
2549 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2550 Try three variations:
2552 - a shift/add sequence based on VAL itself
2553 - a shift/add sequence based on -VAL, followed by a negation
2554 - a shift/add sequence based on VAL - 1, followed by an addition.
2556 Return true if the cheapest of these cost less than MULT_COST,
2557 describing the algorithm in *ALG and final fixup in *VARIANT. */
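/* For instance (a hypothetical case): for VAL == -7 the negate variant
   is usually preferable, synthesizing x * 7 as (x << 3) - x and then
   negating, rather than synthesizing the wide bit pattern of -7
   directly.  */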
2559 static bool
2560 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2561 struct algorithm *alg, enum mult_variant *variant,
2562 int mult_cost)
2564 struct algorithm alg2;
2565 struct mult_cost limit;
2566 int op_cost;
2568 *variant = basic_variant;
2569 limit.cost = mult_cost;
2570 limit.latency = mult_cost;
2571 synth_mult (alg, val, &limit, mode);
2573 /* This works only if the inverted value actually fits in an
2574 `unsigned int' */
2575 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2577 op_cost = neg_cost[mode];
2578 if (MULT_COST_LESS (&alg->cost, mult_cost))
2580 limit.cost = alg->cost.cost - op_cost;
2581 limit.latency = alg->cost.latency - op_cost;
2583 else
2585 limit.cost = mult_cost - op_cost;
2586 limit.latency = mult_cost - op_cost;
2589 synth_mult (&alg2, -val, &limit, mode);
2590 alg2.cost.cost += op_cost;
2591 alg2.cost.latency += op_cost;
2592 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2593 *alg = alg2, *variant = negate_variant;
2596 /* This proves very useful for division-by-constant. */
2597 op_cost = add_cost[mode];
2598 if (MULT_COST_LESS (&alg->cost, mult_cost))
2600 limit.cost = alg->cost.cost - op_cost;
2601 limit.latency = alg->cost.latency - op_cost;
2603 else
2605 limit.cost = mult_cost - op_cost;
2606 limit.latency = mult_cost - op_cost;
2609 synth_mult (&alg2, val - 1, &limit, mode);
2610 alg2.cost.cost += op_cost;
2611 alg2.cost.latency += op_cost;
2612 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2613 *alg = alg2, *variant = add_variant;
2615 return MULT_COST_LESS (&alg->cost, mult_cost);
2618 /* A subroutine of expand_mult, used for constant multiplications.
2619 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2620 convenient. Use the shift/add sequence described by ALG and apply
2621 the final fixup specified by VARIANT. */
2623 static rtx
2624 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2625 rtx target, const struct algorithm *alg,
2626 enum mult_variant variant)
2628 HOST_WIDE_INT val_so_far;
2629 rtx insn, accum, tem;
2630 int opno;
2631 enum machine_mode nmode;
2633 /* Avoid referencing memory over and over.
2634 For speed, but also for correctness when mem is volatile. */
2635 if (MEM_P (op0))
2636 op0 = force_reg (mode, op0);
2638 /* ACCUM starts out either as OP0 or as a zero, depending on
2639 the first operation. */
2641 if (alg->op[0] == alg_zero)
2643 accum = copy_to_mode_reg (mode, const0_rtx);
2644 val_so_far = 0;
2646 else if (alg->op[0] == alg_m)
2648 accum = copy_to_mode_reg (mode, op0);
2649 val_so_far = 1;
2651 else
2652 abort ();
2654 for (opno = 1; opno < alg->ops; opno++)
2656 int log = alg->log[opno];
2657 rtx shift_subtarget = optimize ? 0 : accum;
2658 rtx add_target
2659 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2660 && !optimize)
2661 ? target : 0;
2662 rtx accum_target = optimize ? 0 : accum;
2664 switch (alg->op[opno])
2666 case alg_shift:
2667 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2668 build_int_cst (NULL_TREE, log),
2669 NULL_RTX, 0);
2670 val_so_far <<= log;
2671 break;
2673 case alg_add_t_m2:
2674 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2675 build_int_cst (NULL_TREE, log),
2676 NULL_RTX, 0);
2677 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2678 add_target ? add_target : accum_target);
2679 val_so_far += (HOST_WIDE_INT) 1 << log;
2680 break;
2682 case alg_sub_t_m2:
2683 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2684 build_int_cst (NULL_TREE, log),
2685 NULL_RTX, 0);
2686 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2687 add_target ? add_target : accum_target);
2688 val_so_far -= (HOST_WIDE_INT) 1 << log;
2689 break;
2691 case alg_add_t2_m:
2692 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2693 build_int_cst (NULL_TREE, log),
2694 shift_subtarget,
2696 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2697 add_target ? add_target : accum_target);
2698 val_so_far = (val_so_far << log) + 1;
2699 break;
2701 case alg_sub_t2_m:
2702 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2703 build_int_cst (NULL_TREE, log),
2704 shift_subtarget, 0);
2705 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2706 add_target ? add_target : accum_target);
2707 val_so_far = (val_so_far << log) - 1;
2708 break;
2710 case alg_add_factor:
2711 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2712 build_int_cst (NULL_TREE, log),
2713 NULL_RTX, 0);
2714 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2715 add_target ? add_target : accum_target);
2716 val_so_far += val_so_far << log;
2717 break;
2719 case alg_sub_factor:
2720 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2721 build_int_cst (NULL_TREE, log),
2722 NULL_RTX, 0);
2723 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2724 (add_target
2725 ? add_target : (optimize ? 0 : tem)));
2726 val_so_far = (val_so_far << log) - val_so_far;
2727 break;
2729 default:
2730 abort ();
2733 /* Write a REG_EQUAL note on the last insn so that we can cse
2734 multiplication sequences. Note that if ACCUM is a SUBREG,
2735 we've set the inner register and must properly indicate
2736 that. */
2738 tem = op0, nmode = mode;
2739 if (GET_CODE (accum) == SUBREG)
2741 nmode = GET_MODE (SUBREG_REG (accum));
2742 tem = gen_lowpart (nmode, op0);
2745 insn = get_last_insn ();
2746 set_unique_reg_note (insn, REG_EQUAL,
2747 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
2750 if (variant == negate_variant)
2752 val_so_far = -val_so_far;
2753 accum = expand_unop (mode, neg_optab, accum, target, 0);
2755 else if (variant == add_variant)
2757 val_so_far = val_so_far + 1;
2758 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2761 /* Compare only the bits of val and val_so_far that are significant
2762 in the result mode, to avoid sign-/zero-extension confusion. */
2763 val &= GET_MODE_MASK (mode);
2764 val_so_far &= GET_MODE_MASK (mode);
2765 if (val != val_so_far)
2766 abort ();
2768 return accum;
2771 /* Perform a multiplication and return an rtx for the result.
2772 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2773 TARGET is a suggestion for where to store the result (an rtx).
2775 We check specially for a constant integer as OP1.
2776 If you want this check for OP0 as well, then before calling
2777 you should swap the two operands if OP0 would be constant. */
2780 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
2781 int unsignedp)
2783 rtx const_op1 = op1;
2784 enum mult_variant variant;
2785 struct algorithm algorithm;
2787 /* synth_mult does an `unsigned int' multiply. As long as the mode is
2788 less than or equal in size to `unsigned int' this doesn't matter.
2789 If the mode is larger than `unsigned int', then synth_mult works only
2790 if the constant value exactly fits in an `unsigned int' without any
2791 truncation. This means that multiplying by negative values does
2792 not work; results are off by 2^32 on a 32 bit machine. */
2794 /* If we are multiplying in DImode, it may still be a win
2795 to try to work with shifts and adds. */
2796 if (GET_CODE (op1) == CONST_DOUBLE
2797 && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2798 && HOST_BITS_PER_INT >= BITS_PER_WORD
2799 && CONST_DOUBLE_HIGH (op1) == 0)
2800 const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2801 else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2802 && GET_CODE (op1) == CONST_INT
2803 && INTVAL (op1) < 0)
2804 const_op1 = 0;
2806 /* We used to test optimize here, on the grounds that it's better to
2807 produce a smaller program when -O is not used.
2808 But this causes such a terrible slowdown sometimes
2809 that it seems better to use synth_mult always. */
2811 if (const_op1 && GET_CODE (const_op1) == CONST_INT
2812 && (unsignedp || !flag_trapv))
2814 int mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
2816 if (choose_mult_variant (mode, INTVAL (const_op1), &algorithm, &variant,
2817 mult_cost))
2818 return expand_mult_const (mode, op0, INTVAL (const_op1), target,
2819 &algorithm, variant);
2822 if (GET_CODE (op0) == CONST_DOUBLE)
2824 rtx temp = op0;
2825 op0 = op1;
2826 op1 = temp;
2829 /* Expand x*2.0 as x+x. */
2830 if (GET_CODE (op1) == CONST_DOUBLE
2831 && GET_MODE_CLASS (mode) == MODE_FLOAT)
2833 REAL_VALUE_TYPE d;
2834 REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
2836 if (REAL_VALUES_EQUAL (d, dconst2))
2838 op0 = force_reg (GET_MODE (op0), op0);
2839 return expand_binop (mode, add_optab, op0, op0,
2840 target, unsignedp, OPTAB_LIB_WIDEN);
2844 /* This used to use umul_optab if unsigned, but for non-widening multiply
2845 there is no difference between signed and unsigned. */
2846 op0 = expand_binop (mode,
2847 ! unsignedp
2848 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
2849 ? smulv_optab : smul_optab,
2850 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2851 if (op0 == 0)
2852 abort ();
2853 return op0;
2856 /* Return the smallest n such that 2**n >= X. */
2859 ceil_log2 (unsigned HOST_WIDE_INT x)
2861 return floor_log2 (x - 1) + 1;
2864 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2865 replace division by D, and put the least significant N bits of the result
2866 in *MULTIPLIER_PTR and return the most significant bit.
2868 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT); the
2869 needed precision is in PRECISION (should be <= N).
2871 PRECISION should be as small as possible so this function can choose
2872 a multiplier more freely.
2874 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
2875 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2877 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2878 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
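/* A worked example (assuming 32-bit operands, not specific to any
   target): for D == 3 with N == PRECISION == 32, this chooses the
   multiplier 0xAAAAAAAB with *POST_SHIFT_PTR == 1, so that for unsigned
   32-bit x, x / 3 == ((x * 0xAAAAAAAB) >> 32) >> 1, i.e. the high half
   of the 64-bit product followed by the post-shift.  */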
2880 static
2881 unsigned HOST_WIDE_INT
2882 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
2883 unsigned HOST_WIDE_INT *multiplier_ptr,
2884 int *post_shift_ptr, int *lgup_ptr)
2886 HOST_WIDE_INT mhigh_hi, mlow_hi;
2887 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
2888 int lgup, post_shift;
2889 int pow, pow2;
2890 unsigned HOST_WIDE_INT nl, dummy1;
2891 HOST_WIDE_INT nh, dummy2;
2893 /* lgup = ceil(log2(divisor)); */
2894 lgup = ceil_log2 (d);
2896 if (lgup > n)
2897 abort ();
2899 pow = n + lgup;
2900 pow2 = n + lgup - precision;
2902 if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2904 /* We could handle this with some effort, but this case is much better
2905 handled directly with a scc insn, so rely on caller using that. */
2906 abort ();
2909 /* mlow = 2^(N + lgup)/d */
2910 if (pow >= HOST_BITS_PER_WIDE_INT)
2912 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2913 nl = 0;
2915 else
2917 nh = 0;
2918 nl = (unsigned HOST_WIDE_INT) 1 << pow;
2920 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2921 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2923 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
2924 if (pow2 >= HOST_BITS_PER_WIDE_INT)
2925 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2926 else
2927 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2928 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2929 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2931 if (mhigh_hi && nh - d >= d)
2932 abort ();
2933 if (mhigh_hi > 1 || mlow_hi > 1)
2934 abort ();
2935 /* Assert that mlow < mhigh. */
2936 if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2937 abort ();
2939 /* If precision == N, then mlow, mhigh exceed 2^N
2940 (but they do not exceed 2^(N+1)). */
2942 /* Reduce to lowest terms. */
2943 for (post_shift = lgup; post_shift > 0; post_shift--)
2945 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2946 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2947 if (ml_lo >= mh_lo)
2948 break;
2950 mlow_hi = 0;
2951 mlow_lo = ml_lo;
2952 mhigh_hi = 0;
2953 mhigh_lo = mh_lo;
2956 *post_shift_ptr = post_shift;
2957 *lgup_ptr = lgup;
2958 if (n < HOST_BITS_PER_WIDE_INT)
2960 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2961 *multiplier_ptr = mhigh_lo & mask;
2962 return mhigh_lo >= mask;
2964 else
2966 *multiplier_ptr = mhigh_lo;
2967 return mhigh_hi;
2971 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
2972 congruent to 1 (mod 2**N). */
2974 static unsigned HOST_WIDE_INT
2975 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
2977 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
2979 /* The algorithm notes that the choice y = x satisfies
2980 x*y == 1 mod 2^3, since x is assumed odd.
2981 Each iteration doubles the number of bits of significance in y. */
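/* For example (shown only as an illustration): for x == 3 and n == 8
   the iteration goes y = 3 -> 235 -> 171, and indeed 3 * 171 == 513
   == 2 * 256 + 1, so 171 (0xab) is the inverse of 3 mod 2**8.  */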
2983 unsigned HOST_WIDE_INT mask;
2984 unsigned HOST_WIDE_INT y = x;
2985 int nbit = 3;
2987 mask = (n == HOST_BITS_PER_WIDE_INT
2988 ? ~(unsigned HOST_WIDE_INT) 0
2989 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2991 while (nbit < n)
2993 y = y * (2 - x*y) & mask; /* Modulo 2^N */
2994 nbit *= 2;
2996 return y;
2999 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3000 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3001 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3002 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3003 become signed.
3005 The result is put in TARGET if that is convenient.
3007 MODE is the mode of operation. */
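/* In other words (mode width N, all arithmetic mod 2**N):
   high_unsigned (a, b) == high_signed (a, b)
   + (a < 0 ? b : 0) + (b < 0 ? a : 0),
   and the signed high part is recovered with the corresponding
   subtractions; the code below forms each conditional term with an
   arithmetic shift by N - 1 and an AND.  */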
3010 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3011 rtx op1, rtx target, int unsignedp)
3013 rtx tem;
3014 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3016 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3017 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3018 NULL_RTX, 0);
3019 tem = expand_and (mode, tem, op1, NULL_RTX);
3020 adj_operand
3021 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3022 adj_operand);
3024 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3025 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3026 NULL_RTX, 0);
3027 tem = expand_and (mode, tem, op0, NULL_RTX);
3028 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3029 target);
3031 return target;
3034 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
3036 static rtx
3037 extract_high_half (enum machine_mode mode, rtx op)
3039 enum machine_mode wider_mode;
3041 if (mode == word_mode)
3042 return gen_highpart (mode, op);
3044 wider_mode = GET_MODE_WIDER_MODE (mode);
3045 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3046 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3047 return convert_modes (mode, wider_mode, op, 0);
3050 /* Like expand_mult_highpart, but only consider using a multiplication
3051 optab. OP1 is an rtx for the constant operand. */
3053 static rtx
3054 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3055 rtx target, int unsignedp, int max_cost)
3057 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3058 enum machine_mode wider_mode;
3059 optab moptab;
3060 rtx tem;
3061 int size;
3063 wider_mode = GET_MODE_WIDER_MODE (mode);
3064 size = GET_MODE_BITSIZE (mode);
3066 /* Firstly, try using a multiplication insn that only generates the needed
3067 high part of the product, and in the sign flavor of unsignedp. */
3068 if (mul_highpart_cost[mode] < max_cost)
3070 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3071 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3072 unsignedp, OPTAB_DIRECT);
3073 if (tem)
3074 return tem;
3077 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3078 Need to adjust the result after the multiplication. */
3079 if (size - 1 < BITS_PER_WORD
3080 && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
3081 + 4 * add_cost[mode] < max_cost))
3083 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3084 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3085 unsignedp, OPTAB_DIRECT);
3086 if (tem)
3087 /* We used the wrong signedness. Adjust the result. */
3088 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3089 tem, unsignedp);
3092 /* Try widening multiplication. */
3093 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3094 if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3095 && mul_widen_cost[wider_mode] < max_cost)
3097 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3098 unsignedp, OPTAB_WIDEN);
3099 if (tem)
3100 return extract_high_half (mode, tem);
3103 /* Try widening the mode and perform a non-widening multiplication. */
3104 moptab = smul_optab;
3105 if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3106 && size - 1 < BITS_PER_WORD
3107 && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
3109 tem = expand_binop (wider_mode, moptab, op0, op1, 0,
3110 unsignedp, OPTAB_WIDEN);
3111 if (tem)
3112 return extract_high_half (mode, tem);
3115 /* Try widening multiplication of opposite signedness, and adjust. */
3116 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3117 if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3118 && size - 1 < BITS_PER_WORD
3119 && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
3120 + 4 * add_cost[mode] < max_cost))
3122 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3123 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3124 if (tem != 0)
3126 tem = extract_high_half (mode, tem);
3127 /* We used the wrong signedness. Adjust the result. */
3128 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3129 target, unsignedp);
3133 return 0;
3136 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
3137 in TARGET if that is convenient, and return where the result is. If the
3138 operation cannot be performed, 0 is returned.
3140 MODE is the mode of operation and result.
3142 UNSIGNEDP nonzero means unsigned multiply.
3144 MAX_COST is the total allowed cost for the expanded RTL. */
3147 expand_mult_highpart (enum machine_mode mode, rtx op0,
3148 unsigned HOST_WIDE_INT cnst1, rtx target,
3149 int unsignedp, int max_cost)
3151 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3152 int extra_cost;
3153 bool sign_adjust = false;
3154 enum mult_variant variant;
3155 struct algorithm alg;
3156 rtx op1, tem;
3158 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3159 if (GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3160 abort ();
3162 op1 = gen_int_mode (cnst1, wider_mode);
3163 cnst1 &= GET_MODE_MASK (mode);
3165 /* We can't optimize modes wider than BITS_PER_WORD.
3166 ??? We might be able to perform double-word arithmetic if
3167 mode == word_mode, however all the cost calculations in
3168 synth_mult etc. assume single-word operations. */
3169 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3170 return expand_mult_highpart_optab (mode, op0, op1, target,
3171 unsignedp, max_cost);
3173 extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3175 /* Check whether we try to multiply by a negative constant. */
3176 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3178 sign_adjust = true;
3179 extra_cost += add_cost[mode];
3182 /* See whether shift/add multiplication is cheap enough. */
3183 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3184 max_cost - extra_cost))
3186 /* See whether the specialized multiplication optabs are
3187 cheaper than the shift/add version. */
3188 tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3189 alg.cost.cost + extra_cost);
3190 if (tem)
3191 return tem;
3193 tem = convert_to_mode (wider_mode, op0, unsignedp);
3194 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3195 tem = extract_high_half (mode, tem);
3197 /* Adjust result for signedness. */
3198 if (sign_adjust)
3199 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3201 return tem;
3203 return expand_mult_highpart_optab (mode, op0, op1, target,
3204 unsignedp, max_cost);
3208 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3210 static rtx
3211 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3213 unsigned HOST_WIDE_INT mask;
3214 rtx result, temp, shift, label;
3215 int logd;
3217 logd = floor_log2 (d);
3218 result = gen_reg_rtx (mode);
3220 /* Avoid conditional branches when they're expensive. */
3221 if (BRANCH_COST >= 2
3222 && !optimize_size)
3224 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3225 mode, 0, -1);
3226 if (signmask)
3228 signmask = force_reg (mode, signmask);
3229 mask = ((HOST_WIDE_INT) 1 << logd) - 1;
3230 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3232 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3233 which instruction sequence to use. If logical right shifts
3234 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3235 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
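/* For illustration (hypothetical operands): with D == 8 (LOGD == 3) and
   a negative OP0, the shift-based variant computes ((OP0 + 7) & 7) - 7
   and the xor-based variant - ((-OP0) & 7); both equal OP0 % 8, and for
   non-negative OP0 the added or xored mask is 0.  */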
3237 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3238 if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing
3239 || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3241 temp = expand_binop (mode, xor_optab, op0, signmask,
3242 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3243 temp = expand_binop (mode, sub_optab, temp, signmask,
3244 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3245 temp = expand_binop (mode, and_optab, temp, GEN_INT (mask),
3246 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3247 temp = expand_binop (mode, xor_optab, temp, signmask,
3248 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3249 temp = expand_binop (mode, sub_optab, temp, signmask,
3250 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3252 else
3254 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3255 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3256 signmask = force_reg (mode, signmask);
3258 temp = expand_binop (mode, add_optab, op0, signmask,
3259 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3260 temp = expand_binop (mode, and_optab, temp, GEN_INT (mask),
3261 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3262 temp = expand_binop (mode, sub_optab, temp, signmask,
3263 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3265 return temp;
3269 /* Mask contains the mode's signbit and the significant bits of the
3270 modulus. By including the signbit in the operation, many targets
3271 can avoid an explicit compare operation in the following comparison
3272 against zero. */
3274 mask = (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1)
3275 | (((HOST_WIDE_INT) 1 << logd) - 1);
3277 temp = expand_binop (mode, and_optab, op0, GEN_INT (mask), result,
3278 1, OPTAB_LIB_WIDEN);
3279 if (temp != result)
3280 emit_move_insn (result, temp);
3282 label = gen_label_rtx ();
3283 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3285 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3286 0, OPTAB_LIB_WIDEN);
3287 mask = (HOST_WIDE_INT) -1 << logd;
3288 temp = expand_binop (mode, ior_optab, temp, GEN_INT (mask), result,
3289 1, OPTAB_LIB_WIDEN);
3290 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3291 0, OPTAB_LIB_WIDEN);
3292 if (temp != result)
3293 emit_move_insn (result, temp);
3294 emit_label (label);
3295 return result;
3298 /* Expand signed division of OP0 by a power of two D in mode MODE.
3299 This routine is only called for positive values of D. */
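/* The general trick used below (illustrated for a 32-bit mode and
   D == 8): add D - 1 to the dividend only when it is negative, then do
   an arithmetic shift, e.g. x / 8 == (x + ((x >> 31) & 7)) >> 3, where
   both shifts are arithmetic so (x >> 31) is -1 or 0.  */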
3301 static rtx
3302 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3304 rtx temp, label;
3305 tree shift;
3306 int logd;
3308 logd = floor_log2 (d);
3309 shift = build_int_cst (NULL_TREE, logd);
3311 if (d == 2 && BRANCH_COST >= 1)
3313 temp = gen_reg_rtx (mode);
3314 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3315 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3316 0, OPTAB_LIB_WIDEN);
3317 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3320 #ifdef HAVE_conditional_move
3321 if (BRANCH_COST >= 2)
3323 rtx temp2;
3325 start_sequence ();
3326 temp2 = copy_to_mode_reg (mode, op0);
3327 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3328 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3329 temp = force_reg (mode, temp);
3331 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3332 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3333 mode, temp, temp2, mode, 0);
3334 if (temp2)
3336 rtx seq = get_insns ();
3337 end_sequence ();
3338 emit_insn (seq);
3339 return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3341 end_sequence ();
3343 #endif
3345 if (BRANCH_COST >= 2)
3347 int ushift = GET_MODE_BITSIZE (mode) - logd;
3349 temp = gen_reg_rtx (mode);
3350 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3351 if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
3352 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3353 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3354 else
3355 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3356 build_int_cst (NULL_TREE, ushift),
3357 NULL_RTX, 1);
3358 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3359 0, OPTAB_LIB_WIDEN);
3360 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3363 label = gen_label_rtx ();
3364 temp = copy_to_mode_reg (mode, op0);
3365 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3366 expand_inc (temp, GEN_INT (d - 1));
3367 emit_label (label);
3368 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3371 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3372 if that is convenient, and returning where the result is.
3373 You may request either the quotient or the remainder as the result;
3374 specify REM_FLAG nonzero to get the remainder.
3376 CODE is the expression code for which kind of division this is;
3377 it controls how rounding is done. MODE is the machine mode to use.
3378 UNSIGNEDP nonzero means do unsigned division. */
3380 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3381 and then correct it by or'ing in missing high bits
3382 if result of ANDI is nonzero.
3383 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3384 This could optimize to a bfexts instruction.
3385 But C doesn't use these operations, so their optimizations are
3386 left for later. */
3387 /* ??? For modulo, we don't actually need the highpart of the first product,
3388 the low part will do nicely. And for small divisors, the second multiply
3389 can also be a low-part only multiply or even be completely left out.
3390 E.g. to calculate the remainder of a division by 3 with a 32 bit
3391 multiply, multiply with 0x55555556 and extract the upper two bits;
3392 the result is exact for inputs up to 0x1fffffff.
3393 The input range can be reduced by using cross-sum rules.
3394 For odd divisors >= 3, the following table gives right shift counts
3395 so that if a number is shifted by an integer multiple of the given
3396 amount, the remainder stays the same:
3397 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3398 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3399 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3400 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3401 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3403 Cross-sum rules for even numbers can be derived by leaving as many bits
3404 to the right alone as the divisor has zeros to the right.
3405 E.g. if x is an unsigned 32 bit number:
3406 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3409 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
3412 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3413 rtx op0, rtx op1, rtx target, int unsignedp)
3415 enum machine_mode compute_mode;
3416 rtx tquotient;
3417 rtx quotient = 0, remainder = 0;
3418 rtx last;
3419 int size;
3420 rtx insn, set;
3421 optab optab1, optab2;
3422 int op1_is_constant, op1_is_pow2 = 0;
3423 int max_cost, extra_cost;
3424 static HOST_WIDE_INT last_div_const = 0;
3425 static HOST_WIDE_INT ext_op1;
3427 op1_is_constant = GET_CODE (op1) == CONST_INT;
3428 if (op1_is_constant)
3430 ext_op1 = INTVAL (op1);
3431 if (unsignedp)
3432 ext_op1 &= GET_MODE_MASK (mode);
3433 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3434 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3438 This is the structure of expand_divmod:
3440 First comes code to fix up the operands so we can perform the operations
3441 correctly and efficiently.
3443 Second comes a switch statement with code specific for each rounding mode.
3444 For some special operands this code emits all RTL for the desired
3445 operation, for other cases, it generates only a quotient and stores it in
3446 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3447 to indicate that it has not done anything.
3449 Last comes code that finishes the operation. If QUOTIENT is set and
3450 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3451 QUOTIENT is not set, it is computed using trunc rounding.
3453 We try to generate special code for division and remainder when OP1 is a
3454 constant. If |OP1| = 2**n we can use shifts and some other fast
3455 operations. For other values of OP1, we compute a carefully selected
3456 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3457 by m.
3459 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3460 half of the product. Different strategies for generating the product are
3461 implemented in expand_mult_highpart.
3463 If what we actually want is the remainder, we generate that by another
3464 by-constant multiplication and a subtraction. */
3466 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3467 code below will malfunction if we are, so check here and handle
3468 the special case if so. */
3469 if (op1 == const1_rtx)
3470 return rem_flag ? const0_rtx : op0;
3472 /* When dividing by -1, we could get an overflow.
3473 negv_optab can handle overflows. */
3474 if (! unsignedp && op1 == constm1_rtx)
3476 if (rem_flag)
3477 return const0_rtx;
3478 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3479 ? negv_optab : neg_optab, op0, target, 0);
3482 if (target
3483 /* Don't use the function value register as a target
3484 since we have to read it as well as write it,
3485 and function-inlining gets confused by this. */
3486 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3487 /* Don't clobber an operand while doing a multi-step calculation. */
3488 || ((rem_flag || op1_is_constant)
3489 && (reg_mentioned_p (target, op0)
3490 || (MEM_P (op0) && MEM_P (target))))
3491 || reg_mentioned_p (target, op1)
3492 || (MEM_P (op1) && MEM_P (target))))
3493 target = 0;
3495 /* Get the mode in which to perform this computation. Normally it will
3496 be MODE, but sometimes we can't do the desired operation in MODE.
3497 If so, pick a wider mode in which we can do the operation. Convert
3498 to that mode at the start to avoid repeated conversions.
3500 First see what operations we need. These depend on the expression
3501 we are evaluating. (We assume that divxx3 insns exist under the
3502 same conditions as modxx3 insns, and that these insns don't normally
3503 fail. If these assumptions are not correct, we may generate less
3504 efficient code in some cases.)
3506 Then see if we find a mode in which we can open-code that operation
3507 (either a division, modulus, or shift). Finally, check for the smallest
3508 mode for which we can do the operation with a library call. */
3510 /* We might want to refine this now that we have division-by-constant
3511 optimization. Since expand_mult_highpart tries so many variants, it is
3512 not straightforward to generalize this. Maybe we should make an array
3513 of possible modes in init_expmed? Save this for GCC 2.7. */
3515 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3516 ? (unsignedp ? lshr_optab : ashr_optab)
3517 : (unsignedp ? udiv_optab : sdiv_optab));
3518 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3519 ? optab1
3520 : (unsignedp ? udivmod_optab : sdivmod_optab));
3522 for (compute_mode = mode; compute_mode != VOIDmode;
3523 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3524 if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
3525 || optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
3526 break;
3528 if (compute_mode == VOIDmode)
3529 for (compute_mode = mode; compute_mode != VOIDmode;
3530 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3531 if (optab1->handlers[compute_mode].libfunc
3532 || optab2->handlers[compute_mode].libfunc)
3533 break;
3535 /* If we still couldn't find a mode, use MODE, but we'll probably abort
3536 in expand_binop. */
3537 if (compute_mode == VOIDmode)
3538 compute_mode = mode;
3540 if (target && GET_MODE (target) == compute_mode)
3541 tquotient = target;
3542 else
3543 tquotient = gen_reg_rtx (compute_mode);
3545 size = GET_MODE_BITSIZE (compute_mode);
3546 #if 0
3547 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3548 (mode), and thereby get better code when OP1 is a constant. Do that
3549 later. It will require going over all usages of SIZE below. */
3550 size = GET_MODE_BITSIZE (mode);
3551 #endif
3553 /* Only deduct something for a REM if the last divide done was
3554 for a different constant. Then set the constant of the last
3555 divide. */
3556 max_cost = div_cost[compute_mode]
3557 - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3558 && INTVAL (op1) == last_div_const)
3559 ? mul_cost[compute_mode] + add_cost[compute_mode]
3560 : 0);
3562 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3564 /* Now convert to the best mode to use. */
3565 if (compute_mode != mode)
3567 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3568 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3570 /* convert_modes may have placed op1 into a register, so we
3571 must recompute the following. */
3572 op1_is_constant = GET_CODE (op1) == CONST_INT;
3573 op1_is_pow2 = (op1_is_constant
3574 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3575 || (! unsignedp
3576 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
3579 /* If one of the operands is a volatile MEM, copy it into a register. */
3581 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3582 op0 = force_reg (compute_mode, op0);
3583 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3584 op1 = force_reg (compute_mode, op1);
3586 /* If we need the remainder or if OP1 is constant, we need to
3587 put OP0 in a register in case it has any queued subexpressions. */
3588 if (rem_flag || op1_is_constant)
3589 op0 = force_reg (compute_mode, op0);
3591 last = get_last_insn ();
3593 /* Promote floor rounding to trunc rounding for unsigned operations. */
3594 if (unsignedp)
3596 if (code == FLOOR_DIV_EXPR)
3597 code = TRUNC_DIV_EXPR;
3598 if (code == FLOOR_MOD_EXPR)
3599 code = TRUNC_MOD_EXPR;
3600 if (code == EXACT_DIV_EXPR && op1_is_pow2)
3601 code = TRUNC_DIV_EXPR;
3604 if (op1 != const0_rtx)
3605 switch (code)
3607 case TRUNC_MOD_EXPR:
3608 case TRUNC_DIV_EXPR:
3609 if (op1_is_constant)
3611 if (unsignedp)
3613 unsigned HOST_WIDE_INT mh, ml;
3614 int pre_shift, post_shift;
3615 int dummy;
3616 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3617 & GET_MODE_MASK (compute_mode));
3619 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3621 pre_shift = floor_log2 (d);
3622 if (rem_flag)
3624 remainder
3625 = expand_binop (compute_mode, and_optab, op0,
3626 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3627 remainder, 1,
3628 OPTAB_LIB_WIDEN);
3629 if (remainder)
3630 return gen_lowpart (mode, remainder);
3632 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3633 build_int_cst (NULL_TREE,
3634 pre_shift),
3635 tquotient, 1);
3637 else if (size <= HOST_BITS_PER_WIDE_INT)
3639 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3641 /* Most significant bit of divisor is set; emit an scc
3642 insn. */
3643 quotient = emit_store_flag (tquotient, GEU, op0, op1,
3644 compute_mode, 1, 1);
3645 if (quotient == 0)
3646 goto fail1;
3648 else
3650 /* Find a suitable multiplier and right shift count
3651 instead of multiplying with D. */
3653 mh = choose_multiplier (d, size, size,
3654 &ml, &post_shift, &dummy);
3656 /* If the suggested multiplier is more than SIZE bits,
3657 we can do better for even divisors, using an
3658 initial right shift. */
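   /* For example, the multiplier for an unsigned 32-bit division by 14
      needs 33 bits, but since x / 14 == (x >> 1) / 7, shifting the
      dividend right by 1 first leaves only 31 significant dividend bits,
      and the divide-by-7 multiplier for that reduced range fits in a word
      (illustrative only).  */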
3659 if (mh != 0 && (d & 1) == 0)
3661 pre_shift = floor_log2 (d & -d);
3662 mh = choose_multiplier (d >> pre_shift, size,
3663 size - pre_shift,
3664 &ml, &post_shift, &dummy);
3665 if (mh)
3666 abort ();
3668 else
3669 pre_shift = 0;
3671 if (mh != 0)
3673 rtx t1, t2, t3, t4;
3675 if (post_shift - 1 >= BITS_PER_WORD)
3676 goto fail1;
3678 extra_cost
3679 = (shift_cost[compute_mode][post_shift - 1]
3680 + shift_cost[compute_mode][1]
3681 + 2 * add_cost[compute_mode]);
3682 t1 = expand_mult_highpart (compute_mode, op0, ml,
3683 NULL_RTX, 1,
3684 max_cost - extra_cost);
3685 if (t1 == 0)
3686 goto fail1;
3687 t2 = force_operand (gen_rtx_MINUS (compute_mode,
3688 op0, t1),
3689 NULL_RTX);
3690 t3 = expand_shift
3691 (RSHIFT_EXPR, compute_mode, t2,
3692 build_int_cst (NULL_TREE, 1),
3693 NULL_RTX, 1);
3694 t4 = force_operand (gen_rtx_PLUS (compute_mode,
3695 t1, t3),
3696 NULL_RTX);
3697 quotient = expand_shift
3698 (RSHIFT_EXPR, compute_mode, t4,
3699 build_int_cst (NULL_TREE, post_shift - 1),
3700 tquotient, 1);
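   /* Illustrative instance of the sequence above, not emitted verbatim:
      for unsigned 32-bit division by 7 the multiplier needs 33 bits,
      giving ml = 0x24924925 and post_shift = 3; with
      t1 = mulhi (x, 0x24924925) the quotient is
      (((x - t1) >> 1) + t1) >> 2, e.g. x = 100 yields t1 = 14
      and quotient 14 == 100 / 7.  */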
3702 else
3704 rtx t1, t2;
3706 if (pre_shift >= BITS_PER_WORD
3707 || post_shift >= BITS_PER_WORD)
3708 goto fail1;
3710 t1 = expand_shift
3711 (RSHIFT_EXPR, compute_mode, op0,
3712 build_int_cst (NULL_TREE, pre_shift),
3713 NULL_RTX, 1);
3714 extra_cost
3715 = (shift_cost[compute_mode][pre_shift]
3716 + shift_cost[compute_mode][post_shift]);
3717 t2 = expand_mult_highpart (compute_mode, t1, ml,
3718 NULL_RTX, 1,
3719 max_cost - extra_cost);
3720 if (t2 == 0)
3721 goto fail1;
3722 quotient = expand_shift
3723 (RSHIFT_EXPR, compute_mode, t2,
3724 build_int_cst (NULL_TREE, post_shift),
3725 tquotient, 1);
3729 else /* Mode is too wide to use the tricky code.  */
3730 break;
3732 insn = get_last_insn ();
3733 if (insn != last
3734 && (set = single_set (insn)) != 0
3735 && SET_DEST (set) == quotient)
3736 set_unique_reg_note (insn,
3737 REG_EQUAL,
3738 gen_rtx_UDIV (compute_mode, op0, op1));
3740 else /* TRUNC_DIV, signed */
3742 unsigned HOST_WIDE_INT ml;
3743 int lgup, post_shift;
3744 HOST_WIDE_INT d = INTVAL (op1);
3745 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3747 /* n rem d = n rem -d */
3748 if (rem_flag && d < 0)
3750 d = abs_d;
3751 op1 = gen_int_mode (abs_d, compute_mode);
3754 if (d == 1)
3755 quotient = op0;
3756 else if (d == -1)
3757 quotient = expand_unop (compute_mode, neg_optab, op0,
3758 tquotient, 0);
3759 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3761 /* This case is not handled correctly below. */
3762 quotient = emit_store_flag (tquotient, EQ, op0, op1,
3763 compute_mode, 1, 1);
3764 if (quotient == 0)
3765 goto fail1;
3767 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3768 && (rem_flag ? smod_pow2_cheap[compute_mode]
3769 : sdiv_pow2_cheap[compute_mode])
3770 /* We assume that the cheap metric is true if the
3771 optab has an expander for this mode. */
3772 && (((rem_flag ? smod_optab : sdiv_optab)
3773 ->handlers[compute_mode].insn_code
3774 != CODE_FOR_nothing)
3775 || (sdivmod_optab->handlers[compute_mode]
3776 .insn_code != CODE_FOR_nothing)))
3778 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3780 if (rem_flag)
3782 remainder = expand_smod_pow2 (compute_mode, op0, d);
3783 if (remainder)
3784 return gen_lowpart (mode, remainder);
3786 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
3788 /* We have computed OP0 / abs(OP1). If OP1 is negative,
3789 negate the quotient. */
3790 if (d < 0)
3792 insn = get_last_insn ();
3793 if (insn != last
3794 && (set = single_set (insn)) != 0
3795 && SET_DEST (set) == quotient
3796 && abs_d < ((unsigned HOST_WIDE_INT) 1
3797 << (HOST_BITS_PER_WIDE_INT - 1)))
3798 set_unique_reg_note (insn,
3799 REG_EQUAL,
3800 gen_rtx_DIV (compute_mode,
3801 op0,
3802 GEN_INT
3803 (trunc_int_for_mode
3804 (abs_d,
3805 compute_mode))));
3807 quotient = expand_unop (compute_mode, neg_optab,
3808 quotient, quotient, 0);
3811 else if (size <= HOST_BITS_PER_WIDE_INT)
3813 choose_multiplier (abs_d, size, size - 1,
3814 &ml, &post_shift, &lgup);
3815 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3817 rtx t1, t2, t3;
3819 if (post_shift >= BITS_PER_WORD
3820 || size - 1 >= BITS_PER_WORD)
3821 goto fail1;
3823 extra_cost = (shift_cost[compute_mode][post_shift]
3824 + shift_cost[compute_mode][size - 1]
3825 + add_cost[compute_mode]);
3826 t1 = expand_mult_highpart (compute_mode, op0, ml,
3827 NULL_RTX, 0,
3828 max_cost - extra_cost);
3829 if (t1 == 0)
3830 goto fail1;
3831 t2 = expand_shift
3832 (RSHIFT_EXPR, compute_mode, t1,
3833 build_int_cst (NULL_TREE, post_shift),
3834 NULL_RTX, 0);
3835 t3 = expand_shift
3836 (RSHIFT_EXPR, compute_mode, op0,
3837 build_int_cst (NULL_TREE, size - 1),
3838 NULL_RTX, 0);
3839 if (d < 0)
3840 quotient
3841 = force_operand (gen_rtx_MINUS (compute_mode,
3842 t3, t2),
3843 tquotient);
3844 else
3845 quotient
3846 = force_operand (gen_rtx_MINUS (compute_mode,
3847 t2, t3),
3848 tquotient);
3850 else
3852 rtx t1, t2, t3, t4;
3854 if (post_shift >= BITS_PER_WORD
3855 || size - 1 >= BITS_PER_WORD)
3856 goto fail1;
3858 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3859 extra_cost = (shift_cost[compute_mode][post_shift]
3860 + shift_cost[compute_mode][size - 1]
3861 + 2 * add_cost[compute_mode]);
3862 t1 = expand_mult_highpart (compute_mode, op0, ml,
3863 NULL_RTX, 0,
3864 max_cost - extra_cost);
3865 if (t1 == 0)
3866 goto fail1;
3867 t2 = force_operand (gen_rtx_PLUS (compute_mode,
3868 t1, op0),
3869 NULL_RTX);
3870 t3 = expand_shift
3871 (RSHIFT_EXPR, compute_mode, t2,
3872 build_int_cst (NULL_TREE, post_shift),
3873 NULL_RTX, 0);
3874 t4 = expand_shift
3875 (RSHIFT_EXPR, compute_mode, op0,
3876 build_int_cst (NULL_TREE, size - 1),
3877 NULL_RTX, 0);
3878 if (d < 0)
3879 quotient
3880 = force_operand (gen_rtx_MINUS (compute_mode,
3881 t4, t3),
3882 tquotient);
3883 else
3884 quotient
3885 = force_operand (gen_rtx_MINUS (compute_mode,
3886 t3, t4),
3887 tquotient);
3890 else /* Mode is too wide to use the tricky code.  */
3891 break;
3893 insn = get_last_insn ();
3894 if (insn != last
3895 && (set = single_set (insn)) != 0
3896 && SET_DEST (set) == quotient)
3897 set_unique_reg_note (insn,
3898 REG_EQUAL,
3899 gen_rtx_DIV (compute_mode, op0, op1));
3901 break;
3903 fail1:
3904 delete_insns_since (last);
3905 break;
3907 case FLOOR_DIV_EXPR:
3908 case FLOOR_MOD_EXPR:
3909 /* We will come here only for signed operations. */
3910 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3912 unsigned HOST_WIDE_INT mh, ml;
3913 int pre_shift, lgup, post_shift;
3914 HOST_WIDE_INT d = INTVAL (op1);
3916 if (d > 0)
3918 /* We could just as easily deal with negative constants here,
3919 but it does not seem worth the trouble for GCC 2.6. */
3920 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3922 pre_shift = floor_log2 (d);
3923 if (rem_flag)
3925 remainder = expand_binop (compute_mode, and_optab, op0,
3926 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3927 remainder, 0, OPTAB_LIB_WIDEN);
3928 if (remainder)
3929 return gen_lowpart (mode, remainder);
3931 quotient = expand_shift
3932 (RSHIFT_EXPR, compute_mode, op0,
3933 build_int_cst (NULL_TREE, pre_shift),
3934 tquotient, 0);
3936 else
3938 rtx t1, t2, t3, t4;
3940 mh = choose_multiplier (d, size, size - 1,
3941 &ml, &post_shift, &lgup);
3942 if (mh)
3943 abort ();
3945 if (post_shift < BITS_PER_WORD
3946 && size - 1 < BITS_PER_WORD)
3948 t1 = expand_shift
3949 (RSHIFT_EXPR, compute_mode, op0,
3950 build_int_cst (NULL_TREE, size - 1),
3951 NULL_RTX, 0);
3952 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3953 NULL_RTX, 0, OPTAB_WIDEN);
3954 extra_cost = (shift_cost[compute_mode][post_shift]
3955 + shift_cost[compute_mode][size - 1]
3956 + 2 * add_cost[compute_mode]);
3957 t3 = expand_mult_highpart (compute_mode, t2, ml,
3958 NULL_RTX, 1,
3959 max_cost - extra_cost);
3960 if (t3 != 0)
3962 t4 = expand_shift
3963 (RSHIFT_EXPR, compute_mode, t3,
3964 build_int_cst (NULL_TREE, post_shift),
3965 NULL_RTX, 1);
3966 quotient = expand_binop (compute_mode, xor_optab,
3967 t4, t1, tquotient, 0,
3968 OPTAB_WIDEN);
3973 else
3975 rtx nsign, t1, t2, t3, t4;
3976 t1 = force_operand (gen_rtx_PLUS (compute_mode,
3977 op0, constm1_rtx), NULL_RTX);
3978 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3979 0, OPTAB_WIDEN);
3980 nsign = expand_shift
3981 (RSHIFT_EXPR, compute_mode, t2,
3982 build_int_cst (NULL_TREE, size - 1),
3983 NULL_RTX, 0);
3984 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
3985 NULL_RTX);
3986 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3987 NULL_RTX, 0);
3988 if (t4)
3990 rtx t5;
3991 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3992 NULL_RTX, 0);
3993 quotient = force_operand (gen_rtx_PLUS (compute_mode,
3994 t4, t5),
3995 tquotient);
4000 if (quotient != 0)
4001 break;
4002 delete_insns_since (last);
4004 /* Try using an instruction that produces both the quotient and
4005 remainder, using truncation. We can easily compensate the quotient
4006 or remainder to get floor rounding, once we have the remainder.
4007 Notice that we also compute the final remainder value here,
4008 and return the result right away. */
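   /* For instance (illustrative): -7 / 3 truncates to quotient -2 with
      remainder -1; the remainder is nonzero and the operands have
      opposite signs, so floor rounding adjusts this to quotient -3 and
      remainder -1 + 3 == 2, i.e. -7 == 3 * -3 + 2.  */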
4009 if (target == 0 || GET_MODE (target) != compute_mode)
4010 target = gen_reg_rtx (compute_mode);
4012 if (rem_flag)
4014 remainder
4015 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4016 quotient = gen_reg_rtx (compute_mode);
4018 else
4020 quotient
4021 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4022 remainder = gen_reg_rtx (compute_mode);
4025 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4026 quotient, remainder, 0))
4028 /* This could be computed with a branch-less sequence.
4029 Save that for later. */
4030 rtx tem;
4031 rtx label = gen_label_rtx ();
4032 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4033 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4034 NULL_RTX, 0, OPTAB_WIDEN);
4035 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4036 expand_dec (quotient, const1_rtx);
4037 expand_inc (remainder, op1);
4038 emit_label (label);
4039 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4042 /* No luck with division elimination or divmod. Have to do it
4043 by conditionally adjusting op0 *and* the result. */
4045 rtx label1, label2, label3, label4, label5;
4046 rtx adjusted_op0;
4047 rtx tem;
4049 quotient = gen_reg_rtx (compute_mode);
4050 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4051 label1 = gen_label_rtx ();
4052 label2 = gen_label_rtx ();
4053 label3 = gen_label_rtx ();
4054 label4 = gen_label_rtx ();
4055 label5 = gen_label_rtx ();
4056 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4057 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4058 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4059 quotient, 0, OPTAB_LIB_WIDEN);
4060 if (tem != quotient)
4061 emit_move_insn (quotient, tem);
4062 emit_jump_insn (gen_jump (label5));
4063 emit_barrier ();
4064 emit_label (label1);
4065 expand_inc (adjusted_op0, const1_rtx);
4066 emit_jump_insn (gen_jump (label4));
4067 emit_barrier ();
4068 emit_label (label2);
4069 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4070 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4071 quotient, 0, OPTAB_LIB_WIDEN);
4072 if (tem != quotient)
4073 emit_move_insn (quotient, tem);
4074 emit_jump_insn (gen_jump (label5));
4075 emit_barrier ();
4076 emit_label (label3);
4077 expand_dec (adjusted_op0, const1_rtx);
4078 emit_label (label4);
4079 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4080 quotient, 0, OPTAB_LIB_WIDEN);
4081 if (tem != quotient)
4082 emit_move_insn (quotient, tem);
4083 expand_dec (quotient, const1_rtx);
4084 emit_label (label5);
4086 break;
4088 case CEIL_DIV_EXPR:
4089 case CEIL_MOD_EXPR:
4090 if (unsignedp)
4092 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4094 rtx t1, t2, t3;
4095 unsigned HOST_WIDE_INT d = INTVAL (op1);
4096 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4097 build_int_cst (NULL_TREE, floor_log2 (d)),
4098 tquotient, 1);
4099 t2 = expand_binop (compute_mode, and_optab, op0,
4100 GEN_INT (d - 1),
4101 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4102 t3 = gen_reg_rtx (compute_mode);
4103 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4104 compute_mode, 1, 1);
4105 if (t3 == 0)
4107 rtx lab;
4108 lab = gen_label_rtx ();
4109 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4110 expand_inc (t1, const1_rtx);
4111 emit_label (lab);
4112 quotient = t1;
4114 else
4115 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4116 t1, t3),
4117 tquotient);
4118 break;
4121 /* Try using an instruction that produces both the quotient and
4122 remainder, using truncation. We can easily compensate the
4123 quotient or remainder to get ceiling rounding, once we have the
4124 remainder. Notice that we also compute the final remainder
4125 value here, and return the result right away. */
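   /* As a worked example: 7 / 3 truncates to quotient 2 with remainder 1;
      the remainder is nonzero, so the ceiling quotient is 2 + 1 == 3 and
      the CEIL_MOD remainder is 1 - 3 == -2, i.e. 7 == 3 * 3 - 2.  */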
4126 if (target == 0 || GET_MODE (target) != compute_mode)
4127 target = gen_reg_rtx (compute_mode);
4129 if (rem_flag)
4131 remainder = (REG_P (target)
4132 ? target : gen_reg_rtx (compute_mode));
4133 quotient = gen_reg_rtx (compute_mode);
4135 else
4137 quotient = (REG_P (target)
4138 ? target : gen_reg_rtx (compute_mode));
4139 remainder = gen_reg_rtx (compute_mode);
4142 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4143 remainder, 1))
4145 /* This could be computed with a branch-less sequence.
4146 Save that for later. */
4147 rtx label = gen_label_rtx ();
4148 do_cmp_and_jump (remainder, const0_rtx, EQ,
4149 compute_mode, label);
4150 expand_inc (quotient, const1_rtx);
4151 expand_dec (remainder, op1);
4152 emit_label (label);
4153 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4156 /* No luck with division elimination or divmod. Have to do it
4157 by conditionally adjusting op0 *and* the result. */
4159 rtx label1, label2;
4160 rtx adjusted_op0, tem;
4162 quotient = gen_reg_rtx (compute_mode);
4163 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4164 label1 = gen_label_rtx ();
4165 label2 = gen_label_rtx ();
4166 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4167 compute_mode, label1);
4168 emit_move_insn (quotient, const0_rtx);
4169 emit_jump_insn (gen_jump (label2));
4170 emit_barrier ();
4171 emit_label (label1);
4172 expand_dec (adjusted_op0, const1_rtx);
4173 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4174 quotient, 1, OPTAB_LIB_WIDEN);
4175 if (tem != quotient)
4176 emit_move_insn (quotient, tem);
4177 expand_inc (quotient, const1_rtx);
4178 emit_label (label2);
4181 else /* signed */
4183 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4184 && INTVAL (op1) >= 0)
4186 /* This is extremely similar to the code for the unsigned case
4187 above. For 2.7 we should merge these variants, but for
4188 2.6.1 I don't want to touch the code for unsigned since that
4189 gets used in C. The signed case will only be used by other
4190 languages (Ada). */
4192 rtx t1, t2, t3;
4193 unsigned HOST_WIDE_INT d = INTVAL (op1);
4194 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4195 build_int_cst (NULL_TREE, floor_log2 (d)),
4196 tquotient, 0);
4197 t2 = expand_binop (compute_mode, and_optab, op0,
4198 GEN_INT (d - 1),
4199 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4200 t3 = gen_reg_rtx (compute_mode);
4201 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4202 compute_mode, 1, 1);
4203 if (t3 == 0)
4205 rtx lab;
4206 lab = gen_label_rtx ();
4207 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4208 expand_inc (t1, const1_rtx);
4209 emit_label (lab);
4210 quotient = t1;
4212 else
4213 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4214 t1, t3),
4215 tquotient);
4216 break;
4219 /* Try using an instruction that produces both the quotient and
4220 remainder, using truncation. We can easily compensate the
4221 quotient or remainder to get ceiling rounding, once we have the
4222 remainder. Notice that we also compute the final remainder
4223 value here, and return the result right away. */
4224 if (target == 0 || GET_MODE (target) != compute_mode)
4225 target = gen_reg_rtx (compute_mode);
4226 if (rem_flag)
4228 remainder = (REG_P (target)
4229 ? target : gen_reg_rtx (compute_mode));
4230 quotient = gen_reg_rtx (compute_mode);
4232 else
4234 quotient = (REG_P (target)
4235 ? target : gen_reg_rtx (compute_mode));
4236 remainder = gen_reg_rtx (compute_mode);
4239 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4240 remainder, 0))
4242 /* This could be computed with a branch-less sequence.
4243 Save that for later. */
4244 rtx tem;
4245 rtx label = gen_label_rtx ();
4246 do_cmp_and_jump (remainder, const0_rtx, EQ,
4247 compute_mode, label);
4248 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4249 NULL_RTX, 0, OPTAB_WIDEN);
4250 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4251 expand_inc (quotient, const1_rtx);
4252 expand_dec (remainder, op1);
4253 emit_label (label);
4254 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4257 /* No luck with division elimination or divmod. Have to do it
4258 by conditionally adjusting op0 *and* the result. */
4260 rtx label1, label2, label3, label4, label5;
4261 rtx adjusted_op0;
4262 rtx tem;
4264 quotient = gen_reg_rtx (compute_mode);
4265 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4266 label1 = gen_label_rtx ();
4267 label2 = gen_label_rtx ();
4268 label3 = gen_label_rtx ();
4269 label4 = gen_label_rtx ();
4270 label5 = gen_label_rtx ();
4271 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4272 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4273 compute_mode, label1);
4274 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4275 quotient, 0, OPTAB_LIB_WIDEN);
4276 if (tem != quotient)
4277 emit_move_insn (quotient, tem);
4278 emit_jump_insn (gen_jump (label5));
4279 emit_barrier ();
4280 emit_label (label1);
4281 expand_dec (adjusted_op0, const1_rtx);
4282 emit_jump_insn (gen_jump (label4));
4283 emit_barrier ();
4284 emit_label (label2);
4285 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4286 compute_mode, label3);
4287 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4288 quotient, 0, OPTAB_LIB_WIDEN);
4289 if (tem != quotient)
4290 emit_move_insn (quotient, tem);
4291 emit_jump_insn (gen_jump (label5));
4292 emit_barrier ();
4293 emit_label (label3);
4294 expand_inc (adjusted_op0, const1_rtx);
4295 emit_label (label4);
4296 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4297 quotient, 0, OPTAB_LIB_WIDEN);
4298 if (tem != quotient)
4299 emit_move_insn (quotient, tem);
4300 expand_inc (quotient, const1_rtx);
4301 emit_label (label5);
4304 break;
4306 case EXACT_DIV_EXPR:
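   /* An exact division by an odd constant is a single multiplication by
      the divisor's multiplicative inverse modulo 2**size; any factor of
      two in the divisor is first removed with a right shift.
      Illustrative 32-bit example: the inverse of 3 is 0xAAAAAAAB
      (3 * 0xAAAAAAAB == 1 mod 2**32), and 21 * 0xAAAAAAAB mod 2**32
      == 7 == 21 / 3.  */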
4307 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4309 HOST_WIDE_INT d = INTVAL (op1);
4310 unsigned HOST_WIDE_INT ml;
4311 int pre_shift;
4312 rtx t1;
4314 pre_shift = floor_log2 (d & -d);
4315 ml = invert_mod2n (d >> pre_shift, size);
4316 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4317 build_int_cst (NULL_TREE, pre_shift),
4318 NULL_RTX, unsignedp);
4319 quotient = expand_mult (compute_mode, t1,
4320 gen_int_mode (ml, compute_mode),
4321 NULL_RTX, 1);
4323 insn = get_last_insn ();
4324 set_unique_reg_note (insn,
4325 REG_EQUAL,
4326 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4327 compute_mode,
4328 op0, op1));
4330 break;
4332 case ROUND_DIV_EXPR:
4333 case ROUND_MOD_EXPR:
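   /* Rounding division is done below as truncating division followed by
      an adjustment when twice the remainder is at least the divisor, so
      ties round away from zero.  For example, 8 / 3 truncates to
      quotient 2 with remainder 2; 2 * 2 >= 3, so the rounded quotient is
      3 and the remainder becomes 2 - 3 == -1.  */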
4334 if (unsignedp)
4336 rtx tem;
4337 rtx label;
4338 label = gen_label_rtx ();
4339 quotient = gen_reg_rtx (compute_mode);
4340 remainder = gen_reg_rtx (compute_mode);
4341 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4343 rtx tem;
4344 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4345 quotient, 1, OPTAB_LIB_WIDEN);
4346 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4347 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4348 remainder, 1, OPTAB_LIB_WIDEN);
4350 tem = plus_constant (op1, -1);
4351 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4352 build_int_cst (NULL_TREE, 1),
4353 NULL_RTX, 1);
4354 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4355 expand_inc (quotient, const1_rtx);
4356 expand_dec (remainder, op1);
4357 emit_label (label);
4359 else
4361 rtx abs_rem, abs_op1, tem, mask;
4362 rtx label;
4363 label = gen_label_rtx ();
4364 quotient = gen_reg_rtx (compute_mode);
4365 remainder = gen_reg_rtx (compute_mode);
4366 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4368 rtx tem;
4369 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4370 quotient, 0, OPTAB_LIB_WIDEN);
4371 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4372 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4373 remainder, 0, OPTAB_LIB_WIDEN);
4375 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4376 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4377 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4378 build_int_cst (NULL_TREE, 1),
4379 NULL_RTX, 1);
4380 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4381 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4382 NULL_RTX, 0, OPTAB_WIDEN);
4383 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4384 build_int_cst (NULL_TREE, size - 1),
4385 NULL_RTX, 0);
4386 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4387 NULL_RTX, 0, OPTAB_WIDEN);
4388 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4389 NULL_RTX, 0, OPTAB_WIDEN);
4390 expand_inc (quotient, tem);
4391 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4392 NULL_RTX, 0, OPTAB_WIDEN);
4393 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4394 NULL_RTX, 0, OPTAB_WIDEN);
4395 expand_dec (remainder, tem);
4396 emit_label (label);
4398 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4400 default:
4401 abort ();
4404 if (quotient == 0)
4406 if (target && GET_MODE (target) != compute_mode)
4407 target = 0;
4409 if (rem_flag)
4411 /* Try to produce the remainder without producing the quotient.
4412 If we seem to have a divmod pattern that does not require widening,
4413 don't try widening here. We should really have a WIDEN argument
4414 to expand_twoval_binop, since what we'd really like to do here is
4415 1) try a mod insn in compute_mode
4416 2) try a divmod insn in compute_mode
4417 3) try a div insn in compute_mode and multiply-subtract to get
4418 remainder
4419 4) try the same things with widening allowed. */
4420 remainder
4421 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4422 op0, op1, target,
4423 unsignedp,
4424 ((optab2->handlers[compute_mode].insn_code
4425 != CODE_FOR_nothing)
4426 ? OPTAB_DIRECT : OPTAB_WIDEN));
4427 if (remainder == 0)
4429 /* No luck there. Can we do remainder and divide at once
4430 without a library call? */
4431 remainder = gen_reg_rtx (compute_mode);
4432 if (! expand_twoval_binop ((unsignedp
4433 ? udivmod_optab
4434 : sdivmod_optab),
4435 op0, op1,
4436 NULL_RTX, remainder, unsignedp))
4437 remainder = 0;
4440 if (remainder)
4441 return gen_lowpart (mode, remainder);
4444 /* Produce the quotient. Try a quotient insn, but not a library call.
4445 If we have a divmod in this mode, use it in preference to widening
4446 the div (for this test we assume it will not fail). Note that optab2
4447 is set to the one of the two optabs that the call below will use. */
4448 quotient
4449 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4450 op0, op1, rem_flag ? NULL_RTX : target,
4451 unsignedp,
4452 ((optab2->handlers[compute_mode].insn_code
4453 != CODE_FOR_nothing)
4454 ? OPTAB_DIRECT : OPTAB_WIDEN));
4456 if (quotient == 0)
4458 /* No luck there. Try a quotient-and-remainder insn,
4459 keeping the quotient alone. */
4460 quotient = gen_reg_rtx (compute_mode);
4461 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4462 op0, op1,
4463 quotient, NULL_RTX, unsignedp))
4465 quotient = 0;
4466 if (! rem_flag)
4467 /* Still no luck. If we are not computing the remainder,
4468 use a library call for the quotient. */
4469 quotient = sign_expand_binop (compute_mode,
4470 udiv_optab, sdiv_optab,
4471 op0, op1, target,
4472 unsignedp, OPTAB_LIB_WIDEN);
4477 if (rem_flag)
4479 if (target && GET_MODE (target) != compute_mode)
4480 target = 0;
4482 if (quotient == 0)
4484 /* No divide instruction either. Use library for remainder. */
4485 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4486 op0, op1, target,
4487 unsignedp, OPTAB_LIB_WIDEN);
4488 /* No remainder function. Try a quotient-and-remainder
4489 function, keeping the remainder. */
4490 if (!remainder)
4492 remainder = gen_reg_rtx (compute_mode);
4493 if (!expand_twoval_binop_libfunc
4494 (unsignedp ? udivmod_optab : sdivmod_optab,
4495 op0, op1,
4496 NULL_RTX, remainder,
4497 unsignedp ? UMOD : MOD))
4498 remainder = NULL_RTX;
4501 else
4503 /* We divided. Now finish doing X - Y * (X / Y). */
4504 remainder = expand_mult (compute_mode, quotient, op1,
4505 NULL_RTX, unsignedp);
4506 remainder = expand_binop (compute_mode, sub_optab, op0,
4507 remainder, target, unsignedp,
4508 OPTAB_LIB_WIDEN);
4512 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4515 /* Return a tree node with data type TYPE, describing the value of X.
4516 Usually this is a VAR_DECL, if there is no obvious better choice.
4517 X may be an expression; however, we only support those expressions
4518 generated by loop.c. */
4520 tree
4521 make_tree (tree type, rtx x)
4523 tree t;
4525 switch (GET_CODE (x))
4527 case CONST_INT:
4529 HOST_WIDE_INT hi = 0;
4531 if (INTVAL (x) < 0
4532 && !(TYPE_UNSIGNED (type)
4533 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4534 < HOST_BITS_PER_WIDE_INT)))
4535 hi = -1;
4537 t = build_int_cst_wide (type, INTVAL (x), hi);
4539 return t;
4542 case CONST_DOUBLE:
4543 if (GET_MODE (x) == VOIDmode)
4544 t = build_int_cst_wide (type,
4545 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4546 else
4548 REAL_VALUE_TYPE d;
4550 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4551 t = build_real (type, d);
4554 return t;
4556 case CONST_VECTOR:
4558 int i, units;
4559 rtx elt;
4560 tree t = NULL_TREE;
4562 units = CONST_VECTOR_NUNITS (x);
4564 /* Build a tree with vector elements. */
4565 for (i = units - 1; i >= 0; --i)
4567 elt = CONST_VECTOR_ELT (x, i);
4568 t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4571 return build_vector (type, t);
4574 case PLUS:
4575 return fold (build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4576 make_tree (type, XEXP (x, 1))));
4578 case MINUS:
4579 return fold (build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4580 make_tree (type, XEXP (x, 1))));
4582 case NEG:
4583 return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
4585 case MULT:
4586 return fold (build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4587 make_tree (type, XEXP (x, 1))));
4589 case ASHIFT:
4590 return fold (build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4591 make_tree (type, XEXP (x, 1))));
4593 case LSHIFTRT:
4594 t = lang_hooks.types.unsigned_type (type);
4595 return fold (convert (type,
4596 build2 (RSHIFT_EXPR, t,
4597 make_tree (t, XEXP (x, 0)),
4598 make_tree (type, XEXP (x, 1)))));
4600 case ASHIFTRT:
4601 t = lang_hooks.types.signed_type (type);
4602 return fold (convert (type,
4603 build2 (RSHIFT_EXPR, t,
4604 make_tree (t, XEXP (x, 0)),
4605 make_tree (type, XEXP (x, 1)))));
4607 case DIV:
4608 if (TREE_CODE (type) != REAL_TYPE)
4609 t = lang_hooks.types.signed_type (type);
4610 else
4611 t = type;
4613 return fold (convert (type,
4614 build2 (TRUNC_DIV_EXPR, t,
4615 make_tree (t, XEXP (x, 0)),
4616 make_tree (t, XEXP (x, 1)))));
4617 case UDIV:
4618 t = lang_hooks.types.unsigned_type (type);
4619 return fold (convert (type,
4620 build2 (TRUNC_DIV_EXPR, t,
4621 make_tree (t, XEXP (x, 0)),
4622 make_tree (t, XEXP (x, 1)))));
4624 case SIGN_EXTEND:
4625 case ZERO_EXTEND:
4626 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4627 GET_CODE (x) == ZERO_EXTEND);
4628 return fold (convert (type, make_tree (t, XEXP (x, 0))));
4630 default:
4631 t = build_decl (VAR_DECL, NULL_TREE, type);
4633 /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4634 ptr_mode. So convert. */
4635 if (POINTER_TYPE_P (type))
4636 x = convert_memory_address (TYPE_MODE (type), x);
4638 /* Note that we do *not* use SET_DECL_RTL here, because we do not
4639 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
4640 t->decl.rtl = x;
4642 return t;
4646 /* Check whether the multiplication X * MULT + ADD overflows.
4647 X, MULT and ADD must be CONST_*.
4648 MODE is the machine mode for the computation.
4649 X and MULT must have mode MODE. ADD may have a different mode.
4650 So can X (defaults to same as MODE).
4651 UNSIGNEDP is nonzero to do unsigned multiplication. */
4653 bool
4654 const_mult_add_overflow_p (rtx x, rtx mult, rtx add,
4655 enum machine_mode mode, int unsignedp)
4657 tree type, mult_type, add_type, result;
4659 type = lang_hooks.types.type_for_mode (mode, unsignedp);
4661 /* In order to get a proper overflow indication from an unsigned
4662 type, we have to pretend that it's a sizetype. */
4663 mult_type = type;
4664 if (unsignedp)
4666 /* FIXME: It would be nice if we could step directly from this
4667 type to its sizetype equivalent. */
4668 mult_type = build_distinct_type_copy (type);
4669 TYPE_IS_SIZETYPE (mult_type) = 1;
4672 add_type = (GET_MODE (add) == VOIDmode ? mult_type
4673 : lang_hooks.types.type_for_mode (GET_MODE (add), unsignedp));
4675 result = fold (build2 (PLUS_EXPR, mult_type,
4676 fold (build2 (MULT_EXPR, mult_type,
4677 make_tree (mult_type, x),
4678 make_tree (mult_type, mult))),
4679 make_tree (add_type, add)));
4681 return TREE_CONSTANT_OVERFLOW (result);
4684 /* Return an rtx representing the value of X * MULT + ADD.
4685 TARGET is a suggestion for where to store the result (an rtx).
4686 MODE is the machine mode for the computation.
4687 X and MULT must have mode MODE. ADD may have a different mode.
4688 So can X (defaults to same as MODE).
4689 UNSIGNEDP is nonzero to do unsigned multiplication.
4690 This may emit insns. */
4693 expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
4694 int unsignedp)
4696 tree type = lang_hooks.types.type_for_mode (mode, unsignedp);
4697 tree add_type = (GET_MODE (add) == VOIDmode
4698 ? type : lang_hooks.types.type_for_mode (GET_MODE (add),
4699 unsignedp));
4700 tree result = fold (build2 (PLUS_EXPR, type,
4701 fold (build2 (MULT_EXPR, type,
4702 make_tree (type, x),
4703 make_tree (type, mult))),
4704 make_tree (add_type, add)));
4706 return expand_expr (result, target, VOIDmode, 0);
4709 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4710 and returning TARGET.
4712 If TARGET is 0, a pseudo-register or constant is returned. */
4715 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
4717 rtx tem = 0;
4719 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4720 tem = simplify_binary_operation (AND, mode, op0, op1);
4721 if (tem == 0)
4722 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4724 if (target == 0)
4725 target = tem;
4726 else if (tem != target)
4727 emit_move_insn (target, tem);
4728 return target;
4731 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
4732 and storing in TARGET. Normally return TARGET.
4733 Return 0 if that cannot be done.
4735 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
4736 it is VOIDmode, they cannot both be CONST_INT.
4738 UNSIGNEDP is for the case where we have to widen the operands
4739 to perform the operation. It says to use zero-extension.
4741 NORMALIZEP is 1 if we should convert the result to be either zero
4742 or one. NORMALIZEP is -1 if we should convert the result to be
4743 either zero or -1. If NORMALIZEP is zero, the result will be left
4744 "raw" out of the scc insn. */
4747 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
4748 enum machine_mode mode, int unsignedp, int normalizep)
4750 rtx subtarget;
4751 enum insn_code icode;
4752 enum machine_mode compare_mode;
4753 enum machine_mode target_mode = GET_MODE (target);
4754 rtx tem;
4755 rtx last = get_last_insn ();
4756 rtx pattern, comparison;
4758 if (unsignedp)
4759 code = unsigned_condition (code);
4761 /* If one operand is constant, make it the second one. Only do this
4762 if the other operand is not constant as well. */
4764 if (swap_commutative_operands_p (op0, op1))
4766 tem = op0;
4767 op0 = op1;
4768 op1 = tem;
4769 code = swap_condition (code);
4772 if (mode == VOIDmode)
4773 mode = GET_MODE (op0);
4775 /* For some comparisons with 1 and -1, we can convert this to
4776 comparisons with zero. This will often produce more opportunities for
4777 store-flag insns. */
4779 switch (code)
4781 case LT:
4782 if (op1 == const1_rtx)
4783 op1 = const0_rtx, code = LE;
4784 break;
4785 case LE:
4786 if (op1 == constm1_rtx)
4787 op1 = const0_rtx, code = LT;
4788 break;
4789 case GE:
4790 if (op1 == const1_rtx)
4791 op1 = const0_rtx, code = GT;
4792 break;
4793 case GT:
4794 if (op1 == constm1_rtx)
4795 op1 = const0_rtx, code = GE;
4796 break;
4797 case GEU:
4798 if (op1 == const1_rtx)
4799 op1 = const0_rtx, code = NE;
4800 break;
4801 case LTU:
4802 if (op1 == const1_rtx)
4803 op1 = const0_rtx, code = EQ;
4804 break;
4805 default:
4806 break;
4809 /* If we are comparing a double-word integer with zero or -1, we can
4810 convert the comparison into one involving a single word. */
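  /* (For a double-word X this relies on X == 0 exactly when
     (low | high) == 0 and X == -1 exactly when (low & high) == -1.)  */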
4811 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
4812 && GET_MODE_CLASS (mode) == MODE_INT
4813 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
4815 if ((code == EQ || code == NE)
4816 && (op1 == const0_rtx || op1 == constm1_rtx))
4818 rtx op00, op01, op0both;
4820 /* Do a logical OR or AND of the two words and compare the result. */
4821 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
4822 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
4823 op0both = expand_binop (word_mode,
4824 op1 == const0_rtx ? ior_optab : and_optab,
4825 op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
4827 if (op0both != 0)
4828 return emit_store_flag (target, code, op0both, op1, word_mode,
4829 unsignedp, normalizep);
4831 else if ((code == LT || code == GE) && op1 == const0_rtx)
4833 rtx op0h;
4835 /* If testing the sign bit, can just test on high word. */
4836 op0h = simplify_gen_subreg (word_mode, op0, mode,
4837 subreg_highpart_offset (word_mode, mode));
4838 return emit_store_flag (target, code, op0h, op1, word_mode,
4839 unsignedp, normalizep);
4843 /* From now on, we won't change CODE, so set ICODE now. */
4844 icode = setcc_gen_code[(int) code];
4846 /* If this is A < 0 or A >= 0, we can do this by taking the one's
4847 complement of A (for GE) and shifting the sign bit to the low bit. */
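  /* E.g. (illustrative) for 32-bit A: A < 0 is just the logical shift
     (unsigned) A >> 31, and A >= 0 is (unsigned) ~A >> 31.  */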
4848 if (op1 == const0_rtx && (code == LT || code == GE)
4849 && GET_MODE_CLASS (mode) == MODE_INT
4850 && (normalizep || STORE_FLAG_VALUE == 1
4851 || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4852 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4853 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
4855 subtarget = target;
4857 /* If the result is to be wider than OP0, it is best to convert it
4858 first. If it is to be narrower, it is *incorrect* to convert it
4859 first. */
4860 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
4862 op0 = convert_modes (target_mode, mode, op0, 0);
4863 mode = target_mode;
4866 if (target_mode != mode)
4867 subtarget = 0;
4869 if (code == GE)
4870 op0 = expand_unop (mode, one_cmpl_optab, op0,
4871 ((STORE_FLAG_VALUE == 1 || normalizep)
4872 ? 0 : subtarget), 0);
4874 if (STORE_FLAG_VALUE == 1 || normalizep)
4875 /* If we are supposed to produce a 0/1 value, we want to do
4876 a logical shift from the sign bit to the low-order bit; for
4877 a -1/0 value, we do an arithmetic shift. */
4878 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
4879 size_int (GET_MODE_BITSIZE (mode) - 1),
4880 subtarget, normalizep != -1);
4882 if (mode != target_mode)
4883 op0 = convert_modes (target_mode, mode, op0, 0);
4885 return op0;
4888 if (icode != CODE_FOR_nothing)
4890 insn_operand_predicate_fn pred;
4892 /* We think we may be able to do this with a scc insn. Emit the
4893 comparison and then the scc insn. */
4895 do_pending_stack_adjust ();
4896 last = get_last_insn ();
4898 comparison
4899 = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
4900 if (CONSTANT_P (comparison))
4902 if (GET_CODE (comparison) == CONST_INT)
4904 if (comparison == const0_rtx)
4905 return const0_rtx;
4907 #ifdef FLOAT_STORE_FLAG_VALUE
4908 else if (GET_CODE (comparison) == CONST_DOUBLE)
4910 if (comparison == CONST0_RTX (GET_MODE (comparison)))
4911 return const0_rtx;
4913 #endif
4914 else
4915 abort ();
4916 if (normalizep == 1)
4917 return const1_rtx;
4918 if (normalizep == -1)
4919 return constm1_rtx;
4920 return const_true_rtx;
4923 /* The code of COMPARISON may not match CODE if compare_from_rtx
4924 decided to swap its operands and reverse the original code.
4926 We know that compare_from_rtx returns either a CONST_INT or
4927 a new comparison code, so it is safe to just extract the
4928 code from COMPARISON. */
4929 code = GET_CODE (comparison);
4931 /* Get a reference to the target in the proper mode for this insn. */
4932 compare_mode = insn_data[(int) icode].operand[0].mode;
4933 subtarget = target;
4934 pred = insn_data[(int) icode].operand[0].predicate;
4935 if (optimize || ! (*pred) (subtarget, compare_mode))
4936 subtarget = gen_reg_rtx (compare_mode);
4938 pattern = GEN_FCN (icode) (subtarget);
4939 if (pattern)
4941 emit_insn (pattern);
4943 /* If we are converting to a wider mode, first convert to
4944 TARGET_MODE, then normalize. This produces better combining
4945 opportunities on machines that have a SIGN_EXTRACT when we are
4946 testing a single bit. This mostly benefits the 68k.
4948 If STORE_FLAG_VALUE does not have the sign bit set when
4949 interpreted in COMPARE_MODE, we can do this conversion as
4950 unsigned, which is usually more efficient. */
4951 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4953 convert_move (target, subtarget,
4954 (GET_MODE_BITSIZE (compare_mode)
4955 <= HOST_BITS_PER_WIDE_INT)
4956 && 0 == (STORE_FLAG_VALUE
4957 & ((HOST_WIDE_INT) 1
4958 << (GET_MODE_BITSIZE (compare_mode) -1))));
4959 op0 = target;
4960 compare_mode = target_mode;
4962 else
4963 op0 = subtarget;
4965 /* If we want to keep subexpressions around, don't reuse our
4966 last target. */
4968 if (optimize)
4969 subtarget = 0;
4971 /* Now normalize to the proper value in COMPARE_MODE. Sometimes
4972 we don't have to do anything. */
4973 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4975 /* STORE_FLAG_VALUE might be the most negative number, so write
4976 the comparison this way to avoid a compile-time warning. */
4977 else if (- normalizep == STORE_FLAG_VALUE)
4978 op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
4980 /* We don't want to use STORE_FLAG_VALUE < 0 below since this
4981 makes it hard to use a value of just the sign bit due to
4982 ANSI integer constant typing rules. */
4983 else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
4984 && (STORE_FLAG_VALUE
4985 & ((HOST_WIDE_INT) 1
4986 << (GET_MODE_BITSIZE (compare_mode) - 1))))
4987 op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
4988 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
4989 subtarget, normalizep == 1);
4990 else if (STORE_FLAG_VALUE & 1)
4992 op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
4993 if (normalizep == -1)
4994 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
4996 else
4997 abort ();
4999 /* If we were converting to a smaller mode, do the
5000 conversion now. */
5001 if (target_mode != compare_mode)
5003 convert_move (target, op0, 0);
5004 return target;
5006 else
5007 return op0;
5011 delete_insns_since (last);
5013 /* If optimizing, use different pseudo registers for each insn, instead
5014 of reusing the same pseudo. This leads to better CSE, but slows
5015 down the compiler, since there are more pseudos. */
5016 subtarget = (!optimize
5017 && (target_mode == mode)) ? target : NULL_RTX;
5019 /* If we reached here, we can't do this with a scc insn. However, there
5020 are some comparisons that can be done directly. For example, if
5021 this is an equality comparison of integers, we can try to exclusive-or
5022 (or subtract) the two operands and use a recursive call to try the
5023 comparison with zero. Don't do any of these cases if branches are
5024 very cheap. */
5026 if (BRANCH_COST > 0
5027 && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5028 && op1 != const0_rtx)
5030 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5031 OPTAB_WIDEN);
5033 if (tem == 0)
5034 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5035 OPTAB_WIDEN);
5036 if (tem != 0)
5037 tem = emit_store_flag (target, code, tem, const0_rtx,
5038 mode, unsignedp, normalizep);
5039 if (tem == 0)
5040 delete_insns_since (last);
5041 return tem;
5044 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5045 the constant zero. Reject all other comparisons at this point. Only
5046 do LE and GT if branches are expensive since they are expensive on
5047 2-operand machines. */
5049 if (BRANCH_COST == 0
5050 || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5051 || (code != EQ && code != NE
5052 && (BRANCH_COST <= 1 || (code != LE && code != GT))))
5053 return 0;
5055 /* See what we need to return. We can only return a 1, -1, or the
5056 sign bit. */
5058 if (normalizep == 0)
5060 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5061 normalizep = STORE_FLAG_VALUE;
5063 else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5064 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5065 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5067 else
5068 return 0;
5071 /* Try to put the result of the comparison in the sign bit. Assume we can't
5072 do the necessary operation below. */
5074 tem = 0;
5076 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5077 the sign bit set. */
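  /* As 32-bit sanity checks: A = 0 gives 0 | -1 == -1 (sign set),
     A = -3 gives -3 | -4 == -3 (sign set), while A = 5 gives
     5 | 4 == 5 (sign clear).  */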
5079 if (code == LE)
5081 /* This is destructive, so SUBTARGET can't be OP0. */
5082 if (rtx_equal_p (subtarget, op0))
5083 subtarget = 0;
5085 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5086 OPTAB_WIDEN);
5087 if (tem)
5088 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5089 OPTAB_WIDEN);
5092 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5093 number of bits in the mode of OP0, minus one. */
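  /* Sanity checks (32-bit, arithmetic shift): A = 5 gives 0 - 5 == -5
     (sign set), A = 0 gives 0 (sign clear), and A = -3 gives
     -1 - -3 == 2 (sign clear).  */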
5095 if (code == GT)
5097 if (rtx_equal_p (subtarget, op0))
5098 subtarget = 0;
5100 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5101 size_int (GET_MODE_BITSIZE (mode) - 1),
5102 subtarget, 0);
5103 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5104 OPTAB_WIDEN);
5107 if (code == EQ || code == NE)
5109 /* For EQ or NE, one way to do the comparison is to apply an operation
5110 that converts the operand into a positive number if it is nonzero
5111 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5112 for NE we negate. This puts the result in the sign bit. Then we
5113 normalize with a shift, if needed.
5115 Two operations that can do the above actions are ABS and FFS, so try
5116 them. If that doesn't work, and MODE is smaller than a full word,
5117 we can use zero-extension to the wider mode (an unsigned conversion)
5118 as the operation. */
5120 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5121 that is compensated by the subsequent overflow when subtracting
5122 one / negating. */
5124 if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5125 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5126 else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5127 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5128 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5130 tem = convert_modes (word_mode, mode, op0, 1);
5131 mode = word_mode;
5134 if (tem != 0)
5136 if (code == EQ)
5137 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5138 0, OPTAB_WIDEN);
5139 else
5140 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5143 /* If we couldn't do it that way, for NE we can "or" the two's complement
5144 of the value with itself. For EQ, we take the one's complement of
5145 that "or", which is an extra insn, so we only handle EQ if branches
5146 are expensive. */
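  /* E.g. in 32 bits: for nonzero A, say A = 5, (-A) | A == -1 (sign set),
     while for A = 0 it is 0 (sign clear); one's complementing that gives
     the EQ test.  */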
5148 if (tem == 0 && (code == NE || BRANCH_COST > 1))
5150 if (rtx_equal_p (subtarget, op0))
5151 subtarget = 0;
5153 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5154 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5155 OPTAB_WIDEN);
5157 if (tem && code == EQ)
5158 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5162 if (tem && normalizep)
5163 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5164 size_int (GET_MODE_BITSIZE (mode) - 1),
5165 subtarget, normalizep == 1);
5167 if (tem)
5169 if (GET_MODE (tem) != target_mode)
5171 convert_move (target, tem, 0);
5172 tem = target;
5174 else if (!subtarget)
5176 emit_move_insn (target, tem);
5177 tem = target;
5180 else
5181 delete_insns_since (last);
5183 return tem;
5186 /* Like emit_store_flag, but always succeeds. */
5189 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5190 enum machine_mode mode, int unsignedp, int normalizep)
5192 rtx tem, label;
5194 /* First see if emit_store_flag can do the job. */
5195 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5196 if (tem != 0)
5197 return tem;
5199 if (normalizep == 0)
5200 normalizep = 1;
5202 /* If this failed, we have to do this with set/compare/jump/set code. */
5204 if (!REG_P (target)
5205 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5206 target = gen_reg_rtx (GET_MODE (target));
5208 emit_move_insn (target, const1_rtx);
5209 label = gen_label_rtx ();
5210 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5211 NULL_RTX, label);
5213 emit_move_insn (target, const0_rtx);
5214 emit_label (label);
5216 return target;
5219 /* Perform possibly multi-word comparison and conditional jump to LABEL
5220 if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
5222 The algorithm is based on the code in expr.c:do_jump.
5224 Note that this does not perform a general comparison. Only variants
5225 generated within expmed.c are correctly handled; others abort (but could
5226 be handled if needed). */
5228 static void
5229 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5230 rtx label)
5232 /* If this mode is an integer too wide to compare properly,
5233 compare word by word. Rely on cse to optimize constant cases. */
5235 if (GET_MODE_CLASS (mode) == MODE_INT
5236 && ! can_compare_p (op, mode, ccp_jump))
5238 rtx label2 = gen_label_rtx ();
5240 switch (op)
5242 case LTU:
5243 do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
5244 break;
5246 case LEU:
5247 do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
5248 break;
5250 case LT:
5251 do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
5252 break;
5254 case GT:
5255 do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
5256 break;
5258 case GE:
5259 do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
5260 break;
5262 /* do_jump_by_parts_equality_rtx compares with zero. Luckily
5263 those are the only equality operations we do. */
5264 case EQ:
5265 if (arg2 != const0_rtx || mode != GET_MODE(arg1))
5266 abort ();
5267 do_jump_by_parts_equality_rtx (arg1, label2, label);
5268 break;
5270 case NE:
5271 if (arg2 != const0_rtx || mode != GET_MODE(arg1))
5272 abort ();
5273 do_jump_by_parts_equality_rtx (arg1, label, label2);
5274 break;
5276 default:
5277 abort ();
5280 emit_label (label2);
5282 else
5283 emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);