gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 2, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING.  If not, write to the Free
  20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  21 02111-1307, USA.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "tm.h"
  28 #include "toplev.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "tm_p.h"
  32 #include "flags.h"
  33 #include "insn-config.h"
  34 #include "expr.h"
  35 #include "optabs.h"
  36 #include "real.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39
  40 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  41                                    unsigned HOST_WIDE_INT,
  42                                    unsigned HOST_WIDE_INT, rtx);
  43 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  44                                    unsigned HOST_WIDE_INT, rtx);
  45 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  46                                     unsigned HOST_WIDE_INT,
  47                                     unsigned HOST_WIDE_INT,
  48                                     unsigned HOST_WIDE_INT, rtx, int);
  49 static rtx mask_rtx (enum machine_mode, int, int, int);
  50 static rtx lshift_value (enum machine_mode, rtx, int, int);
  51 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  52                                     unsigned HOST_WIDE_INT, int);
  53 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  54 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  55 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  56
  57 /* Nonzero means divides or modulus operations are relatively cheap for
  58    powers of two, so don't use branches; emit the operation instead.
  59    Usually, this will mean that the MD file will emit non-branch
  60    sequences.  */
  61
  62 static int sdiv_pow2_cheap[NUM_MACHINE_MODES];
  63 static int smod_pow2_cheap[NUM_MACHINE_MODES];
  64
  65 #ifndef SLOW_UNALIGNED_ACCESS
  66 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  67 #endif
  68
  69 /* For compilers that support multiple targets with different word sizes,
  70    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  71    is the H8/300(H) compiler.  */
  72
  73 #ifndef MAX_BITS_PER_WORD
  74 #define MAX_BITS_PER_WORD BITS_PER_WORD
  75 #endif
  76
  77 /* Reduce conditional compilation elsewhere.  */
  78 #ifndef HAVE_insv
  79 #define HAVE_insv       0
  80 #define CODE_FOR_insv   CODE_FOR_nothing
  81 #define gen_insv(a,b,c,d) NULL_RTX
  82 #endif
  83 #ifndef HAVE_extv
  84 #define HAVE_extv       0
  85 #define CODE_FOR_extv   CODE_FOR_nothing
  86 #define gen_extv(a,b,c,d) NULL_RTX
  87 #endif
  88 #ifndef HAVE_extzv
  89 #define HAVE_extzv      0
  90 #define CODE_FOR_extzv  CODE_FOR_nothing
  91 #define gen_extzv(a,b,c,d) NULL_RTX
  92 #endif
  93
  94 /* Cost of various pieces of RTL.  Note that some of these are indexed by
  95    shift count and some by mode.  */
  96 static int zero_cost;
  97 static int add_cost[NUM_MACHINE_MODES];
  98 static int neg_cost[NUM_MACHINE_MODES];
  99 static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 100 static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 101 static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 102 static int mul_cost[NUM_MACHINE_MODES];
 103 static int div_cost[NUM_MACHINE_MODES];
 104 static int mul_widen_cost[NUM_MACHINE_MODES];
 105 static int mul_highpart_cost[NUM_MACHINE_MODES];
 106
 107 void
 108 init_expmed (void)
 109 {
 110   struct
 111   {
 112     struct rtx_def reg;
 113     struct rtx_def plus;        rtunion plus_fld1;
 114     struct rtx_def neg;
 115     struct rtx_def udiv;        rtunion udiv_fld1;
 116     struct rtx_def mult;        rtunion mult_fld1;
 117     struct rtx_def div;         rtunion div_fld1;
 118     struct rtx_def mod;         rtunion mod_fld1;
 119     struct rtx_def zext;
 120     struct rtx_def wide_mult;   rtunion wide_mult_fld1;
 121     struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
 122     struct rtx_def wide_trunc;
 123     struct rtx_def shift;       rtunion shift_fld1;
 124     struct rtx_def shift_mult;  rtunion shift_mult_fld1;
 125     struct rtx_def shift_add;   rtunion shift_add_fld1;
 126     struct rtx_def shift_sub;   rtunion shift_sub_fld1;
 127   } all;
 128
 129   rtx pow2[MAX_BITS_PER_WORD];
 130   rtx cint[MAX_BITS_PER_WORD];
 131   int m, n;
 132   enum machine_mode mode, wider_mode;
 133
 134   zero_cost = rtx_cost (const0_rtx, 0);
 135
 136   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 137     {
 138       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 139       cint[m] = GEN_INT (m);
 140     }
 141
 142   memset (&all, 0, sizeof all);
 143
 144   PUT_CODE (&all.reg, REG);
 145   REGNO (&all.reg) = 10000;
 146
 147   PUT_CODE (&all.plus, PLUS);
 148   XEXP (&all.plus, 0) = &all.reg;
 149   XEXP (&all.plus, 1) = &all.reg;
 150
 151   PUT_CODE (&all.neg, NEG);
 152   XEXP (&all.neg, 0) = &all.reg;
 153
 154   PUT_CODE (&all.udiv, UDIV);
 155   XEXP (&all.udiv, 0) = &all.reg;
 156   XEXP (&all.udiv, 1) = &all.reg;
 157
 158   PUT_CODE (&all.mult, MULT);
 159   XEXP (&all.mult, 0) = &all.reg;
 160   XEXP (&all.mult, 1) = &all.reg;
 161
 162   PUT_CODE (&all.div, DIV);
 163   XEXP (&all.div, 0) = &all.reg;
 164   XEXP (&all.div, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
 165
 166   PUT_CODE (&all.mod, MOD);
 167   XEXP (&all.mod, 0) = &all.reg;
 168   XEXP (&all.mod, 1) = XEXP (&all.div, 1);
 169
 170   PUT_CODE (&all.zext, ZERO_EXTEND);
 171   XEXP (&all.zext, 0) = &all.reg;
 172
 173   PUT_CODE (&all.wide_mult, MULT);
 174   XEXP (&all.wide_mult, 0) = &all.zext;
 175   XEXP (&all.wide_mult, 1) = &all.zext;
 176
 177   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 178   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 179
 180   PUT_CODE (&all.wide_trunc, TRUNCATE);
 181   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 182
 183   PUT_CODE (&all.shift, ASHIFT);
 184   XEXP (&all.shift, 0) = &all.reg;
 185
 186   PUT_CODE (&all.shift_mult, MULT);
 187   XEXP (&all.shift_mult, 0) = &all.reg;
 188
 189   PUT_CODE (&all.shift_add, PLUS);
 190   XEXP (&all.shift_add, 0) = &all.shift_mult;
 191   XEXP (&all.shift_add, 1) = &all.reg;
 192
 193   PUT_CODE (&all.shift_sub, MINUS);
 194   XEXP (&all.shift_sub, 0) = &all.shift_mult;
 195   XEXP (&all.shift_sub, 1) = &all.reg;
 196
 197   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 198        mode != VOIDmode;
 199        mode = GET_MODE_WIDER_MODE (mode))
 200     {
 201       PUT_MODE (&all.reg, mode);
 202       PUT_MODE (&all.plus, mode);
 203       PUT_MODE (&all.neg, mode);
 204       PUT_MODE (&all.udiv, mode);
 205       PUT_MODE (&all.mult, mode);
 206       PUT_MODE (&all.div, mode);
 207       PUT_MODE (&all.mod, mode);
 208       PUT_MODE (&all.wide_trunc, mode);
 209       PUT_MODE (&all.shift, mode);
 210       PUT_MODE (&all.shift_mult, mode);
 211       PUT_MODE (&all.shift_add, mode);
 212       PUT_MODE (&all.shift_sub, mode);
 213
 214       add_cost[mode] = rtx_cost (&all.plus, SET);
 215       neg_cost[mode] = rtx_cost (&all.neg, SET);
 216       div_cost[mode] = rtx_cost (&all.udiv, SET);
 217       mul_cost[mode] = rtx_cost (&all.mult, SET);
 218
 219       sdiv_pow2_cheap[mode] = (rtx_cost (&all.div, SET) <= 2 * add_cost[mode]);
 220       smod_pow2_cheap[mode] = (rtx_cost (&all.mod, SET) <= 4 * add_cost[mode]);
 221
 222       wider_mode = GET_MODE_WIDER_MODE (mode);
 223       if (wider_mode != VOIDmode)
 224         {
 225           PUT_MODE (&all.zext, wider_mode);
 226           PUT_MODE (&all.wide_mult, wider_mode);
 227           PUT_MODE (&all.wide_lshr, wider_mode);
 228           XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
 229
 230           mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
 231           mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
 232         }
 233
 234       shift_cost[mode][0] = 0;
 235       shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
 236
 237       n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
 238       for (m = 1; m < n; m++)
 239         {
 240           XEXP (&all.shift, 1) = cint[m];
 241           XEXP (&all.shift_mult, 1) = pow2[m];
 242
 243           shift_cost[mode][m] = rtx_cost (&all.shift, SET);
 244           shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
 245           shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
 246         }
 247     }
 248 }
 249
 250 /* Return an rtx representing minus the value of X.
 251    MODE is the intended mode of the result,
 252    useful if X is a CONST_INT.  */
 253
 254 rtx
 255 negate_rtx (enum machine_mode mode, rtx x)
 256 {
 257   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 258
 259   if (result == 0)
 260     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 261
 262   return result;
 263 }
 264
 265 /* Report on the availability of insv/extv/extzv and the desired mode
 266    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 267    is false; else the mode of the specified operand.  If OPNO is -1,
 268    all the caller cares about is whether the insn is available.  */
 269 enum machine_mode
 270 mode_for_extraction (enum extraction_pattern pattern, int opno)
 271 {
 272   const struct insn_data *data;
 273
 274   switch (pattern)
 275     {
 276     case EP_insv:
 277       if (HAVE_insv)
 278         {
 279           data = &insn_data[CODE_FOR_insv];
 280           break;
 281         }
 282       return MAX_MACHINE_MODE;
 283
 284     case EP_extv:
 285       if (HAVE_extv)
 286         {
 287           data = &insn_data[CODE_FOR_extv];
 288           break;
 289         }
 290       return MAX_MACHINE_MODE;
 291
 292     case EP_extzv:
 293       if (HAVE_extzv)
 294         {
 295           data = &insn_data[CODE_FOR_extzv];
 296           break;
 297         }
 298       return MAX_MACHINE_MODE;
 299
 300     default:
 301       abort ();
 302     }
 303
 304   if (opno == -1)
 305     return VOIDmode;
 306
 307   /* Everyone who uses this function used to follow it with
 308      if (result == VOIDmode) result = word_mode; */
 309   if (data->operand[opno].mode == VOIDmode)
 310     return word_mode;
 311   return data->operand[opno].mode;
 312 }
 313
 314 \f
 315 /* Generate code to store value from rtx VALUE
 316    into a bit-field within structure STR_RTX
 317    containing BITSIZE bits starting at bit BITNUM.
 318    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
 319    ALIGN is the alignment that STR_RTX is known to have.
 320    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
 321
 322 /* ??? Note that there are two different ideas here for how
 323    to determine the size to count bits within, for a register.
 324    One is BITS_PER_WORD, and the other is the size of operand 3
 325    of the insv pattern.
 326
 327    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
 328    else, we use the mode of operand 3.  */
 329
 330 rtx
 331 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 332                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 333                  rtx value)
 334 {
 335   unsigned int unit
 336     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
 337   unsigned HOST_WIDE_INT offset = bitnum / unit;
 338   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
 339   rtx op0 = str_rtx;
 340   int byte_offset;
 341
 342   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 343
 344   while (GET_CODE (op0) == SUBREG)
 345     {
 346       /* The following line once was done only if WORDS_BIG_ENDIAN,
 347          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 348          meaningful at a much higher level; when structures are copied
 349          between memory and regs, the higher-numbered regs
 350          always get higher addresses.  */
 351       offset += (SUBREG_BYTE (op0) / UNITS_PER_WORD);
 352       /* We used to adjust BITPOS here, but now we do the whole adjustment
 353          right after the loop.  */
 354       op0 = SUBREG_REG (op0);
 355     }
 356
 357   /* Use vec_set patterns for inserting parts of vectors whenever
 358      available.  */
 359   if (VECTOR_MODE_P (GET_MODE (op0))
 360       && !MEM_P (op0)
 361       && (vec_set_optab->handlers[GET_MODE (op0)].insn_code
 362           != CODE_FOR_nothing)
 363       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 364       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 365       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 366     {
 367       enum machine_mode outermode = GET_MODE (op0);
 368       enum machine_mode innermode = GET_MODE_INNER (outermode);
 369       int icode = (int) vec_set_optab->handlers[outermode].insn_code;
 370       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 371       rtx rtxpos = GEN_INT (pos);
 372       rtx src = value;
 373       rtx dest = op0;
 374       rtx pat, seq;
 375       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
 376       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
 377       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
 378
 379       start_sequence ();
 380
 381       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
 382         src = copy_to_mode_reg (mode1, src);
 383
 384       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 385         rtxpos = copy_to_mode_reg (mode1, rtxpos);
 386
 387       /* We could handle this, but we should always be called with a pseudo
 388          for our targets and all insns should take them as outputs.  */
 389       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0)
 390           || ! (*insn_data[icode].operand[1].predicate) (src, mode1)
 391           || ! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 392         abort ();
 393       pat = GEN_FCN (icode) (dest, src, rtxpos);
 394       seq = get_insns ();
 395       end_sequence ();
 396       if (pat)
 397         {
 398           emit_insn (seq);
 399           emit_insn (pat);
 400           return dest;
 401         }
 402     }
 403
 404   if (flag_force_mem)
 405     {
 406       int old_generating_concat_p = generating_concat_p;
 407       generating_concat_p = 0;
 408       value = force_not_mem (value);
 409       generating_concat_p = old_generating_concat_p;
 410     }
 411
 412   /* If the target is a register, overwriting the entire object, or storing
 413      a full-word or multi-word field can be done with just a SUBREG.
 414
 415      If the target is memory, storing any naturally aligned field can be
 416      done with a simple store.  For targets that support fast unaligned
 417      memory, any naturally sized, unit aligned field can be done directly.  */
 418
 419   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 420                 + (offset * UNITS_PER_WORD);
 421
 422   if (bitpos == 0
 423       && bitsize == GET_MODE_BITSIZE (fieldmode)
 424       && (!MEM_P (op0)
 425           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 426              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 427              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 428           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 429              || (offset * BITS_PER_UNIT % bitsize == 0
 430                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 431     {
 432       if (GET_MODE (op0) != fieldmode)
 433         {
 434           if (GET_CODE (op0) == SUBREG)
 435             {
 436               if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 437                   || GET_MODE_CLASS (fieldmode) == MODE_INT
 438                   || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 439                 op0 = SUBREG_REG (op0);
 440               else
 441                 /* Else we've got some float mode source being extracted into
 442                    a different float mode destination -- this combination of
 443                    subregs results in Severe Tire Damage.  */
 444                 abort ();
 445             }
 446           if (REG_P (op0))
 447             op0 = gen_rtx_SUBREG (fieldmode, op0, byte_offset);
 448           else
 449             op0 = adjust_address (op0, fieldmode, offset);
 450         }
 451       emit_move_insn (op0, value);
 452       return value;
 453     }
 454
 455   /* Make sure we are playing with integral modes.  Pun with subregs
 456      if we aren't.  This must come after the entire register case above,
 457      since that case is valid for any mode.  The following cases are only
 458      valid for integral modes.  */
 459   {
 460     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 461     if (imode != GET_MODE (op0))
 462       {
 463         if (MEM_P (op0))
 464           op0 = adjust_address (op0, imode, 0);
 465         else if (imode != BLKmode)
 466           op0 = gen_lowpart (imode, op0);
 467         else
 468           abort ();
 469       }
 470   }
 471
 472   /* We may be accessing data outside the field, which means
 473      we can alias adjacent data.  */
 474   if (MEM_P (op0))
 475     {
 476       op0 = shallow_copy_rtx (op0);
 477       set_mem_alias_set (op0, 0);
 478       set_mem_expr (op0, 0);
 479     }
 480
 481   /* If OP0 is a register, BITPOS must count within a word.
 482      But as we have it, it counts within whatever size OP0 now has.
 483      On a bigendian machine, these are not the same, so convert.  */
 484   if (BYTES_BIG_ENDIAN
 485       && !MEM_P (op0)
 486       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 487     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 488
 489   /* Storing an lsb-aligned field in a register
 490      can be done with a movestrict instruction.  */
 491
 492   if (!MEM_P (op0)
 493       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 494       && bitsize == GET_MODE_BITSIZE (fieldmode)
 495       && (movstrict_optab->handlers[fieldmode].insn_code
 496           != CODE_FOR_nothing))
 497     {
 498       int icode = movstrict_optab->handlers[fieldmode].insn_code;
 499
 500       /* Get appropriate low part of the value being stored.  */
 501       if (GET_CODE (value) == CONST_INT || REG_P (value))
 502         value = gen_lowpart (fieldmode, value);
 503       else if (!(GET_CODE (value) == SYMBOL_REF
 504                  || GET_CODE (value) == LABEL_REF
 505                  || GET_CODE (value) == CONST))
 506         value = convert_to_mode (fieldmode, value, 0);
 507
 508       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 509         value = copy_to_mode_reg (fieldmode, value);
 510
 511       if (GET_CODE (op0) == SUBREG)
 512         {
 513           if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 514               || GET_MODE_CLASS (fieldmode) == MODE_INT
 515               || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 516             op0 = SUBREG_REG (op0);
 517           else
 518             /* Else we've got some float mode source being extracted into
 519                a different float mode destination -- this combination of
 520                subregs results in Severe Tire Damage.  */
 521             abort ();
 522         }
 523
 524       emit_insn (GEN_FCN (icode)
 525                  (gen_rtx_SUBREG (fieldmode, op0,
 526                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 527                                   + (offset * UNITS_PER_WORD)),
 528                                   value));
 529
 530       return value;
 531     }
 532
 533   /* Handle fields bigger than a word.  */
 534
 535   if (bitsize > BITS_PER_WORD)
 536     {
 537       /* Here we transfer the words of the field
 538          in the order least significant first.
 539          This is because the most significant word is the one which may
 540          be less than full.
 541          However, only do that if the value is not BLKmode.  */
 542
 543       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 544       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 545       unsigned int i;
 546
 547       /* This is the mode we must force value to, so that there will be enough
 548          subwords to extract.  Note that fieldmode will often (always?) be
 549          VOIDmode, because that is what store_field uses to indicate that this
 550          is a bit field, but passing VOIDmode to operand_subword_force will
 551          result in an abort.  */
 552       fieldmode = GET_MODE (value);
 553       if (fieldmode == VOIDmode)
 554         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 555
 556       for (i = 0; i < nwords; i++)
 557         {
 558           /* If I is 0, use the low-order word in both field and target;
 559              if I is 1, use the next to lowest word; and so on.  */
 560           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 561           unsigned int bit_offset = (backwards
 562                                      ? MAX ((int) bitsize - ((int) i + 1)
 563                                             * BITS_PER_WORD,
 564                                             0)
 565                                      : (int) i * BITS_PER_WORD);
 566
 567           store_bit_field (op0, MIN (BITS_PER_WORD,
 568                                      bitsize - i * BITS_PER_WORD),
 569                            bitnum + bit_offset, word_mode,
 570                            operand_subword_force (value, wordnum, fieldmode));
 571         }
 572       return value;
 573     }
 574
 575   /* From here on we can assume that the field to be stored in is
 576      a full-word (whatever type that is), since it is shorter than a word.  */
 577
 578   /* OFFSET is the number of words or bytes (UNIT says which)
 579      from STR_RTX to the first word or byte containing part of the field.  */
 580
 581   if (!MEM_P (op0))
 582     {
 583       if (offset != 0
 584           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 585         {
 586           if (!REG_P (op0))
 587             {
 588               /* Since this is a destination (lvalue), we can't copy it to a
 589                  pseudo.  We can trivially remove a SUBREG that does not
 590                  change the size of the operand.  Such a SUBREG may have been
 591                  added above.  Otherwise, abort.  */
 592               if (GET_CODE (op0) == SUBREG
 593                   && (GET_MODE_SIZE (GET_MODE (op0))
 594                       == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))))
 595                 op0 = SUBREG_REG (op0);
 596               else
 597                 abort ();
 598             }
 599           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 600                                 op0, (offset * UNITS_PER_WORD));
 601         }
 602       offset = 0;
 603     }
 604
 605   /* If VALUE is a floating-point mode, access it as an integer of the
 606      corresponding size.  This can occur on a machine with 64 bit registers
 607      that uses SFmode for float.  This can also occur for unaligned float
 608      structure fields.  */
 609   if (GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 610       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 611     value = gen_lowpart ((GET_MODE (value) == VOIDmode
 612                           ? word_mode : int_mode_for_mode (GET_MODE (value))),
 613                          value);
 614
 615   /* Now OFFSET is nonzero only if OP0 is memory
 616      and is therefore always measured in bytes.  */
 617
 618   if (HAVE_insv
 619       && GET_MODE (value) != BLKmode
 620       && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
 621       /* Ensure insv's size is wide enough for this field.  */
 622       && (GET_MODE_BITSIZE (op_mode) >= bitsize)
 623       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
 624             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
 625     {
 626       int xbitpos = bitpos;
 627       rtx value1;
 628       rtx xop0 = op0;
 629       rtx last = get_last_insn ();
 630       rtx pat;
 631       enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
 632       int save_volatile_ok = volatile_ok;
 633
 634       volatile_ok = 1;
 635
 636       /* If this machine's insv can only insert into a register, copy OP0
 637          into a register and save it back later.  */
 638       /* This used to check flag_force_mem, but that was a serious
 639          de-optimization now that flag_force_mem is enabled by -O2.  */
 640       if (MEM_P (op0)
 641           && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
 642                 (op0, VOIDmode)))
 643         {
 644           rtx tempreg;
 645           enum machine_mode bestmode;
 646
 647           /* Get the mode to use for inserting into this field.  If OP0 is
 648              BLKmode, get the smallest mode consistent with the alignment. If
 649              OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
 650              mode. Otherwise, use the smallest mode containing the field.  */
 651
 652           if (GET_MODE (op0) == BLKmode
 653               || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
 654             bestmode
 655               = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
 656                                MEM_VOLATILE_P (op0));
 657           else
 658             bestmode = GET_MODE (op0);
 659
 660           if (bestmode == VOIDmode
 661               || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 662                   && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 663             goto insv_loses;
 664
 665           /* Adjust address to point to the containing unit of that mode.
 666              Compute offset as multiple of this unit, counting in bytes.  */
 667           unit = GET_MODE_BITSIZE (bestmode);
 668           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 669           bitpos = bitnum % unit;
 670           op0 = adjust_address (op0, bestmode,  offset);
 671
 672           /* Fetch that unit, store the bitfield in it, then store
 673              the unit.  */
 674           tempreg = copy_to_reg (op0);
 675           store_bit_field (tempreg, bitsize, bitpos, fieldmode, value);
 676           emit_move_insn (op0, tempreg);
 677           return value;
 678         }
 679       volatile_ok = save_volatile_ok;
 680
 681       /* Add OFFSET into OP0's address.  */
 682       if (MEM_P (xop0))
 683         xop0 = adjust_address (xop0, byte_mode, offset);
 684
 685       /* If xop0 is a register, we need it in MAXMODE
 686          to make it acceptable to the format of insv.  */
 687       if (GET_CODE (xop0) == SUBREG)
 688         /* We can't just change the mode, because this might clobber op0,
 689            and we will need the original value of op0 if insv fails.  */
 690         xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 691       if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
 692         xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
 693
 694       /* On big-endian machines, we count bits from the most significant.
 695          If the bit field insn does not, we must invert.  */
 696
 697       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 698         xbitpos = unit - bitsize - xbitpos;
 699
 700       /* We have been counting XBITPOS within UNIT.
 701          Count instead within the size of the register.  */
 702       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
 703         xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
 704
 705       unit = GET_MODE_BITSIZE (maxmode);
 706
 707       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
 708       value1 = value;
 709       if (GET_MODE (value) != maxmode)
 710         {
 711           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 712             {
 713               /* Optimization: Don't bother really extending VALUE
 714                  if it has all the bits we will actually use.  However,
 715                  if we must narrow it, be sure we do it correctly.  */
 716
 717               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
 718                 {
 719                   rtx tmp;
 720
 721                   tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
 722                   if (! tmp)
 723                     tmp = simplify_gen_subreg (maxmode,
 724                                                force_reg (GET_MODE (value),
 725                                                           value1),
 726                                                GET_MODE (value), 0);
 727                   value1 = tmp;
 728                 }
 729               else
 730                 value1 = gen_lowpart (maxmode, value1);
 731             }
 732           else if (GET_CODE (value) == CONST_INT)
 733             value1 = gen_int_mode (INTVAL (value), maxmode);
 734           else if (!CONSTANT_P (value))
 735             /* Parse phase is supposed to make VALUE's data type
 736                match that of the component reference, which is a type
 737                at least as wide as the field; so VALUE should have
 738                a mode that corresponds to that type.  */
 739             abort ();
 740         }
 741
 742       /* If this machine's insv insists on a register,
 743          get VALUE1 into a register.  */
 744       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 745              (value1, maxmode)))
 746         value1 = force_reg (maxmode, value1);
 747
 748       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 749       if (pat)
 750         emit_insn (pat);
 751       else
 752         {
 753           delete_insns_since (last);
 754           store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 755         }
 756     }
 757   else
 758     insv_loses:
 759     /* Insv is not available; store using shifts and boolean ops.  */
 760     store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 761   return value;
 762 }
 763 \f
 764 /* Use shifts and boolean operations to store VALUE
 765    into a bit field of width BITSIZE
 766    in a memory location specified by OP0 except offset by OFFSET bytes.
 767      (OFFSET must be 0 if OP0 is a register.)
 768    The field starts at position BITPOS within the byte.
 769     (If OP0 is a register, it may be a full word or a narrower mode,
 770      but BITPOS still counts within a full word,
 771      which is significant on bigendian machines.)  */
 772
 773 static void
 774 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 775                        unsigned HOST_WIDE_INT bitsize,
 776                        unsigned HOST_WIDE_INT bitpos, rtx value)
 777 {
 778   enum machine_mode mode;
 779   unsigned int total_bits = BITS_PER_WORD;
 780   rtx subtarget, temp;
 781   int all_zero = 0;
 782   int all_one = 0;
 783
 784   /* There is a case not handled here:
 785      a structure with a known alignment of just a halfword
 786      and a field split across two aligned halfwords within the structure.
 787      Or likewise a structure with a known alignment of just a byte
 788      and a field split across two bytes.
 789      Such cases are not supposed to be able to occur.  */
 790
 791   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 792     {
 793       if (offset != 0)
 794         abort ();
 795       /* Special treatment for a bit field split across two registers.  */
 796       if (bitsize + bitpos > BITS_PER_WORD)
 797         {
 798           store_split_bit_field (op0, bitsize, bitpos, value);
 799           return;
 800         }
 801     }
 802   else
 803     {
 804       /* Get the proper mode to use for this field.  We want a mode that
 805          includes the entire field.  If such a mode would be larger than
 806          a word, we won't be doing the extraction the normal way.
 807          We don't want a mode bigger than the destination.  */
 808
 809       mode = GET_MODE (op0);
 810       if (GET_MODE_BITSIZE (mode) == 0
 811           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 812         mode = word_mode;
 813       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 814                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 815
 816       if (mode == VOIDmode)
 817         {
 818           /* The only way this should occur is if the field spans word
 819              boundaries.  */
 820           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 821                                  value);
 822           return;
 823         }
 824
 825       total_bits = GET_MODE_BITSIZE (mode);
 826
 827       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 828          be in the range 0 to total_bits-1, and put any excess bytes in
 829          OFFSET.  */
 830       if (bitpos >= total_bits)
 831         {
 832           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 833           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 834                      * BITS_PER_UNIT);
 835         }
 836
 837       /* Get ref to an aligned byte, halfword, or word containing the field.
 838          Adjust BITPOS to be position within a word,
 839          and OFFSET to be the offset of that word.
 840          Then alter OP0 to refer to that word.  */
 841       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 842       offset -= (offset % (total_bits / BITS_PER_UNIT));
 843       op0 = adjust_address (op0, mode, offset);
 844     }
 845
 846   mode = GET_MODE (op0);
 847
 848   /* Now MODE is either some integral mode for a MEM as OP0,
 849      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 850      The bit field is contained entirely within OP0.
 851      BITPOS is the starting bit number within OP0.
 852      (OP0's mode may actually be narrower than MODE.)  */
 853
 854   if (BYTES_BIG_ENDIAN)
 855       /* BITPOS is the distance between our msb
 856          and that of the containing datum.
 857          Convert it to the distance from the lsb.  */
 858       bitpos = total_bits - bitsize - bitpos;
 859
 860   /* Now BITPOS is always the distance between our lsb
 861      and that of OP0.  */
 862
 863   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 864      we must first convert its mode to MODE.  */
 865
 866   if (GET_CODE (value) == CONST_INT)
 867     {
 868       HOST_WIDE_INT v = INTVAL (value);
 869
 870       if (bitsize < HOST_BITS_PER_WIDE_INT)
 871         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 872
 873       if (v == 0)
 874         all_zero = 1;
 875       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 876                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 877                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 878         all_one = 1;
 879
 880       value = lshift_value (mode, value, bitpos, bitsize);
 881     }
 882   else
 883     {
 884       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 885                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 886
 887       if (GET_MODE (value) != mode)
 888         {
 889           if ((REG_P (value) || GET_CODE (value) == SUBREG)
 890               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 891             value = gen_lowpart (mode, value);
 892           else
 893             value = convert_to_mode (mode, value, 1);
 894         }
 895
 896       if (must_and)
 897         value = expand_binop (mode, and_optab, value,
 898                               mask_rtx (mode, 0, bitsize, 0),
 899                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 900       if (bitpos > 0)
 901         value = expand_shift (LSHIFT_EXPR, mode, value,
 902                               build_int_2 (bitpos, 0), NULL_RTX, 1);
 903     }
 904
 905   /* Now clear the chosen bits in OP0,
 906      except that if VALUE is -1 we need not bother.  */
 907
 908   subtarget = (REG_P (op0) || ! flag_force_mem) ? op0 : 0;
 909
 910   if (! all_one)
 911     {
 912       temp = expand_binop (mode, and_optab, op0,
 913                            mask_rtx (mode, bitpos, bitsize, 1),
 914                            subtarget, 1, OPTAB_LIB_WIDEN);
 915       subtarget = temp;
 916     }
 917   else
 918     temp = op0;
 919
 920   /* Now logical-or VALUE into OP0, unless it is zero.  */
 921
 922   if (! all_zero)
 923     temp = expand_binop (mode, ior_optab, temp, value,
 924                          subtarget, 1, OPTAB_LIB_WIDEN);
 925   if (op0 != temp)
 926     emit_move_insn (op0, temp);
 927 }
 928 \f
 929 /* Store a bit field that is split across multiple accessible memory objects.
 930
 931    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 932    BITSIZE is the field width; BITPOS the position of its first bit
 933    (within the word).
 934    VALUE is the value to store.
 935
 936    This does not yet handle fields wider than BITS_PER_WORD.  */
 937
 938 static void
 939 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 940                        unsigned HOST_WIDE_INT bitpos, rtx value)
 941 {
 942   unsigned int unit;
 943   unsigned int bitsdone = 0;
 944
 945   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 946      much at a time.  */
 947   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 948     unit = BITS_PER_WORD;
 949   else
 950     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
 951
 952   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 953      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 954      that VALUE might be a floating-point constant.  */
 955   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
 956     {
 957       rtx word = gen_lowpart_common (word_mode, value);
 958
 959       if (word && (value != word))
 960         value = word;
 961       else
 962         value = gen_lowpart_common (word_mode,
 963                                     force_reg (GET_MODE (value) != VOIDmode
 964                                                ? GET_MODE (value)
 965                                                : word_mode, value));
 966     }
 967
 968   while (bitsdone < bitsize)
 969     {
 970       unsigned HOST_WIDE_INT thissize;
 971       rtx part, word;
 972       unsigned HOST_WIDE_INT thispos;
 973       unsigned HOST_WIDE_INT offset;
 974
 975       offset = (bitpos + bitsdone) / unit;
 976       thispos = (bitpos + bitsdone) % unit;
 977
 978       /* THISSIZE must not overrun a word boundary.  Otherwise,
 979          store_fixed_bit_field will call us again, and we will mutually
 980          recurse forever.  */
 981       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
 982       thissize = MIN (thissize, unit - thispos);
 983
 984       if (BYTES_BIG_ENDIAN)
 985         {
 986           int total_bits;
 987
 988           /* We must do an endian conversion exactly the same way as it is
 989              done in extract_bit_field, so that the two calls to
 990              extract_fixed_bit_field will have comparable arguments.  */
 991           if (!MEM_P (value) || GET_MODE (value) == BLKmode)
 992             total_bits = BITS_PER_WORD;
 993           else
 994             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
 995
 996           /* Fetch successively less significant portions.  */
 997           if (GET_CODE (value) == CONST_INT)
 998             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 999                              >> (bitsize - bitsdone - thissize))
1000                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1001           else
1002             /* The args are chosen so that the last part includes the
1003                lsb.  Give extract_bit_field the value it needs (with
1004                endianness compensation) to fetch the piece we want.  */
1005             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1006                                             total_bits - bitsize + bitsdone,
1007                                             NULL_RTX, 1);
1008         }
1009       else
1010         {
1011           /* Fetch successively more significant portions.  */
1012           if (GET_CODE (value) == CONST_INT)
1013             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1014                              >> bitsdone)
1015                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1016           else
1017             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1018                                             bitsdone, NULL_RTX, 1);
1019         }
1020
1021       /* If OP0 is a register, then handle OFFSET here.
1022
1023          When handling multiword bitfields, extract_bit_field may pass
1024          down a word_mode SUBREG of a larger REG for a bitfield that actually
1025          crosses a word boundary.  Thus, for a SUBREG, we must find
1026          the current word starting from the base register.  */
1027       if (GET_CODE (op0) == SUBREG)
1028         {
1029           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1030           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1031                                         GET_MODE (SUBREG_REG (op0)));
1032           offset = 0;
1033         }
1034       else if (REG_P (op0))
1035         {
1036           word = operand_subword_force (op0, offset, GET_MODE (op0));
1037           offset = 0;
1038         }
1039       else
1040         word = op0;
1041
1042       /* OFFSET is in UNITs, and UNIT is in bits.
1043          store_fixed_bit_field wants offset in bytes.  */
1044       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1045                              thispos, part);
1046       bitsdone += thissize;
1047     }
1048 }
1049 \f
1050 /* Generate code to extract a byte-field from STR_RTX
1051    containing BITSIZE bits, starting at BITNUM,
1052    and put it in TARGET if possible (if TARGET is nonzero).
1053    Regardless of TARGET, we return the rtx for where the value is placed.
1054
1055    STR_RTX is the structure containing the byte (a REG or MEM).
1056    UNSIGNEDP is nonzero if this is an unsigned bit field.
1057    MODE is the natural mode of the field value once extracted.
1058    TMODE is the mode the caller would like the value to have;
1059    but the value may be returned with type MODE instead.
1060
1061    TOTAL_SIZE is the size in bytes of the containing structure,
1062    or -1 if varying.
1063
1064    If a TARGET is specified and we can store in it at no extra cost,
1065    we do so, and return TARGET.
1066    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1067    if they are equally easy.  */
1068
1069 rtx
1070 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1071                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1072                    enum machine_mode mode, enum machine_mode tmode)
1073 {
1074   unsigned int unit
1075     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1076   unsigned HOST_WIDE_INT offset = bitnum / unit;
1077   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
1078   rtx op0 = str_rtx;
1079   rtx spec_target = target;
1080   rtx spec_target_subreg = 0;
1081   enum machine_mode int_mode;
1082   enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1083   enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1084   enum machine_mode mode1;
1085   int byte_offset;
1086
1087   if (tmode == VOIDmode)
1088     tmode = mode;
1089
1090   while (GET_CODE (op0) == SUBREG)
1091     {
1092       bitpos += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1093       if (bitpos > unit)
1094         {
1095           offset += (bitpos / unit);
1096           bitpos %= unit;
1097         }
1098       op0 = SUBREG_REG (op0);
1099     }
1100
1101   if (REG_P (op0)
1102       && mode == GET_MODE (op0)
1103       && bitnum == 0
1104       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1105     {
1106       /* We're trying to extract a full register from itself.  */
1107       return op0;
1108     }
1109
1110   /* Use vec_extract patterns for extracting parts of vectors whenever
1111      available.  */
1112   if (VECTOR_MODE_P (GET_MODE (op0))
1113       && !MEM_P (op0)
1114       && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
1115           != CODE_FOR_nothing)
1116       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1117           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1118     {
1119       enum machine_mode outermode = GET_MODE (op0);
1120       enum machine_mode innermode = GET_MODE_INNER (outermode);
1121       int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
1122       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1123       rtx rtxpos = GEN_INT (pos);
1124       rtx src = op0;
1125       rtx dest = NULL, pat, seq;
1126       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1127       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1128       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1129
1130       if (innermode == tmode || innermode == mode)
1131         dest = target;
1132
1133       if (!dest)
1134         dest = gen_reg_rtx (innermode);
1135
1136       start_sequence ();
1137
1138       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1139         dest = copy_to_mode_reg (mode0, dest);
1140
1141       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1142         src = copy_to_mode_reg (mode1, src);
1143
1144       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1145         rtxpos = copy_to_mode_reg (mode1, rtxpos);
1146
1147       /* We could handle this, but we should always be called with a pseudo
1148          for our targets and all insns should take them as outputs.  */
1149       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0)
1150           || ! (*insn_data[icode].operand[1].predicate) (src, mode1)
1151           || ! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1152         abort ();
1153
1154       pat = GEN_FCN (icode) (dest, src, rtxpos);
1155       seq = get_insns ();
1156       end_sequence ();
1157       if (pat)
1158         {
1159           emit_insn (seq);
1160           emit_insn (pat);
1161           return dest;
1162         }
1163     }
1164
1165   /* Make sure we are playing with integral modes.  Pun with subregs
1166      if we aren't.  */
1167   {
1168     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1169     if (imode != GET_MODE (op0))
1170       {
1171         if (MEM_P (op0))
1172           op0 = adjust_address (op0, imode, 0);
1173         else if (imode != BLKmode)
1174           op0 = gen_lowpart (imode, op0);
1175         else
1176           abort ();
1177       }
1178   }
1179
1180   /* We may be accessing data outside the field, which means
1181      we can alias adjacent data.  */
1182   if (MEM_P (op0))
1183     {
1184       op0 = shallow_copy_rtx (op0);
1185       set_mem_alias_set (op0, 0);
1186       set_mem_expr (op0, 0);
1187     }
1188
1189   /* Extraction of a full-word or multi-word value from a structure
1190      in a register or aligned memory can be done with just a SUBREG.
1191      A subword value in the least significant part of a register
1192      can also be extracted with a SUBREG.  For this, we need the
1193      byte offset of the value in op0.  */
1194
1195   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1196
1197   /* If OP0 is a register, BITPOS must count within a word.
1198      But as we have it, it counts within whatever size OP0 now has.
1199      On a bigendian machine, these are not the same, so convert.  */
1200   if (BYTES_BIG_ENDIAN
1201       && !MEM_P (op0)
1202       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1203     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1204
1205   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1206      If that's wrong, the solution is to test for it and set TARGET to 0
1207      if needed.  */
1208
1209   /* Only scalar integer modes can be converted via subregs.  There is an
1210      additional problem for FP modes here in that they can have a precision
1211      which is different from the size.  mode_for_size uses precision, but
1212      we want a mode based on the size, so we must avoid calling it for FP
1213      modes.  */
1214   mode1  = (SCALAR_INT_MODE_P (tmode)
1215             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1216             : mode);
1217
1218   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1219         && bitpos % BITS_PER_WORD == 0)
1220        || (mode1 != BLKmode
1221            /* ??? The big endian test here is wrong.  This is correct
1222               if the value is in a register, and if mode_for_size is not
1223               the same mode as op0.  This causes us to get unnecessarily
1224               inefficient code from the Thumb port when -mbig-endian.  */
1225            && (BYTES_BIG_ENDIAN
1226                ? bitpos + bitsize == BITS_PER_WORD
1227                : bitpos == 0)))
1228       && ((!MEM_P (op0)
1229            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1230                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1231            && GET_MODE_SIZE (mode1) != 0
1232            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1233           || (MEM_P (op0)
1234               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1235                   || (offset * BITS_PER_UNIT % bitsize == 0
1236                       && MEM_ALIGN (op0) % bitsize == 0)))))
1237     {
1238       if (mode1 != GET_MODE (op0))
1239         {
1240           if (GET_CODE (op0) == SUBREG)
1241             {
1242               if (GET_MODE (SUBREG_REG (op0)) == mode1
1243                   || GET_MODE_CLASS (mode1) == MODE_INT
1244                   || GET_MODE_CLASS (mode1) == MODE_PARTIAL_INT)
1245                 op0 = SUBREG_REG (op0);
1246               else
1247                 /* Else we've got some float mode source being extracted into
1248                    a different float mode destination -- this combination of
1249                    subregs results in Severe Tire Damage.  */
1250                 goto no_subreg_mode_swap;
1251             }
1252           if (REG_P (op0))
1253             op0 = gen_rtx_SUBREG (mode1, op0, byte_offset);
1254           else
1255             op0 = adjust_address (op0, mode1, offset);
1256         }
1257       if (mode1 != mode)
1258         return convert_to_mode (tmode, op0, unsignedp);
1259       return op0;
1260     }
1261  no_subreg_mode_swap:
1262
1263   /* Handle fields bigger than a word.  */
1264
1265   if (bitsize > BITS_PER_WORD)
1266     {
1267       /* Here we transfer the words of the field
1268          in the order least significant first.
1269          This is because the most significant word is the one which may
1270          be less than full.  */
1271
1272       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1273       unsigned int i;
1274
1275       if (target == 0 || !REG_P (target))
1276         target = gen_reg_rtx (mode);
1277
1278       /* Indicate for flow that the entire target reg is being set.  */
1279       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1280
1281       for (i = 0; i < nwords; i++)
1282         {
1283           /* If I is 0, use the low-order word in both field and target;
1284              if I is 1, use the next to lowest word; and so on.  */
1285           /* Word number in TARGET to use.  */
1286           unsigned int wordnum
1287             = (WORDS_BIG_ENDIAN
1288                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1289                : i);
1290           /* Offset from start of field in OP0.  */
1291           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1292                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1293                                                 * (int) BITS_PER_WORD))
1294                                      : (int) i * BITS_PER_WORD);
1295           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1296           rtx result_part
1297             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1298                                            bitsize - i * BITS_PER_WORD),
1299                                  bitnum + bit_offset, 1, target_part, mode,
1300                                  word_mode);
1301
1302           if (target_part == 0)
1303             abort ();
1304
1305           if (result_part != target_part)
1306             emit_move_insn (target_part, result_part);
1307         }
1308
1309       if (unsignedp)
1310         {
1311           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1312              need to be zero'd out.  */
1313           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1314             {
1315               unsigned int i, total_words;
1316
1317               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1318               for (i = nwords; i < total_words; i++)
1319                 emit_move_insn
1320                   (operand_subword (target,
1321                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1322                                     1, VOIDmode),
1323                    const0_rtx);
1324             }
1325           return target;
1326         }
1327
1328       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1329       target = expand_shift (LSHIFT_EXPR, mode, target,
1330                              build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1331                              NULL_RTX, 0);
1332       return expand_shift (RSHIFT_EXPR, mode, target,
1333                            build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1334                            NULL_RTX, 0);
1335     }
1336
1337   /* From here on we know the desired field is smaller than a word.  */
1338
1339   /* Check if there is a correspondingly-sized integer field, so we can
1340      safely extract it as one size of integer, if necessary; then
1341      truncate or extend to the size that is wanted; then use SUBREGs or
1342      convert_to_mode to get one of the modes we really wanted.  */
1343
1344   int_mode = int_mode_for_mode (tmode);
1345   if (int_mode == BLKmode)
1346     int_mode = int_mode_for_mode (mode);
1347   if (int_mode == BLKmode)
1348     abort ();    /* Should probably push op0 out to memory and then
1349                     do a load.  */
1350
1351   /* OFFSET is the number of words or bytes (UNIT says which)
1352      from STR_RTX to the first word or byte containing part of the field.  */
1353
1354   if (!MEM_P (op0))
1355     {
1356       if (offset != 0
1357           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1358         {
1359           if (!REG_P (op0))
1360             op0 = copy_to_reg (op0);
1361           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1362                                 op0, (offset * UNITS_PER_WORD));
1363         }
1364       offset = 0;
1365     }
1366
1367   /* Now OFFSET is nonzero only for memory operands.  */
1368
1369   if (unsignedp)
1370     {
1371       if (HAVE_extzv
1372           && (GET_MODE_BITSIZE (extzv_mode) >= bitsize)
1373           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1374                 && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1375         {
1376           unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1377           rtx bitsize_rtx, bitpos_rtx;
1378           rtx last = get_last_insn ();
1379           rtx xop0 = op0;
1380           rtx xtarget = target;
1381           rtx xspec_target = spec_target;
1382           rtx xspec_target_subreg = spec_target_subreg;
1383           rtx pat;
1384           enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1385
1386           if (MEM_P (xop0))
1387             {
1388               int save_volatile_ok = volatile_ok;
1389               volatile_ok = 1;
1390
1391               /* Is the memory operand acceptable?  */
1392               if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1393                      (xop0, GET_MODE (xop0))))
1394                 {
1395                   /* No, load into a reg and extract from there.  */
1396                   enum machine_mode bestmode;
1397
1398                   /* Get the mode to use for inserting into this field.  If
1399                      OP0 is BLKmode, get the smallest mode consistent with the
1400                      alignment. If OP0 is a non-BLKmode object that is no
1401                      wider than MAXMODE, use its mode. Otherwise, use the
1402                      smallest mode containing the field.  */
1403
1404                   if (GET_MODE (xop0) == BLKmode
1405                       || (GET_MODE_SIZE (GET_MODE (op0))
1406                           > GET_MODE_SIZE (maxmode)))
1407                     bestmode = get_best_mode (bitsize, bitnum,
1408                                               MEM_ALIGN (xop0), maxmode,
1409                                               MEM_VOLATILE_P (xop0));
1410                   else
1411                     bestmode = GET_MODE (xop0);
1412
1413                   if (bestmode == VOIDmode
1414                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1415                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1416                     goto extzv_loses;
1417
1418                   /* Compute offset as multiple of this unit,
1419                      counting in bytes.  */
1420                   unit = GET_MODE_BITSIZE (bestmode);
1421                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1422                   xbitpos = bitnum % unit;
1423                   xop0 = adjust_address (xop0, bestmode, xoffset);
1424
1425                   /* Fetch it to a register in that size.  */
1426                   xop0 = force_reg (bestmode, xop0);
1427
1428                   /* XBITPOS counts within UNIT, which is what is expected.  */
1429                 }
1430               else
1431                 /* Get ref to first byte containing part of the field.  */
1432                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1433
1434               volatile_ok = save_volatile_ok;
1435             }
1436
1437           /* If op0 is a register, we need it in MAXMODE (which is usually
1438              SImode). to make it acceptable to the format of extzv.  */
1439           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1440             goto extzv_loses;
1441           if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1442             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1443
1444           /* On big-endian machines, we count bits from the most significant.
1445              If the bit field insn does not, we must invert.  */
1446           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1447             xbitpos = unit - bitsize - xbitpos;
1448
1449           /* Now convert from counting within UNIT to counting in MAXMODE.  */
1450           if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1451             xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1452
1453           unit = GET_MODE_BITSIZE (maxmode);
1454
1455           if (xtarget == 0
1456               || (flag_force_mem && MEM_P (xtarget)))
1457             xtarget = xspec_target = gen_reg_rtx (tmode);
1458
1459           if (GET_MODE (xtarget) != maxmode)
1460             {
1461               if (REG_P (xtarget))
1462                 {
1463                   int wider = (GET_MODE_SIZE (maxmode)
1464                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1465                   xtarget = gen_lowpart (maxmode, xtarget);
1466                   if (wider)
1467                     xspec_target_subreg = xtarget;
1468                 }
1469               else
1470                 xtarget = gen_reg_rtx (maxmode);
1471             }
1472
1473           /* If this machine's extzv insists on a register target,
1474              make sure we have one.  */
1475           if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1476                  (xtarget, maxmode)))
1477             xtarget = gen_reg_rtx (maxmode);
1478
1479           bitsize_rtx = GEN_INT (bitsize);
1480           bitpos_rtx = GEN_INT (xbitpos);
1481
1482           pat = gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1483           if (pat)
1484             {
1485               emit_insn (pat);
1486               target = xtarget;
1487               spec_target = xspec_target;
1488               spec_target_subreg = xspec_target_subreg;
1489             }
1490           else
1491             {
1492               delete_insns_since (last);
1493               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1494                                                 bitpos, target, 1);
1495             }
1496         }
1497       else
1498       extzv_loses:
1499         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1500                                           bitpos, target, 1);
1501     }
1502   else
1503     {
1504       if (HAVE_extv
1505           && (GET_MODE_BITSIZE (extv_mode) >= bitsize)
1506           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1507                 && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1508         {
1509           int xbitpos = bitpos, xoffset = offset;
1510           rtx bitsize_rtx, bitpos_rtx;
1511           rtx last = get_last_insn ();
1512           rtx xop0 = op0, xtarget = target;
1513           rtx xspec_target = spec_target;
1514           rtx xspec_target_subreg = spec_target_subreg;
1515           rtx pat;
1516           enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1517
1518           if (MEM_P (xop0))
1519             {
1520               /* Is the memory operand acceptable?  */
1521               if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1522                      (xop0, GET_MODE (xop0))))
1523                 {
1524                   /* No, load into a reg and extract from there.  */
1525                   enum machine_mode bestmode;
1526
1527                   /* Get the mode to use for inserting into this field.  If
1528                      OP0 is BLKmode, get the smallest mode consistent with the
1529                      alignment. If OP0 is a non-BLKmode object that is no
1530                      wider than MAXMODE, use its mode. Otherwise, use the
1531                      smallest mode containing the field.  */
1532
1533                   if (GET_MODE (xop0) == BLKmode
1534                       || (GET_MODE_SIZE (GET_MODE (op0))
1535                           > GET_MODE_SIZE (maxmode)))
1536                     bestmode = get_best_mode (bitsize, bitnum,
1537                                               MEM_ALIGN (xop0), maxmode,
1538                                               MEM_VOLATILE_P (xop0));
1539                   else
1540                     bestmode = GET_MODE (xop0);
1541
1542                   if (bestmode == VOIDmode
1543                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1544                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1545                     goto extv_loses;
1546
1547                   /* Compute offset as multiple of this unit,
1548                      counting in bytes.  */
1549                   unit = GET_MODE_BITSIZE (bestmode);
1550                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1551                   xbitpos = bitnum % unit;
1552                   xop0 = adjust_address (xop0, bestmode, xoffset);
1553
1554                   /* Fetch it to a register in that size.  */
1555                   xop0 = force_reg (bestmode, xop0);
1556
1557                   /* XBITPOS counts within UNIT, which is what is expected.  */
1558                 }
1559               else
1560                 /* Get ref to first byte containing part of the field.  */
1561                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1562             }
1563
1564           /* If op0 is a register, we need it in MAXMODE (which is usually
1565              SImode) to make it acceptable to the format of extv.  */
1566           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1567             goto extv_loses;
1568           if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1569             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1570
1571           /* On big-endian machines, we count bits from the most significant.
1572              If the bit field insn does not, we must invert.  */
1573           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1574             xbitpos = unit - bitsize - xbitpos;
1575
1576           /* XBITPOS counts within a size of UNIT.
1577              Adjust to count within a size of MAXMODE.  */
1578           if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1579             xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1580
1581           unit = GET_MODE_BITSIZE (maxmode);
1582
1583           if (xtarget == 0
1584               || (flag_force_mem && MEM_P (xtarget)))
1585             xtarget = xspec_target = gen_reg_rtx (tmode);
1586
1587           if (GET_MODE (xtarget) != maxmode)
1588             {
1589               if (REG_P (xtarget))
1590                 {
1591                   int wider = (GET_MODE_SIZE (maxmode)
1592                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1593                   xtarget = gen_lowpart (maxmode, xtarget);
1594                   if (wider)
1595                     xspec_target_subreg = xtarget;
1596                 }
1597               else
1598                 xtarget = gen_reg_rtx (maxmode);
1599             }
1600
1601           /* If this machine's extv insists on a register target,
1602              make sure we have one.  */
1603           if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1604                  (xtarget, maxmode)))
1605             xtarget = gen_reg_rtx (maxmode);
1606
1607           bitsize_rtx = GEN_INT (bitsize);
1608           bitpos_rtx = GEN_INT (xbitpos);
1609
1610           pat = gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1611           if (pat)
1612             {
1613               emit_insn (pat);
1614               target = xtarget;
1615               spec_target = xspec_target;
1616               spec_target_subreg = xspec_target_subreg;
1617             }
1618           else
1619             {
1620               delete_insns_since (last);
1621               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1622                                                 bitpos, target, 0);
1623             }
1624         }
1625       else
1626       extv_loses:
1627         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1628                                           bitpos, target, 0);
1629     }
1630   if (target == spec_target)
1631     return target;
1632   if (target == spec_target_subreg)
1633     return spec_target;
1634   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1635     {
1636       /* If the target mode is floating-point, first convert to the
1637          integer mode of that size and then access it as a floating-point
1638          value via a SUBREG.  */
1639       if (GET_MODE_CLASS (tmode) != MODE_INT
1640           && GET_MODE_CLASS (tmode) != MODE_PARTIAL_INT)
1641         {
1642           target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
1643                                                    MODE_INT, 0),
1644                                     target, unsignedp);
1645           return gen_lowpart (tmode, target);
1646         }
1647       else
1648         return convert_to_mode (tmode, target, unsignedp);
1649     }
1650   return target;
1651 }
1652 \f
1653 /* Extract a bit field using shifts and boolean operations
1654    Returns an rtx to represent the value.
1655    OP0 addresses a register (word) or memory (byte).
1656    BITPOS says which bit within the word or byte the bit field starts in.
1657    OFFSET says how many bytes farther the bit field starts;
1658     it is 0 if OP0 is a register.
1659    BITSIZE says how many bits long the bit field is.
1660     (If OP0 is a register, it may be narrower than a full word,
1661      but BITPOS still counts within a full word,
1662      which is significant on bigendian machines.)
1663
1664    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1665    If TARGET is nonzero, attempts to store the value there
1666    and return TARGET, but this is not guaranteed.
1667    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1668
1669 static rtx
1670 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1671                          unsigned HOST_WIDE_INT offset,
1672                          unsigned HOST_WIDE_INT bitsize,
1673                          unsigned HOST_WIDE_INT bitpos, rtx target,
1674                          int unsignedp)
1675 {
1676   unsigned int total_bits = BITS_PER_WORD;
1677   enum machine_mode mode;
1678
1679   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1680     {
1681       /* Special treatment for a bit field split across two registers.  */
1682       if (bitsize + bitpos > BITS_PER_WORD)
1683         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1684     }
1685   else
1686     {
1687       /* Get the proper mode to use for this field.  We want a mode that
1688          includes the entire field.  If such a mode would be larger than
1689          a word, we won't be doing the extraction the normal way.  */
1690
1691       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1692                             MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1693
1694       if (mode == VOIDmode)
1695         /* The only way this should occur is if the field spans word
1696            boundaries.  */
1697         return extract_split_bit_field (op0, bitsize,
1698                                         bitpos + offset * BITS_PER_UNIT,
1699                                         unsignedp);
1700
1701       total_bits = GET_MODE_BITSIZE (mode);
1702
1703       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1704          be in the range 0 to total_bits-1, and put any excess bytes in
1705          OFFSET.  */
1706       if (bitpos >= total_bits)
1707         {
1708           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1709           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1710                      * BITS_PER_UNIT);
1711         }
1712
1713       /* Get ref to an aligned byte, halfword, or word containing the field.
1714          Adjust BITPOS to be position within a word,
1715          and OFFSET to be the offset of that word.
1716          Then alter OP0 to refer to that word.  */
1717       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1718       offset -= (offset % (total_bits / BITS_PER_UNIT));
1719       op0 = adjust_address (op0, mode, offset);
1720     }
1721
1722   mode = GET_MODE (op0);
1723
1724   if (BYTES_BIG_ENDIAN)
1725     /* BITPOS is the distance between our msb and that of OP0.
1726        Convert it to the distance from the lsb.  */
1727     bitpos = total_bits - bitsize - bitpos;
1728
1729   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1730      We have reduced the big-endian case to the little-endian case.  */
1731
1732   if (unsignedp)
1733     {
1734       if (bitpos)
1735         {
1736           /* If the field does not already start at the lsb,
1737              shift it so it does.  */
1738           tree amount = build_int_2 (bitpos, 0);
1739           /* Maybe propagate the target for the shift.  */
1740           /* But not if we will return it--could confuse integrate.c.  */
1741           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1742           if (tmode != mode) subtarget = 0;
1743           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1744         }
1745       /* Convert the value to the desired mode.  */
1746       if (mode != tmode)
1747         op0 = convert_to_mode (tmode, op0, 1);
1748
1749       /* Unless the msb of the field used to be the msb when we shifted,
1750          mask out the upper bits.  */
1751
1752       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1753         return expand_binop (GET_MODE (op0), and_optab, op0,
1754                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1755                              target, 1, OPTAB_LIB_WIDEN);
1756       return op0;
1757     }
1758
1759   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1760      then arithmetic-shift its lsb to the lsb of the word.  */
1761   op0 = force_reg (mode, op0);
1762   if (mode != tmode)
1763     target = 0;
1764
1765   /* Find the narrowest integer mode that contains the field.  */
1766
1767   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1768        mode = GET_MODE_WIDER_MODE (mode))
1769     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1770       {
1771         op0 = convert_to_mode (mode, op0, 0);
1772         break;
1773       }
1774
1775   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1776     {
1777       tree amount
1778         = build_int_2 (GET_MODE_BITSIZE (mode) - (bitsize + bitpos), 0);
1779       /* Maybe propagate the target for the shift.  */
1780       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1781       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1782     }
1783
1784   return expand_shift (RSHIFT_EXPR, mode, op0,
1785                        build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1786                        target, 0);
1787 }
1788 \f
1789 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1790    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1791    complement of that if COMPLEMENT.  The mask is truncated if
1792    necessary to the width of mode MODE.  The mask is zero-extended if
1793    BITSIZE+BITPOS is too small for MODE.  */
1794
1795 static rtx
1796 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1797 {
1798   HOST_WIDE_INT masklow, maskhigh;
1799
1800   if (bitsize == 0)
1801     masklow = 0;
1802   else if (bitpos < HOST_BITS_PER_WIDE_INT)
1803     masklow = (HOST_WIDE_INT) -1 << bitpos;
1804   else
1805     masklow = 0;
1806
1807   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1808     masklow &= ((unsigned HOST_WIDE_INT) -1
1809                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1810
1811   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1812     maskhigh = -1;
1813   else
1814     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1815
1816   if (bitsize == 0)
1817     maskhigh = 0;
1818   else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1819     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1820                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1821   else
1822     maskhigh = 0;
1823
1824   if (complement)
1825     {
1826       maskhigh = ~maskhigh;
1827       masklow = ~masklow;
1828     }
1829
1830   return immed_double_const (masklow, maskhigh, mode);
1831 }
1832
1833 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1834    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1835
1836 static rtx
1837 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1838 {
1839   unsigned HOST_WIDE_INT v = INTVAL (value);
1840   HOST_WIDE_INT low, high;
1841
1842   if (bitsize < HOST_BITS_PER_WIDE_INT)
1843     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1844
1845   if (bitpos < HOST_BITS_PER_WIDE_INT)
1846     {
1847       low = v << bitpos;
1848       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1849     }
1850   else
1851     {
1852       low = 0;
1853       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1854     }
1855
1856   return immed_double_const (low, high, mode);
1857 }
1858 \f
1859 /* Extract a bit field that is split across two words
1860    and return an RTX for the result.
1861
1862    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1863    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1864    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1865
1866 static rtx
1867 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1868                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1869 {
1870   unsigned int unit;
1871   unsigned int bitsdone = 0;
1872   rtx result = NULL_RTX;
1873   int first = 1;
1874
1875   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1876      much at a time.  */
1877   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1878     unit = BITS_PER_WORD;
1879   else
1880     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1881
1882   while (bitsdone < bitsize)
1883     {
1884       unsigned HOST_WIDE_INT thissize;
1885       rtx part, word;
1886       unsigned HOST_WIDE_INT thispos;
1887       unsigned HOST_WIDE_INT offset;
1888
1889       offset = (bitpos + bitsdone) / unit;
1890       thispos = (bitpos + bitsdone) % unit;
1891
1892       /* THISSIZE must not overrun a word boundary.  Otherwise,
1893          extract_fixed_bit_field will call us again, and we will mutually
1894          recurse forever.  */
1895       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1896       thissize = MIN (thissize, unit - thispos);
1897
1898       /* If OP0 is a register, then handle OFFSET here.
1899
1900          When handling multiword bitfields, extract_bit_field may pass
1901          down a word_mode SUBREG of a larger REG for a bitfield that actually
1902          crosses a word boundary.  Thus, for a SUBREG, we must find
1903          the current word starting from the base register.  */
1904       if (GET_CODE (op0) == SUBREG)
1905         {
1906           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1907           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1908                                         GET_MODE (SUBREG_REG (op0)));
1909           offset = 0;
1910         }
1911       else if (REG_P (op0))
1912         {
1913           word = operand_subword_force (op0, offset, GET_MODE (op0));
1914           offset = 0;
1915         }
1916       else
1917         word = op0;
1918
1919       /* Extract the parts in bit-counting order,
1920          whose meaning is determined by BYTES_PER_UNIT.
1921          OFFSET is in UNITs, and UNIT is in bits.
1922          extract_fixed_bit_field wants offset in bytes.  */
1923       part = extract_fixed_bit_field (word_mode, word,
1924                                       offset * unit / BITS_PER_UNIT,
1925                                       thissize, thispos, 0, 1);
1926       bitsdone += thissize;
1927
1928       /* Shift this part into place for the result.  */
1929       if (BYTES_BIG_ENDIAN)
1930         {
1931           if (bitsize != bitsdone)
1932             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1933                                  build_int_2 (bitsize - bitsdone, 0), 0, 1);
1934         }
1935       else
1936         {
1937           if (bitsdone != thissize)
1938             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1939                                  build_int_2 (bitsdone - thissize, 0), 0, 1);
1940         }
1941
1942       if (first)
1943         result = part;
1944       else
1945         /* Combine the parts with bitwise or.  This works
1946            because we extracted each part as an unsigned bit field.  */
1947         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1948                                OPTAB_LIB_WIDEN);
1949
1950       first = 0;
1951     }
1952
1953   /* Unsigned bit field: we are done.  */
1954   if (unsignedp)
1955     return result;
1956   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1957   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1958                          build_int_2 (BITS_PER_WORD - bitsize, 0),
1959                          NULL_RTX, 0);
1960   return expand_shift (RSHIFT_EXPR, word_mode, result,
1961                        build_int_2 (BITS_PER_WORD - bitsize, 0), NULL_RTX, 0);
1962 }
1963 \f
1964 /* Add INC into TARGET.  */
1965
1966 void
1967 expand_inc (rtx target, rtx inc)
1968 {
1969   rtx value = expand_binop (GET_MODE (target), add_optab,
1970                             target, inc,
1971                             target, 0, OPTAB_LIB_WIDEN);
1972   if (value != target)
1973     emit_move_insn (target, value);
1974 }
1975
1976 /* Subtract DEC from TARGET.  */
1977
1978 void
1979 expand_dec (rtx target, rtx dec)
1980 {
1981   rtx value = expand_binop (GET_MODE (target), sub_optab,
1982                             target, dec,
1983                             target, 0, OPTAB_LIB_WIDEN);
1984   if (value != target)
1985     emit_move_insn (target, value);
1986 }
1987 \f
1988 /* Output a shift instruction for expression code CODE,
1989    with SHIFTED being the rtx for the value to shift,
1990    and AMOUNT the tree for the amount to shift by.
1991    Store the result in the rtx TARGET, if that is convenient.
1992    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
1993    Return the rtx for where the value is.  */
1994
1995 rtx
1996 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
1997               tree amount, rtx target, int unsignedp)
1998 {
1999   rtx op1, temp = 0;
2000   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2001   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2002   int try;
2003
2004   /* Previously detected shift-counts computed by NEGATE_EXPR
2005      and shifted in the other direction; but that does not work
2006      on all machines.  */
2007
2008   op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
2009
2010   if (SHIFT_COUNT_TRUNCATED)
2011     {
2012       if (GET_CODE (op1) == CONST_INT
2013           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2014               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2015         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2016                        % GET_MODE_BITSIZE (mode));
2017       else if (GET_CODE (op1) == SUBREG
2018                && subreg_lowpart_p (op1))
2019         op1 = SUBREG_REG (op1);
2020     }
2021
2022   if (op1 == const0_rtx)
2023     return shifted;
2024
2025   /* Check whether its cheaper to implement a left shift by a constant
2026      bit count by a sequence of additions.  */
2027   if (code == LSHIFT_EXPR
2028       && GET_CODE (op1) == CONST_INT
2029       && INTVAL (op1) > 0
2030       && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2031       && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode])
2032     {
2033       int i;
2034       for (i = 0; i < INTVAL (op1); i++)
2035         {
2036           temp = force_reg (mode, shifted);
2037           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2038                                   unsignedp, OPTAB_LIB_WIDEN);
2039         }
2040       return shifted;
2041     }
2042
2043   for (try = 0; temp == 0 && try < 3; try++)
2044     {
2045       enum optab_methods methods;
2046
2047       if (try == 0)
2048         methods = OPTAB_DIRECT;
2049       else if (try == 1)
2050         methods = OPTAB_WIDEN;
2051       else
2052         methods = OPTAB_LIB_WIDEN;
2053
2054       if (rotate)
2055         {
2056           /* Widening does not work for rotation.  */
2057           if (methods == OPTAB_WIDEN)
2058             continue;
2059           else if (methods == OPTAB_LIB_WIDEN)
2060             {
2061               /* If we have been unable to open-code this by a rotation,
2062                  do it as the IOR of two shifts.  I.e., to rotate A
2063                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2064                  where C is the bitsize of A.
2065
2066                  It is theoretically possible that the target machine might
2067                  not be able to perform either shift and hence we would
2068                  be making two libcalls rather than just the one for the
2069                  shift (similarly if IOR could not be done).  We will allow
2070                  this extremely unlikely lossage to avoid complicating the
2071                  code below.  */
2072
2073               rtx subtarget = target == shifted ? 0 : target;
2074               rtx temp1;
2075               tree type = TREE_TYPE (amount);
2076               tree new_amount = make_tree (type, op1);
2077               tree other_amount
2078                 = fold (build2 (MINUS_EXPR, type,
2079                                 convert (type,
2080                                          build_int_2 (GET_MODE_BITSIZE (mode),
2081                                                       0)),
2082                                 amount));
2083
2084               shifted = force_reg (mode, shifted);
2085
2086               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2087                                    mode, shifted, new_amount, subtarget, 1);
2088               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2089                                     mode, shifted, other_amount, 0, 1);
2090               return expand_binop (mode, ior_optab, temp, temp1, target,
2091                                    unsignedp, methods);
2092             }
2093
2094           temp = expand_binop (mode,
2095                                left ? rotl_optab : rotr_optab,
2096                                shifted, op1, target, unsignedp, methods);
2097
2098           /* If we don't have the rotate, but we are rotating by a constant
2099              that is in range, try a rotate in the opposite direction.  */
2100
2101           if (temp == 0 && GET_CODE (op1) == CONST_INT
2102               && INTVAL (op1) > 0
2103               && (unsigned int) INTVAL (op1) < GET_MODE_BITSIZE (mode))
2104             temp = expand_binop (mode,
2105                                  left ? rotr_optab : rotl_optab,
2106                                  shifted,
2107                                  GEN_INT (GET_MODE_BITSIZE (mode)
2108                                           - INTVAL (op1)),
2109                                  target, unsignedp, methods);
2110         }
2111       else if (unsignedp)
2112         temp = expand_binop (mode,
2113                              left ? ashl_optab : lshr_optab,
2114                              shifted, op1, target, unsignedp, methods);
2115
2116       /* Do arithmetic shifts.
2117          Also, if we are going to widen the operand, we can just as well
2118          use an arithmetic right-shift instead of a logical one.  */
2119       if (temp == 0 && ! rotate
2120           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2121         {
2122           enum optab_methods methods1 = methods;
2123
2124           /* If trying to widen a log shift to an arithmetic shift,
2125              don't accept an arithmetic shift of the same size.  */
2126           if (unsignedp)
2127             methods1 = OPTAB_MUST_WIDEN;
2128
2129           /* Arithmetic shift */
2130
2131           temp = expand_binop (mode,
2132                                left ? ashl_optab : ashr_optab,
2133                                shifted, op1, target, unsignedp, methods1);
2134         }
2135
2136       /* We used to try extzv here for logical right shifts, but that was
2137          only useful for one machine, the VAX, and caused poor code
2138          generation there for lshrdi3, so the code was deleted and a
2139          define_expand for lshrsi3 was added to vax.md.  */
2140     }
2141
2142   if (temp == 0)
2143     abort ();
2144   return temp;
2145 }
2146 \f
2147 enum alg_code { alg_zero, alg_m, alg_shift,
2148                   alg_add_t_m2, alg_sub_t_m2,
2149                   alg_add_factor, alg_sub_factor,
2150                   alg_add_t2_m, alg_sub_t2_m,
2151                   alg_add, alg_subtract, alg_factor, alg_shiftop };
2152
2153 /* This structure records a sequence of operations.
2154    `ops' is the number of operations recorded.
2155    `cost' is their total cost.
2156    The operations are stored in `op' and the corresponding
2157    logarithms of the integer coefficients in `log'.
2158
2159    These are the operations:
2160    alg_zero             total := 0;
2161    alg_m                total := multiplicand;
2162    alg_shift            total := total * coeff
2163    alg_add_t_m2         total := total + multiplicand * coeff;
2164    alg_sub_t_m2         total := total - multiplicand * coeff;
2165    alg_add_factor       total := total * coeff + total;
2166    alg_sub_factor       total := total * coeff - total;
2167    alg_add_t2_m         total := total * coeff + multiplicand;
2168    alg_sub_t2_m         total := total * coeff - multiplicand;
2169
2170    The first operand must be either alg_zero or alg_m.  */
2171
2172 struct algorithm
2173 {
2174   short cost;
2175   short ops;
2176   /* The size of the OP and LOG fields are not directly related to the
2177      word size, but the worst-case algorithms will be if we have few
2178      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2179      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2180      in total wordsize operations.  */
2181   enum alg_code op[MAX_BITS_PER_WORD];
2182   char log[MAX_BITS_PER_WORD];
2183 };
2184
2185 /* Indicates the type of fixup needed after a constant multiplication.
2186    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2187    the result should be negated, and ADD_VARIANT means that the
2188    multiplicand should be added to the result.  */
2189 enum mult_variant {basic_variant, negate_variant, add_variant};
2190
2191 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2192                         int, enum machine_mode mode);
2193 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2194                                  struct algorithm *, enum mult_variant *, int);
2195 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2196                               const struct algorithm *, enum mult_variant);
2197 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2198                                                  int, unsigned HOST_WIDE_INT *,
2199                                                  int *, int *);
2200 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2201 static rtx extract_high_half (enum machine_mode, rtx);
2202 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2203                                        int, int);
2204 /* Compute and return the best algorithm for multiplying by T.
2205    The algorithm must cost less than cost_limit
2206    If retval.cost >= COST_LIMIT, no algorithm was found and all
2207    other field of the returned struct are undefined.
2208    MODE is the machine mode of the multiplication.  */
2209
2210 static void
2211 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2212             int cost_limit, enum machine_mode mode)
2213 {
2214   int m;
2215   struct algorithm *alg_in, *best_alg;
2216   int cost;
2217   unsigned HOST_WIDE_INT q;
2218   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2219
2220   /* Indicate that no algorithm is yet found.  If no algorithm
2221      is found, this value will be returned and indicate failure.  */
2222   alg_out->cost = cost_limit;
2223
2224   if (cost_limit <= 0)
2225     return;
2226
2227   /* Restrict the bits of "t" to the multiplication's mode.  */
2228   t &= GET_MODE_MASK (mode);
2229
2230   /* t == 1 can be done in zero cost.  */
2231   if (t == 1)
2232     {
2233       alg_out->ops = 1;
2234       alg_out->cost = 0;
2235       alg_out->op[0] = alg_m;
2236       return;
2237     }
2238
2239   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2240      fail now.  */
2241   if (t == 0)
2242     {
2243       if (zero_cost >= cost_limit)
2244         return;
2245       else
2246         {
2247           alg_out->ops = 1;
2248           alg_out->cost = zero_cost;
2249           alg_out->op[0] = alg_zero;
2250           return;
2251         }
2252     }
2253
2254   /* We'll be needing a couple extra algorithm structures now.  */
2255
2256   alg_in = alloca (sizeof (struct algorithm));
2257   best_alg = alloca (sizeof (struct algorithm));
2258
2259   /* If we have a group of zero bits at the low-order part of T, try
2260      multiplying by the remaining bits and then doing a shift.  */
2261
2262   if ((t & 1) == 0)
2263     {
2264       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2265       if (m < maxm)
2266         {
2267           q = t >> m;
2268           /* The function expand_shift will choose between a shift and
2269              a sequence of additions, so the observed cost is given as
2270              MIN (m * add_cost[mode], shift_cost[mode][m]).  */
2271           cost = m * add_cost[mode];
2272           if (shift_cost[mode][m] < cost)
2273             cost = shift_cost[mode][m];
2274           synth_mult (alg_in, q, cost_limit - cost, mode);
2275
2276           cost += alg_in->cost;
2277           if (cost < cost_limit)
2278             {
2279               struct algorithm *x;
2280               x = alg_in, alg_in = best_alg, best_alg = x;
2281               best_alg->log[best_alg->ops] = m;
2282               best_alg->op[best_alg->ops] = alg_shift;
2283               cost_limit = cost;
2284             }
2285         }
2286     }
2287
2288   /* If we have an odd number, add or subtract one.  */
2289   if ((t & 1) != 0)
2290     {
2291       unsigned HOST_WIDE_INT w;
2292
2293       for (w = 1; (w & t) != 0; w <<= 1)
2294         ;
2295       /* If T was -1, then W will be zero after the loop.  This is another
2296          case where T ends with ...111.  Handling this with (T + 1) and
2297          subtract 1 produces slightly better code and results in algorithm
2298          selection much faster than treating it like the ...0111 case
2299          below.  */
2300       if (w == 0
2301           || (w > 2
2302               /* Reject the case where t is 3.
2303                  Thus we prefer addition in that case.  */
2304               && t != 3))
2305         {
2306           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2307
2308           cost = add_cost[mode];
2309           synth_mult (alg_in, t + 1, cost_limit - cost, mode);
2310
2311           cost += alg_in->cost;
2312           if (cost < cost_limit)
2313             {
2314               struct algorithm *x;
2315               x = alg_in, alg_in = best_alg, best_alg = x;
2316               best_alg->log[best_alg->ops] = 0;
2317               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2318               cost_limit = cost;
2319             }
2320         }
2321       else
2322         {
2323           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2324
2325           cost = add_cost[mode];
2326           synth_mult (alg_in, t - 1, cost_limit - cost, mode);
2327
2328           cost += alg_in->cost;
2329           if (cost < cost_limit)
2330             {
2331               struct algorithm *x;
2332               x = alg_in, alg_in = best_alg, best_alg = x;
2333               best_alg->log[best_alg->ops] = 0;
2334               best_alg->op[best_alg->ops] = alg_add_t_m2;
2335               cost_limit = cost;
2336             }
2337         }
2338     }
2339
2340   /* Look for factors of t of the form
2341      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2342      If we find such a factor, we can multiply by t using an algorithm that
2343      multiplies by q, shift the result by m and add/subtract it to itself.
2344
2345      We search for large factors first and loop down, even if large factors
2346      are less probable than small; if we find a large factor we will find a
2347      good sequence quickly, and therefore be able to prune (by decreasing
2348      COST_LIMIT) the search.  */
2349
2350   for (m = floor_log2 (t - 1); m >= 2; m--)
2351     {
2352       unsigned HOST_WIDE_INT d;
2353
2354       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2355       if (t % d == 0 && t > d && m < maxm)
2356         {
2357           cost = add_cost[mode] + shift_cost[mode][m];
2358           if (shiftadd_cost[mode][m] < cost)
2359             cost = shiftadd_cost[mode][m];
2360           synth_mult (alg_in, t / d, cost_limit - cost, mode);
2361
2362           cost += alg_in->cost;
2363           if (cost < cost_limit)
2364             {
2365               struct algorithm *x;
2366               x = alg_in, alg_in = best_alg, best_alg = x;
2367               best_alg->log[best_alg->ops] = m;
2368               best_alg->op[best_alg->ops] = alg_add_factor;
2369               cost_limit = cost;
2370             }
2371           /* Other factors will have been taken care of in the recursion.  */
2372           break;
2373         }
2374
2375       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2376       if (t % d == 0 && t > d && m < maxm)
2377         {
2378           cost = add_cost[mode] + shift_cost[mode][m];
2379           if (shiftsub_cost[mode][m] < cost)
2380             cost = shiftsub_cost[mode][m];
2381           synth_mult (alg_in, t / d, cost_limit - cost, mode);
2382
2383           cost += alg_in->cost;
2384           if (cost < cost_limit)
2385             {
2386               struct algorithm *x;
2387               x = alg_in, alg_in = best_alg, best_alg = x;
2388               best_alg->log[best_alg->ops] = m;
2389               best_alg->op[best_alg->ops] = alg_sub_factor;
2390               cost_limit = cost;
2391             }
2392           break;
2393         }
2394     }
2395
2396   /* Try shift-and-add (load effective address) instructions,
2397      i.e. do a*3, a*5, a*9.  */
2398   if ((t & 1) != 0)
2399     {
2400       q = t - 1;
2401       q = q & -q;
2402       m = exact_log2 (q);
2403       if (m >= 0 && m < maxm)
2404         {
2405           cost = shiftadd_cost[mode][m];
2406           synth_mult (alg_in, (t - 1) >> m, cost_limit - cost, mode);
2407
2408           cost += alg_in->cost;
2409           if (cost < cost_limit)
2410             {
2411               struct algorithm *x;
2412               x = alg_in, alg_in = best_alg, best_alg = x;
2413               best_alg->log[best_alg->ops] = m;
2414               best_alg->op[best_alg->ops] = alg_add_t2_m;
2415               cost_limit = cost;
2416             }
2417         }
2418
2419       q = t + 1;
2420       q = q & -q;
2421       m = exact_log2 (q);
2422       if (m >= 0 && m < maxm)
2423         {
2424           cost = shiftsub_cost[mode][m];
2425           synth_mult (alg_in, (t + 1) >> m, cost_limit - cost, mode);
2426
2427           cost += alg_in->cost;
2428           if (cost < cost_limit)
2429             {
2430               struct algorithm *x;
2431               x = alg_in, alg_in = best_alg, best_alg = x;
2432               best_alg->log[best_alg->ops] = m;
2433               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2434               cost_limit = cost;
2435             }
2436         }
2437     }
2438
2439   /* If cost_limit has not decreased since we stored it in alg_out->cost,
2440      we have not found any algorithm.  */
2441   if (cost_limit == alg_out->cost)
2442     return;
2443
2444   /* If we are getting a too long sequence for `struct algorithm'
2445      to record, make this search fail.  */
2446   if (best_alg->ops == MAX_BITS_PER_WORD)
2447     return;
2448
2449   /* Copy the algorithm from temporary space to the space at alg_out.
2450      We avoid using structure assignment because the majority of
2451      best_alg is normally undefined, and this is a critical function.  */
2452   alg_out->ops = best_alg->ops + 1;
2453   alg_out->cost = cost_limit;
2454   memcpy (alg_out->op, best_alg->op,
2455           alg_out->ops * sizeof *alg_out->op);
2456   memcpy (alg_out->log, best_alg->log,
2457           alg_out->ops * sizeof *alg_out->log);
2458 }
2459 \f
2460 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2461    Try three variations:
2462
2463        - a shift/add sequence based on VAL itself
2464        - a shift/add sequence based on -VAL, followed by a negation
2465        - a shift/add sequence based on VAL - 1, followed by an addition.
2466
2467    Return true if the cheapest of these cost less than MULT_COST,
2468    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2469
2470 static bool
2471 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2472                      struct algorithm *alg, enum mult_variant *variant,
2473                      int mult_cost)
2474 {
2475   struct algorithm alg2;
2476
2477   *variant = basic_variant;
2478   synth_mult (alg, val, mult_cost, mode);
2479
2480   /* This works only if the inverted value actually fits in an
2481      `unsigned int' */
2482   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2483     {
2484       synth_mult (&alg2, -val, MIN (alg->cost, mult_cost) - neg_cost[mode],
2485                   mode);
2486       alg2.cost += neg_cost[mode];
2487       if (alg2.cost < alg->cost)
2488         *alg = alg2, *variant = negate_variant;
2489     }
2490
2491   /* This proves very useful for division-by-constant.  */
2492   synth_mult (&alg2, val - 1, MIN (alg->cost, mult_cost) - add_cost[mode],
2493               mode);
2494   alg2.cost += add_cost[mode];
2495   if (alg2.cost < alg->cost)
2496     *alg = alg2, *variant = add_variant;
2497
2498   return alg->cost < mult_cost;
2499 }
2500
2501 /* A subroutine of expand_mult, used for constant multiplications.
2502    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2503    convenient.  Use the shift/add sequence described by ALG and apply
2504    the final fixup specified by VARIANT.  */
2505
2506 static rtx
2507 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2508                    rtx target, const struct algorithm *alg,
2509                    enum mult_variant variant)
2510 {
2511   HOST_WIDE_INT val_so_far;
2512   rtx insn, accum, tem;
2513   int opno;
2514   enum machine_mode nmode;
2515
2516   /* Avoid referencing memory over and over.
2517      For speed, but also for correctness when mem is volatile.  */
2518   if (MEM_P (op0))
2519     op0 = force_reg (mode, op0);
2520
2521   /* ACCUM starts out either as OP0 or as a zero, depending on
2522      the first operation.  */
2523
2524   if (alg->op[0] == alg_zero)
2525     {
2526       accum = copy_to_mode_reg (mode, const0_rtx);
2527       val_so_far = 0;
2528     }
2529   else if (alg->op[0] == alg_m)
2530     {
2531       accum = copy_to_mode_reg (mode, op0);
2532       val_so_far = 1;
2533     }
2534   else
2535     abort ();
2536
2537   for (opno = 1; opno < alg->ops; opno++)
2538     {
2539       int log = alg->log[opno];
2540       int preserve = preserve_subexpressions_p ();
2541       rtx shift_subtarget = preserve ? 0 : accum;
2542       rtx add_target
2543         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2544            && ! preserve)
2545           ? target : 0;
2546       rtx accum_target = preserve ? 0 : accum;
2547
2548       switch (alg->op[opno])
2549         {
2550         case alg_shift:
2551           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2552                                 build_int_2 (log, 0), NULL_RTX, 0);
2553           val_so_far <<= log;
2554           break;
2555
2556         case alg_add_t_m2:
2557           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2558                               build_int_2 (log, 0), NULL_RTX, 0);
2559           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2560                                  add_target ? add_target : accum_target);
2561           val_so_far += (HOST_WIDE_INT) 1 << log;
2562           break;
2563
2564         case alg_sub_t_m2:
2565           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2566                               build_int_2 (log, 0), NULL_RTX, 0);
2567           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2568                                  add_target ? add_target : accum_target);
2569           val_so_far -= (HOST_WIDE_INT) 1 << log;
2570           break;
2571
2572         case alg_add_t2_m:
2573           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2574                                 build_int_2 (log, 0), shift_subtarget,
2575                                 0);
2576           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2577                                  add_target ? add_target : accum_target);
2578           val_so_far = (val_so_far << log) + 1;
2579           break;
2580
2581         case alg_sub_t2_m:
2582           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2583                                 build_int_2 (log, 0), shift_subtarget, 0);
2584           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2585                                  add_target ? add_target : accum_target);
2586           val_so_far = (val_so_far << log) - 1;
2587           break;
2588
2589         case alg_add_factor:
2590           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2591                               build_int_2 (log, 0), NULL_RTX, 0);
2592           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2593                                  add_target ? add_target : accum_target);
2594           val_so_far += val_so_far << log;
2595           break;
2596
2597         case alg_sub_factor:
2598           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2599                               build_int_2 (log, 0), NULL_RTX, 0);
2600           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2601                                  (add_target ? add_target
2602                                   : preserve ? 0 : tem));
2603           val_so_far = (val_so_far << log) - val_so_far;
2604           break;
2605
2606         default:
2607           abort ();
2608         }
2609
2610       /* Write a REG_EQUAL note on the last insn so that we can cse
2611          multiplication sequences.  Note that if ACCUM is a SUBREG,
2612          we've set the inner register and must properly indicate
2613          that.  */
2614
2615       tem = op0, nmode = mode;
2616       if (GET_CODE (accum) == SUBREG)
2617         {
2618           nmode = GET_MODE (SUBREG_REG (accum));
2619           tem = gen_lowpart (nmode, op0);
2620         }
2621
2622       insn = get_last_insn ();
2623       set_unique_reg_note (insn, REG_EQUAL,
2624                            gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
2625     }
2626
2627   if (variant == negate_variant)
2628     {
2629       val_so_far = -val_so_far;
2630       accum = expand_unop (mode, neg_optab, accum, target, 0);
2631     }
2632   else if (variant == add_variant)
2633     {
2634       val_so_far = val_so_far + 1;
2635       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2636     }
2637
2638   /* Compare only the bits of val and val_so_far that are significant
2639      in the result mode, to avoid sign-/zero-extension confusion.  */
2640   val &= GET_MODE_MASK (mode);
2641   val_so_far &= GET_MODE_MASK (mode);
2642   if (val != val_so_far)
2643     abort ();
2644
2645   return accum;
2646 }
2647
2648 /* Perform a multiplication and return an rtx for the result.
2649    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2650    TARGET is a suggestion for where to store the result (an rtx).
2651
2652    We check specially for a constant integer as OP1.
2653    If you want this check for OP0 as well, then before calling
2654    you should swap the two operands if OP0 would be constant.  */
2655
2656 rtx
2657 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
2658              int unsignedp)
2659 {
2660   rtx const_op1 = op1;
2661   enum mult_variant variant;
2662   struct algorithm algorithm;
2663
2664   /* synth_mult does an `unsigned int' multiply.  As long as the mode is
2665      less than or equal in size to `unsigned int' this doesn't matter.
2666      If the mode is larger than `unsigned int', then synth_mult works only
2667      if the constant value exactly fits in an `unsigned int' without any
2668      truncation.  This means that multiplying by negative values does
2669      not work; results are off by 2^32 on a 32 bit machine.  */
2670
2671   /* If we are multiplying in DImode, it may still be a win
2672      to try to work with shifts and adds.  */
2673   if (GET_CODE (op1) == CONST_DOUBLE
2674       && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2675       && HOST_BITS_PER_INT >= BITS_PER_WORD
2676       && CONST_DOUBLE_HIGH (op1) == 0)
2677     const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2678   else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2679            && GET_CODE (op1) == CONST_INT
2680            && INTVAL (op1) < 0)
2681     const_op1 = 0;
2682
2683   /* We used to test optimize here, on the grounds that it's better to
2684      produce a smaller program when -O is not used.
2685      But this causes such a terrible slowdown sometimes
2686      that it seems better to use synth_mult always.  */
2687
2688   if (const_op1 && GET_CODE (const_op1) == CONST_INT
2689       && (unsignedp || !flag_trapv))
2690     {
2691       int mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
2692
2693       if (choose_mult_variant (mode, INTVAL (const_op1), &algorithm, &variant,
2694                                mult_cost))
2695         return expand_mult_const (mode, op0, INTVAL (const_op1), target,
2696                                   &algorithm, variant);
2697     }
2698
2699   if (GET_CODE (op0) == CONST_DOUBLE)
2700     {
2701       rtx temp = op0;
2702       op0 = op1;
2703       op1 = temp;
2704     }
2705
2706   /* Expand x*2.0 as x+x.  */
2707   if (GET_CODE (op1) == CONST_DOUBLE
2708       && GET_MODE_CLASS (mode) == MODE_FLOAT)
2709     {
2710       REAL_VALUE_TYPE d;
2711       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
2712
2713       if (REAL_VALUES_EQUAL (d, dconst2))
2714         {
2715           op0 = force_reg (GET_MODE (op0), op0);
2716           return expand_binop (mode, add_optab, op0, op0,
2717                                target, unsignedp, OPTAB_LIB_WIDEN);
2718         }
2719     }
2720
2721   /* This used to use umul_optab if unsigned, but for non-widening multiply
2722      there is no difference between signed and unsigned.  */
2723   op0 = expand_binop (mode,
2724                       ! unsignedp
2725                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
2726                       ? smulv_optab : smul_optab,
2727                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2728   if (op0 == 0)
2729     abort ();
2730   return op0;
2731 }
2732 \f
2733 /* Return the smallest n such that 2**n >= X.  */
2734
2735 int
2736 ceil_log2 (unsigned HOST_WIDE_INT x)
2737 {
2738   return floor_log2 (x - 1) + 1;
2739 }
2740
2741 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2742    replace division by D, and put the least significant N bits of the result
2743    in *MULTIPLIER_PTR and return the most significant bit.
2744
2745    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
2746    needed precision is in PRECISION (should be <= N).
2747
2748    PRECISION should be as small as possible so this function can choose
2749    multiplier more freely.
2750
2751    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
2752    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2753
2754    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2755    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
2756
2757 static
2758 unsigned HOST_WIDE_INT
2759 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
2760                    unsigned HOST_WIDE_INT *multiplier_ptr,
2761                    int *post_shift_ptr, int *lgup_ptr)
2762 {
2763   HOST_WIDE_INT mhigh_hi, mlow_hi;
2764   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
2765   int lgup, post_shift;
2766   int pow, pow2;
2767   unsigned HOST_WIDE_INT nl, dummy1;
2768   HOST_WIDE_INT nh, dummy2;
2769
2770   /* lgup = ceil(log2(divisor)); */
2771   lgup = ceil_log2 (d);
2772
2773   if (lgup > n)
2774     abort ();
2775
2776   pow = n + lgup;
2777   pow2 = n + lgup - precision;
2778
2779   if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2780     {
2781       /* We could handle this with some effort, but this case is much better
2782          handled directly with a scc insn, so rely on caller using that.  */
2783       abort ();
2784     }
2785
2786   /* mlow = 2^(N + lgup)/d */
2787  if (pow >= HOST_BITS_PER_WIDE_INT)
2788     {
2789       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2790       nl = 0;
2791     }
2792   else
2793     {
2794       nh = 0;
2795       nl = (unsigned HOST_WIDE_INT) 1 << pow;
2796     }
2797   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2798                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2799
2800   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
2801   if (pow2 >= HOST_BITS_PER_WIDE_INT)
2802     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2803   else
2804     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2805   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2806                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2807
2808   if (mhigh_hi && nh - d >= d)
2809     abort ();
2810   if (mhigh_hi > 1 || mlow_hi > 1)
2811     abort ();
2812   /* Assert that mlow < mhigh.  */
2813   if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2814     abort ();
2815
2816   /* If precision == N, then mlow, mhigh exceed 2^N
2817      (but they do not exceed 2^(N+1)).  */
2818
2819   /* Reduce to lowest terms.  */
2820   for (post_shift = lgup; post_shift > 0; post_shift--)
2821     {
2822       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2823       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2824       if (ml_lo >= mh_lo)
2825         break;
2826
2827       mlow_hi = 0;
2828       mlow_lo = ml_lo;
2829       mhigh_hi = 0;
2830       mhigh_lo = mh_lo;
2831     }
2832
2833   *post_shift_ptr = post_shift;
2834   *lgup_ptr = lgup;
2835   if (n < HOST_BITS_PER_WIDE_INT)
2836     {
2837       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2838       *multiplier_ptr = mhigh_lo & mask;
2839       return mhigh_lo >= mask;
2840     }
2841   else
2842     {
2843       *multiplier_ptr = mhigh_lo;
2844       return mhigh_hi;
2845     }
2846 }
2847
2848 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
2849    congruent to 1 (mod 2**N).  */
2850
2851 static unsigned HOST_WIDE_INT
2852 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
2853 {
2854   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
2855
2856   /* The algorithm notes that the choice y = x satisfies
2857      x*y == 1 mod 2^3, since x is assumed odd.
2858      Each iteration doubles the number of bits of significance in y.  */
2859
2860   unsigned HOST_WIDE_INT mask;
2861   unsigned HOST_WIDE_INT y = x;
2862   int nbit = 3;
2863
2864   mask = (n == HOST_BITS_PER_WIDE_INT
2865           ? ~(unsigned HOST_WIDE_INT) 0
2866           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2867
2868   while (nbit < n)
2869     {
2870       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
2871       nbit *= 2;
2872     }
2873   return y;
2874 }
2875
2876 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
2877    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
2878    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
2879    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
2880    become signed.
2881
2882    The result is put in TARGET if that is convenient.
2883
2884    MODE is the mode of operation.  */
2885
2886 rtx
2887 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
2888                              rtx op1, rtx target, int unsignedp)
2889 {
2890   rtx tem;
2891   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
2892
2893   tem = expand_shift (RSHIFT_EXPR, mode, op0,
2894                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2895                       NULL_RTX, 0);
2896   tem = expand_and (mode, tem, op1, NULL_RTX);
2897   adj_operand
2898     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2899                      adj_operand);
2900
2901   tem = expand_shift (RSHIFT_EXPR, mode, op1,
2902                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2903                       NULL_RTX, 0);
2904   tem = expand_and (mode, tem, op0, NULL_RTX);
2905   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2906                           target);
2907
2908   return target;
2909 }
2910
2911 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
2912
2913 static rtx
2914 extract_high_half (enum machine_mode mode, rtx op)
2915 {
2916   enum machine_mode wider_mode;
2917
2918   if (mode == word_mode)
2919     return gen_highpart (mode, op);
2920
2921   wider_mode = GET_MODE_WIDER_MODE (mode);
2922   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
2923                      build_int_2 (GET_MODE_BITSIZE (mode), 0), 0, 1);
2924   return convert_modes (mode, wider_mode, op, 0);
2925 }
2926
2927 /* Like expand_mult_highpart, but only consider using a multiplication
2928    optab.  OP1 is an rtx for the constant operand.  */
2929
2930 static rtx
2931 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
2932                             rtx target, int unsignedp, int max_cost)
2933 {
2934   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
2935   enum machine_mode wider_mode;
2936   optab moptab;
2937   rtx tem;
2938   int size;
2939
2940   wider_mode = GET_MODE_WIDER_MODE (mode);
2941   size = GET_MODE_BITSIZE (mode);
2942
2943   /* Firstly, try using a multiplication insn that only generates the needed
2944      high part of the product, and in the sign flavor of unsignedp.  */
2945   if (mul_highpart_cost[mode] < max_cost)
2946     {
2947       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
2948       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
2949                           unsignedp, OPTAB_DIRECT);
2950       if (tem)
2951         return tem;
2952     }
2953
2954   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
2955      Need to adjust the result after the multiplication.  */
2956   if (size - 1 < BITS_PER_WORD
2957       && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
2958           + 4 * add_cost[mode] < max_cost))
2959     {
2960       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
2961       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
2962                           unsignedp, OPTAB_DIRECT);
2963       if (tem)
2964         /* We used the wrong signedness.  Adjust the result.  */
2965         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
2966                                             tem, unsignedp);
2967     }
2968
2969   /* Try widening multiplication.  */
2970   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
2971   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
2972       && mul_widen_cost[wider_mode] < max_cost)
2973     {
2974       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
2975                           unsignedp, OPTAB_WIDEN);
2976       if (tem)
2977         return extract_high_half (mode, tem);
2978     }
2979
2980   /* Try widening the mode and perform a non-widening multiplication.  */
2981   moptab = smul_optab;
2982   if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
2983       && size - 1 < BITS_PER_WORD
2984       && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
2985     {
2986       tem = expand_binop (wider_mode, moptab, op0, op1, 0,
2987                           unsignedp, OPTAB_WIDEN);
2988       if (tem)
2989         return extract_high_half (mode, tem);
2990     }
2991
2992   /* Try widening multiplication of opposite signedness, and adjust.  */
2993   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
2994   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
2995       && size - 1 < BITS_PER_WORD
2996       && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
2997           + 4 * add_cost[mode] < max_cost))
2998     {
2999       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3000                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3001       if (tem != 0)
3002         {
3003           tem = extract_high_half (mode, tem);
3004           /* We used the wrong signedness.  Adjust the result.  */
3005           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3006                                               target, unsignedp);
3007         }
3008     }
3009
3010   return 0;
3011 }
3012
3013 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
3014    in TARGET if that is convenient, and return where the result is.  If the
3015    operation can not be performed, 0 is returned.
3016
3017    MODE is the mode of operation and result.
3018
3019    UNSIGNEDP nonzero means unsigned multiply.
3020
3021    MAX_COST is the total allowed cost for the expanded RTL.  */
3022
3023 rtx
3024 expand_mult_highpart (enum machine_mode mode, rtx op0,
3025                       unsigned HOST_WIDE_INT cnst1, rtx target,
3026                       int unsignedp, int max_cost)
3027 {
3028   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3029   int extra_cost;
3030   bool sign_adjust = false;
3031   enum mult_variant variant;
3032   struct algorithm alg;
3033   rtx op1, tem;
3034
3035   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3036   if (GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3037     abort ();
3038
3039   op1 = gen_int_mode (cnst1, wider_mode);
3040   cnst1 &= GET_MODE_MASK (mode);
3041
3042   /* We can't optimize modes wider than BITS_PER_WORD.
3043      ??? We might be able to perform double-word arithmetic if
3044      mode == word_mode, however all the cost calculations in
3045      synth_mult etc. assume single-word operations.  */
3046   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3047     return expand_mult_highpart_optab (mode, op0, op1, target,
3048                                        unsignedp, max_cost);
3049
3050   extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3051
3052   /* Check whether we try to multiply by a negative constant.  */
3053   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3054     {
3055       sign_adjust = true;
3056       extra_cost += add_cost[mode];
3057     }
3058
3059   /* See whether shift/add multiplication is cheap enough.  */
3060   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3061                            max_cost - extra_cost))
3062     {
3063       /* See whether the specialized multiplication optabs are
3064          cheaper than the shift/add version.  */
3065       tem = expand_mult_highpart_optab (mode, op0, op1, target,
3066                                         unsignedp, alg.cost + extra_cost);
3067       if (tem)
3068         return tem;
3069
3070       tem = convert_to_mode (wider_mode, op0, unsignedp);
3071       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3072       tem = extract_high_half (mode, tem);
3073
3074       /* Adjust result for signedness.  */
3075       if (sign_adjust)
3076         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3077
3078       return tem;
3079     }
3080   return expand_mult_highpart_optab (mode, op0, op1, target,
3081                                      unsignedp, max_cost);
3082 }
3083
3084
3085 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3086
3087 static rtx
3088 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3089 {
3090   unsigned HOST_WIDE_INT mask;
3091   rtx result, temp, shift, label;
3092   int logd;
3093
3094   logd = floor_log2 (d);
3095   result = gen_reg_rtx (mode);
3096
3097   /* Avoid conditional branches when they're expensive.  */
3098   if (BRANCH_COST >= 2
3099       && !optimize_size)
3100     {
3101       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3102                                       mode, 0, -1);
3103       if (signmask)
3104         {
3105           signmask = force_reg (mode, signmask);
3106           mask = ((HOST_WIDE_INT) 1 << logd) - 1;
3107           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3108
3109           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3110              which instruction sequence to use.  If logical right shifts
3111              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3112              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3113
3114           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3115           if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing
3116               || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3117             {
3118               temp = expand_binop (mode, xor_optab, op0, signmask,
3119                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3120               temp = expand_binop (mode, sub_optab, temp, signmask,
3121                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3122               temp = expand_binop (mode, and_optab, temp, GEN_INT (mask),
3123                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3124               temp = expand_binop (mode, xor_optab, temp, signmask,
3125                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3126               temp = expand_binop (mode, sub_optab, temp, signmask,
3127                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3128             }
3129           else
3130             {
3131               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3132                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3133               signmask = force_reg (mode, signmask);
3134
3135               temp = expand_binop (mode, add_optab, op0, signmask,
3136                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3137               temp = expand_binop (mode, and_optab, temp, GEN_INT (mask),
3138                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3139               temp = expand_binop (mode, sub_optab, temp, signmask,
3140                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3141             }
3142           return temp;
3143         }
3144     }
3145
3146   /* Mask contains the mode's signbit and the significant bits of the
3147      modulus.  By including the signbit in the operation, many targets
3148      can avoid an explicit compare operation in the following comparison
3149      against zero.  */
3150
3151   mask = (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1)
3152          | (((HOST_WIDE_INT) 1 << logd) - 1);
3153
3154   temp = expand_binop (mode, and_optab, op0, GEN_INT (mask), result,
3155                        1, OPTAB_LIB_WIDEN);
3156   if (temp != result)
3157     emit_move_insn (result, temp);
3158
3159   label = gen_label_rtx ();
3160   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3161
3162   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3163                        0, OPTAB_LIB_WIDEN);
3164   mask = (HOST_WIDE_INT) -1 << logd;
3165   temp = expand_binop (mode, ior_optab, temp, GEN_INT (mask), result,
3166                        1, OPTAB_LIB_WIDEN);
3167   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3168                        0, OPTAB_LIB_WIDEN);
3169   if (temp != result)
3170     emit_move_insn (result, temp);
3171   emit_label (label);
3172   return result;
3173 }
3174
3175 /* Expand signed division of OP0 by a power of two D in mode MODE.
3176    This routine is only called for positive values of D.  */
3177
3178 static rtx
3179 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3180 {
3181   rtx temp, label;
3182   tree shift;
3183   int logd;
3184
3185   logd = floor_log2 (d);
3186   shift = build_int_2 (logd, 0);
3187
3188   if (d == 2 && BRANCH_COST >= 1)
3189     {
3190       temp = gen_reg_rtx (mode);
3191       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3192       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3193                            0, OPTAB_LIB_WIDEN);
3194       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3195     }
3196
3197   if (BRANCH_COST >= 2)
3198     {
3199       int ushift = GET_MODE_BITSIZE (mode) - logd;
3200
3201       temp = gen_reg_rtx (mode);
3202       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3203       if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
3204         temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3205                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3206       else
3207         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3208                              build_int_2 (ushift, 0), NULL_RTX, 1);
3209       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3210                            0, OPTAB_LIB_WIDEN);
3211       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3212     }
3213
3214   label = gen_label_rtx ();
3215   temp = copy_to_mode_reg (mode, op0);
3216   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3217   expand_inc (temp, GEN_INT (d - 1));
3218   emit_label (label);
3219   return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3220 }
3221 \f
3222 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3223    if that is convenient, and returning where the result is.
3224    You may request either the quotient or the remainder as the result;
3225    specify REM_FLAG nonzero to get the remainder.
3226
3227    CODE is the expression code for which kind of division this is;
3228    it controls how rounding is done.  MODE is the machine mode to use.
3229    UNSIGNEDP nonzero means do unsigned division.  */
3230
3231 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3232    and then correct it by or'ing in missing high bits
3233    if result of ANDI is nonzero.
3234    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3235    This could optimize to a bfexts instruction.
3236    But C doesn't use these operations, so their optimizations are
3237    left for later.  */
3238 /* ??? For modulo, we don't actually need the highpart of the first product,
3239    the low part will do nicely.  And for small divisors, the second multiply
3240    can also be a low-part only multiply or even be completely left out.
3241    E.g. to calculate the remainder of a division by 3 with a 32 bit
3242    multiply, multiply with 0x55555556 and extract the upper two bits;
3243    the result is exact for inputs up to 0x1fffffff.
3244    The input range can be reduced by using cross-sum rules.
3245    For odd divisors >= 3, the following table gives right shift counts
3246    so that if a number is shifted by an integer multiple of the given
3247    amount, the remainder stays the same:
3248    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3249    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3250    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3251    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3252    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3253
3254    Cross-sum rules for even numbers can be derived by leaving as many bits
3255    to the right alone as the divisor has zeros to the right.
3256    E.g. if x is an unsigned 32 bit number:
3257    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3258    */
3259
3260 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
3261
3262 rtx
3263 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3264                rtx op0, rtx op1, rtx target, int unsignedp)
3265 {
3266   enum machine_mode compute_mode;
3267   rtx tquotient;
3268   rtx quotient = 0, remainder = 0;
3269   rtx last;
3270   int size;
3271   rtx insn, set;
3272   optab optab1, optab2;
3273   int op1_is_constant, op1_is_pow2 = 0;
3274   int max_cost, extra_cost;
3275   static HOST_WIDE_INT last_div_const = 0;
3276   static HOST_WIDE_INT ext_op1;
3277
3278   op1_is_constant = GET_CODE (op1) == CONST_INT;
3279   if (op1_is_constant)
3280     {
3281       ext_op1 = INTVAL (op1);
3282       if (unsignedp)
3283         ext_op1 &= GET_MODE_MASK (mode);
3284       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3285                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3286     }
3287
3288   /*
3289      This is the structure of expand_divmod:
3290
3291      First comes code to fix up the operands so we can perform the operations
3292      correctly and efficiently.
3293
3294      Second comes a switch statement with code specific for each rounding mode.
3295      For some special operands this code emits all RTL for the desired
3296      operation, for other cases, it generates only a quotient and stores it in
3297      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3298      to indicate that it has not done anything.
3299
3300      Last comes code that finishes the operation.  If QUOTIENT is set and
3301      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3302      QUOTIENT is not set, it is computed using trunc rounding.
3303
3304      We try to generate special code for division and remainder when OP1 is a
3305      constant.  If |OP1| = 2**n we can use shifts and some other fast
3306      operations.  For other values of OP1, we compute a carefully selected
3307      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3308      by m.
3309
3310      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3311      half of the product.  Different strategies for generating the product are
3312      implemented in expand_mult_highpart.
3313
3314      If what we actually want is the remainder, we generate that by another
3315      by-constant multiplication and a subtraction.  */
3316
3317   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3318      code below will malfunction if we are, so check here and handle
3319      the special case if so.  */
3320   if (op1 == const1_rtx)
3321     return rem_flag ? const0_rtx : op0;
3322
3323     /* When dividing by -1, we could get an overflow.
3324      negv_optab can handle overflows.  */
3325   if (! unsignedp && op1 == constm1_rtx)
3326     {
3327       if (rem_flag)
3328         return const0_rtx;
3329       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3330                           ? negv_optab : neg_optab, op0, target, 0);
3331     }
3332
3333   if (target
3334       /* Don't use the function value register as a target
3335          since we have to read it as well as write it,
3336          and function-inlining gets confused by this.  */
3337       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3338           /* Don't clobber an operand while doing a multi-step calculation.  */
3339           || ((rem_flag || op1_is_constant)
3340               && (reg_mentioned_p (target, op0)
3341                   || (MEM_P (op0) && MEM_P (target))))
3342           || reg_mentioned_p (target, op1)
3343           || (MEM_P (op1) && MEM_P (target))))
3344     target = 0;
3345
3346   /* Get the mode in which to perform this computation.  Normally it will
3347      be MODE, but sometimes we can't do the desired operation in MODE.
3348      If so, pick a wider mode in which we can do the operation.  Convert
3349      to that mode at the start to avoid repeated conversions.
3350
3351      First see what operations we need.  These depend on the expression
3352      we are evaluating.  (We assume that divxx3 insns exist under the
3353      same conditions that modxx3 insns and that these insns don't normally
3354      fail.  If these assumptions are not correct, we may generate less
3355      efficient code in some cases.)
3356
3357      Then see if we find a mode in which we can open-code that operation
3358      (either a division, modulus, or shift).  Finally, check for the smallest
3359      mode for which we can do the operation with a library call.  */
3360
3361   /* We might want to refine this now that we have division-by-constant
3362      optimization.  Since expand_mult_highpart tries so many variants, it is
3363      not straightforward to generalize this.  Maybe we should make an array
3364      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3365
3366   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3367             ? (unsignedp ? lshr_optab : ashr_optab)
3368             : (unsignedp ? udiv_optab : sdiv_optab));
3369   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3370             ? optab1
3371             : (unsignedp ? udivmod_optab : sdivmod_optab));
3372
3373   for (compute_mode = mode; compute_mode != VOIDmode;
3374        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3375     if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
3376         || optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
3377       break;
3378
3379   if (compute_mode == VOIDmode)
3380     for (compute_mode = mode; compute_mode != VOIDmode;
3381          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3382       if (optab1->handlers[compute_mode].libfunc
3383           || optab2->handlers[compute_mode].libfunc)
3384         break;
3385
3386   /* If we still couldn't find a mode, use MODE, but we'll probably abort
3387      in expand_binop.  */
3388   if (compute_mode == VOIDmode)
3389     compute_mode = mode;
3390
3391   if (target && GET_MODE (target) == compute_mode)
3392     tquotient = target;
3393   else
3394     tquotient = gen_reg_rtx (compute_mode);
3395
3396   size = GET_MODE_BITSIZE (compute_mode);
3397 #if 0
3398   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3399      (mode), and thereby get better code when OP1 is a constant.  Do that
3400      later.  It will require going over all usages of SIZE below.  */
3401   size = GET_MODE_BITSIZE (mode);
3402 #endif
3403
3404   /* Only deduct something for a REM if the last divide done was
3405      for a different constant.   Then set the constant of the last
3406      divide.  */
3407   max_cost = div_cost[compute_mode]
3408     - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3409                       && INTVAL (op1) == last_div_const)
3410        ? mul_cost[compute_mode] + add_cost[compute_mode]
3411        : 0);
3412
3413   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3414
3415   /* Now convert to the best mode to use.  */
3416   if (compute_mode != mode)
3417     {
3418       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3419       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3420
3421       /* convert_modes may have placed op1 into a register, so we
3422          must recompute the following.  */
3423       op1_is_constant = GET_CODE (op1) == CONST_INT;
3424       op1_is_pow2 = (op1_is_constant
3425                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3426                           || (! unsignedp
3427                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3428     }
3429
3430   /* If one of the operands is a volatile MEM, copy it into a register.  */
3431
3432   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3433     op0 = force_reg (compute_mode, op0);
3434   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3435     op1 = force_reg (compute_mode, op1);
3436
3437   /* If we need the remainder or if OP1 is constant, we need to
3438      put OP0 in a register in case it has any queued subexpressions.  */
3439   if (rem_flag || op1_is_constant)
3440     op0 = force_reg (compute_mode, op0);
3441
3442   last = get_last_insn ();
3443
3444   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3445   if (unsignedp)
3446     {
3447       if (code == FLOOR_DIV_EXPR)
3448         code = TRUNC_DIV_EXPR;
3449       if (code == FLOOR_MOD_EXPR)
3450         code = TRUNC_MOD_EXPR;
3451       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3452         code = TRUNC_DIV_EXPR;
3453     }
3454
3455   if (op1 != const0_rtx)
3456     switch (code)
3457       {
3458       case TRUNC_MOD_EXPR:
3459       case TRUNC_DIV_EXPR:
3460         if (op1_is_constant)
3461           {
3462             if (unsignedp)
3463               {
3464                 unsigned HOST_WIDE_INT mh, ml;
3465                 int pre_shift, post_shift;
3466                 int dummy;
3467                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3468                                             & GET_MODE_MASK (compute_mode));
3469
3470                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3471                   {
3472                     pre_shift = floor_log2 (d);
3473                     if (rem_flag)
3474                       {
3475                         remainder
3476                           = expand_binop (compute_mode, and_optab, op0,
3477                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3478                                           remainder, 1,
3479                                           OPTAB_LIB_WIDEN);
3480                         if (remainder)
3481                           return gen_lowpart (mode, remainder);
3482                       }
3483                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3484                                              build_int_2 (pre_shift, 0),
3485                                              tquotient, 1);
3486                   }
3487                 else if (size <= HOST_BITS_PER_WIDE_INT)
3488                   {
3489                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3490                       {
3491                         /* Most significant bit of divisor is set; emit an scc
3492                            insn.  */
3493                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3494                                                     compute_mode, 1, 1);
3495                         if (quotient == 0)
3496                           goto fail1;
3497                       }
3498                     else
3499                       {
3500                         /* Find a suitable multiplier and right shift count
3501                            instead of multiplying with D.  */
3502
3503                         mh = choose_multiplier (d, size, size,
3504                                                 &ml, &post_shift, &dummy);
3505
3506                         /* If the suggested multiplier is more than SIZE bits,
3507                            we can do better for even divisors, using an
3508                            initial right shift.  */
3509                         if (mh != 0 && (d & 1) == 0)
3510                           {
3511                             pre_shift = floor_log2 (d & -d);
3512                             mh = choose_multiplier (d >> pre_shift, size,
3513                                                     size - pre_shift,
3514                                                     &ml, &post_shift, &dummy);
3515                             if (mh)
3516                               abort ();
3517                           }
3518                         else
3519                           pre_shift = 0;
3520
3521                         if (mh != 0)
3522                           {
3523                             rtx t1, t2, t3, t4;
3524
3525                             if (post_shift - 1 >= BITS_PER_WORD)
3526                               goto fail1;
3527
3528                             extra_cost
3529                               = (shift_cost[compute_mode][post_shift - 1]
3530                                  + shift_cost[compute_mode][1]
3531                                  + 2 * add_cost[compute_mode]);
3532                             t1 = expand_mult_highpart (compute_mode, op0, ml,
3533                                                        NULL_RTX, 1,
3534                                                        max_cost - extra_cost);
3535                             if (t1 == 0)
3536                               goto fail1;
3537                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
3538                                                                op0, t1),
3539                                                 NULL_RTX);
3540                             t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3541                                                build_int_2 (1, 0), NULL_RTX,1);
3542                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
3543                                                               t1, t3),
3544                                                 NULL_RTX);
3545                             quotient
3546                               = expand_shift (RSHIFT_EXPR, compute_mode, t4,
3547                                               build_int_2 (post_shift - 1, 0),
3548                                               tquotient, 1);
3549                           }
3550                         else
3551                           {
3552                             rtx t1, t2;
3553
3554                             if (pre_shift >= BITS_PER_WORD
3555                                 || post_shift >= BITS_PER_WORD)
3556                               goto fail1;
3557
3558                             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3559                                                build_int_2 (pre_shift, 0),
3560                                                NULL_RTX, 1);
3561                             extra_cost
3562                               = (shift_cost[compute_mode][pre_shift]
3563                                  + shift_cost[compute_mode][post_shift]);
3564                             t2 = expand_mult_highpart (compute_mode, t1, ml,
3565                                                        NULL_RTX, 1,
3566                                                        max_cost - extra_cost);
3567                             if (t2 == 0)
3568                               goto fail1;
3569                             quotient
3570                               = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3571                                               build_int_2 (post_shift, 0),
3572                                               tquotient, 1);
3573                           }
3574                       }
3575                   }
3576                 else            /* Too wide mode to use tricky code */
3577                   break;
3578
3579                 insn = get_last_insn ();
3580                 if (insn != last
3581                     && (set = single_set (insn)) != 0
3582                     && SET_DEST (set) == quotient)
3583                   set_unique_reg_note (insn,
3584                                        REG_EQUAL,
3585                                        gen_rtx_UDIV (compute_mode, op0, op1));
3586               }
3587             else                /* TRUNC_DIV, signed */
3588               {
3589                 unsigned HOST_WIDE_INT ml;
3590                 int lgup, post_shift;
3591                 HOST_WIDE_INT d = INTVAL (op1);
3592                 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3593
3594                 /* n rem d = n rem -d */
3595                 if (rem_flag && d < 0)
3596                   {
3597                     d = abs_d;
3598                     op1 = gen_int_mode (abs_d, compute_mode);
3599                   }
3600
3601                 if (d == 1)
3602                   quotient = op0;
3603                 else if (d == -1)
3604                   quotient = expand_unop (compute_mode, neg_optab, op0,
3605                                           tquotient, 0);
3606                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3607                   {
3608                     /* This case is not handled correctly below.  */
3609                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
3610                                                 compute_mode, 1, 1);
3611                     if (quotient == 0)
3612                       goto fail1;
3613                   }
3614                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3615                          && (rem_flag ? smod_pow2_cheap[compute_mode]
3616                                       : sdiv_pow2_cheap[compute_mode])
3617                          /* We assume that cheap metric is true if the
3618                             optab has an expander for this mode.  */
3619                          && (((rem_flag ? smod_optab : sdiv_optab)
3620                               ->handlers[compute_mode].insn_code
3621                               != CODE_FOR_nothing)
3622                              || (sdivmod_optab->handlers[compute_mode]
3623                                  .insn_code != CODE_FOR_nothing)))
3624                   ;
3625                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3626                   {
3627                     if (rem_flag)
3628                       {
3629                         remainder = expand_smod_pow2 (compute_mode, op0, d);
3630                         if (remainder)
3631                           return gen_lowpart (mode, remainder);
3632                       }
3633                     quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
3634
3635                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
3636                        negate the quotient.  */
3637                     if (d < 0)
3638                       {
3639                         insn = get_last_insn ();
3640                         if (insn != last
3641                             && (set = single_set (insn)) != 0
3642                             && SET_DEST (set) == quotient
3643                             && abs_d < ((unsigned HOST_WIDE_INT) 1
3644                                         << (HOST_BITS_PER_WIDE_INT - 1)))
3645                           set_unique_reg_note (insn,
3646                                                REG_EQUAL,
3647                                                gen_rtx_DIV (compute_mode,
3648                                                             op0,
3649                                                             GEN_INT
3650                                                             (trunc_int_for_mode
3651                                                              (abs_d,
3652                                                               compute_mode))));
3653
3654                         quotient = expand_unop (compute_mode, neg_optab,
3655                                                 quotient, quotient, 0);
3656                       }
3657                   }
3658                 else if (size <= HOST_BITS_PER_WIDE_INT)
3659                   {
3660                     choose_multiplier (abs_d, size, size - 1,
3661                                        &ml, &post_shift, &lgup);
3662                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3663                       {
3664                         rtx t1, t2, t3;
3665
3666                         if (post_shift >= BITS_PER_WORD
3667                             || size - 1 >= BITS_PER_WORD)
3668                           goto fail1;
3669
3670                         extra_cost = (shift_cost[compute_mode][post_shift]
3671                                       + shift_cost[compute_mode][size - 1]
3672                                       + add_cost[compute_mode]);
3673                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3674                                                    NULL_RTX, 0,
3675                                                    max_cost - extra_cost);
3676                         if (t1 == 0)
3677                           goto fail1;
3678                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3679                                            build_int_2 (post_shift, 0), NULL_RTX, 0);
3680                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3681                                            build_int_2 (size - 1, 0), NULL_RTX, 0);
3682                         if (d < 0)
3683                           quotient
3684                             = force_operand (gen_rtx_MINUS (compute_mode,
3685                                                             t3, t2),
3686                                              tquotient);
3687                         else
3688                           quotient
3689                             = force_operand (gen_rtx_MINUS (compute_mode,
3690                                                             t2, t3),
3691                                              tquotient);
3692                       }
3693                     else
3694                       {
3695                         rtx t1, t2, t3, t4;
3696
3697                         if (post_shift >= BITS_PER_WORD
3698                             || size - 1 >= BITS_PER_WORD)
3699                           goto fail1;
3700
3701                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3702                         extra_cost = (shift_cost[compute_mode][post_shift]
3703                                       + shift_cost[compute_mode][size - 1]
3704                                       + 2 * add_cost[compute_mode]);
3705                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3706                                                    NULL_RTX, 0,
3707                                                    max_cost - extra_cost);
3708                         if (t1 == 0)
3709                           goto fail1;
3710                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
3711                                                           t1, op0),
3712                                             NULL_RTX);
3713                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3714                                            build_int_2 (post_shift, 0),
3715                                            NULL_RTX, 0);
3716                         t4 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3717                                            build_int_2 (size - 1, 0),
3718                                            NULL_RTX, 0);
3719                         if (d < 0)
3720                           quotient
3721                             = force_operand (gen_rtx_MINUS (compute_mode,
3722                                                             t4, t3),
3723                                              tquotient);
3724                         else
3725                           quotient
3726                             = force_operand (gen_rtx_MINUS (compute_mode,
3727                                                             t3, t4),
3728                                              tquotient);
3729                       }
3730                   }
3731                 else            /* Too wide mode to use tricky code */
3732                   break;
3733
3734                 insn = get_last_insn ();
3735                 if (insn != last
3736                     && (set = single_set (insn)) != 0
3737                     && SET_DEST (set) == quotient)
3738                   set_unique_reg_note (insn,
3739                                        REG_EQUAL,
3740                                        gen_rtx_DIV (compute_mode, op0, op1));
3741               }
3742             break;
3743           }
3744       fail1:
3745         delete_insns_since (last);
3746         break;
3747
3748       case FLOOR_DIV_EXPR:
3749       case FLOOR_MOD_EXPR:
3750       /* We will come here only for signed operations.  */
3751         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3752           {
3753             unsigned HOST_WIDE_INT mh, ml;
3754             int pre_shift, lgup, post_shift;
3755             HOST_WIDE_INT d = INTVAL (op1);
3756
3757             if (d > 0)
3758               {
3759                 /* We could just as easily deal with negative constants here,
3760                    but it does not seem worth the trouble for GCC 2.6.  */
3761                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3762                   {
3763                     pre_shift = floor_log2 (d);
3764                     if (rem_flag)
3765                       {
3766                         remainder = expand_binop (compute_mode, and_optab, op0,
3767                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3768                                                   remainder, 0, OPTAB_LIB_WIDEN);
3769                         if (remainder)
3770                           return gen_lowpart (mode, remainder);
3771                       }
3772                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3773                                              build_int_2 (pre_shift, 0),
3774                                              tquotient, 0);
3775                   }
3776                 else
3777                   {
3778                     rtx t1, t2, t3, t4;
3779
3780                     mh = choose_multiplier (d, size, size - 1,
3781                                             &ml, &post_shift, &lgup);
3782                     if (mh)
3783                       abort ();
3784
3785                     if (post_shift < BITS_PER_WORD
3786                         && size - 1 < BITS_PER_WORD)
3787                       {
3788                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3789                                            build_int_2 (size - 1, 0),
3790                                            NULL_RTX, 0);
3791                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3792                                            NULL_RTX, 0, OPTAB_WIDEN);
3793                         extra_cost = (shift_cost[compute_mode][post_shift]
3794                                       + shift_cost[compute_mode][size - 1]
3795                                       + 2 * add_cost[compute_mode]);
3796                         t3 = expand_mult_highpart (compute_mode, t2, ml,
3797                                                    NULL_RTX, 1,
3798                                                    max_cost - extra_cost);
3799                         if (t3 != 0)
3800                           {
3801                             t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3802                                                build_int_2 (post_shift, 0),
3803                                                NULL_RTX, 1);
3804                             quotient = expand_binop (compute_mode, xor_optab,
3805                                                      t4, t1, tquotient, 0,
3806                                                      OPTAB_WIDEN);
3807                           }
3808                       }
3809                   }
3810               }
3811             else
3812               {
3813                 rtx nsign, t1, t2, t3, t4;
3814                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
3815                                                   op0, constm1_rtx), NULL_RTX);
3816                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3817                                    0, OPTAB_WIDEN);
3818                 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3819                                       build_int_2 (size - 1, 0), NULL_RTX, 0);
3820                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
3821                                     NULL_RTX);
3822                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3823                                     NULL_RTX, 0);
3824                 if (t4)
3825                   {
3826                     rtx t5;
3827                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3828                                       NULL_RTX, 0);
3829                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
3830                                                             t4, t5),
3831                                               tquotient);
3832                   }
3833               }
3834           }
3835
3836         if (quotient != 0)
3837           break;
3838         delete_insns_since (last);
3839
3840         /* Try using an instruction that produces both the quotient and
3841            remainder, using truncation.  We can easily compensate the quotient
3842            or remainder to get floor rounding, once we have the remainder.
3843            Notice that we compute also the final remainder value here,
3844            and return the result right away.  */
3845         if (target == 0 || GET_MODE (target) != compute_mode)
3846           target = gen_reg_rtx (compute_mode);
3847
3848         if (rem_flag)
3849           {
3850             remainder
3851               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
3852             quotient = gen_reg_rtx (compute_mode);
3853           }
3854         else
3855           {
3856             quotient
3857               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
3858             remainder = gen_reg_rtx (compute_mode);
3859           }
3860
3861         if (expand_twoval_binop (sdivmod_optab, op0, op1,
3862                                  quotient, remainder, 0))
3863           {
3864             /* This could be computed with a branch-less sequence.
3865                Save that for later.  */
3866             rtx tem;
3867             rtx label = gen_label_rtx ();
3868             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
3869             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3870                                 NULL_RTX, 0, OPTAB_WIDEN);
3871             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
3872             expand_dec (quotient, const1_rtx);
3873             expand_inc (remainder, op1);
3874             emit_label (label);
3875             return gen_lowpart (mode, rem_flag ? remainder : quotient);
3876           }
3877
3878         /* No luck with division elimination or divmod.  Have to do it
3879            by conditionally adjusting op0 *and* the result.  */
3880         {
3881           rtx label1, label2, label3, label4, label5;
3882           rtx adjusted_op0;
3883           rtx tem;
3884
3885           quotient = gen_reg_rtx (compute_mode);
3886           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3887           label1 = gen_label_rtx ();
3888           label2 = gen_label_rtx ();
3889           label3 = gen_label_rtx ();
3890           label4 = gen_label_rtx ();
3891           label5 = gen_label_rtx ();
3892           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3893           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
3894           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3895                               quotient, 0, OPTAB_LIB_WIDEN);
3896           if (tem != quotient)
3897             emit_move_insn (quotient, tem);
3898           emit_jump_insn (gen_jump (label5));
3899           emit_barrier ();
3900           emit_label (label1);
3901           expand_inc (adjusted_op0, const1_rtx);
3902           emit_jump_insn (gen_jump (label4));
3903           emit_barrier ();
3904           emit_label (label2);
3905           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
3906           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3907                               quotient, 0, OPTAB_LIB_WIDEN);
3908           if (tem != quotient)
3909             emit_move_insn (quotient, tem);
3910           emit_jump_insn (gen_jump (label5));
3911           emit_barrier ();
3912           emit_label (label3);
3913           expand_dec (adjusted_op0, const1_rtx);
3914           emit_label (label4);
3915           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3916                               quotient, 0, OPTAB_LIB_WIDEN);
3917           if (tem != quotient)
3918             emit_move_insn (quotient, tem);
3919           expand_dec (quotient, const1_rtx);
3920           emit_label (label5);
3921         }
3922         break;
3923
3924       case CEIL_DIV_EXPR:
3925       case CEIL_MOD_EXPR:
3926         if (unsignedp)
3927           {
3928             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
3929               {
3930                 rtx t1, t2, t3;
3931                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3932                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3933                                    build_int_2 (floor_log2 (d), 0),
3934                                    tquotient, 1);
3935                 t2 = expand_binop (compute_mode, and_optab, op0,
3936                                    GEN_INT (d - 1),
3937                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3938                 t3 = gen_reg_rtx (compute_mode);
3939                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3940                                       compute_mode, 1, 1);
3941                 if (t3 == 0)
3942                   {
3943                     rtx lab;
3944                     lab = gen_label_rtx ();
3945                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3946                     expand_inc (t1, const1_rtx);
3947                     emit_label (lab);
3948                     quotient = t1;
3949                   }
3950                 else
3951                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3952                                                           t1, t3),
3953                                             tquotient);
3954                 break;
3955               }
3956
3957             /* Try using an instruction that produces both the quotient and
3958                remainder, using truncation.  We can easily compensate the
3959                quotient or remainder to get ceiling rounding, once we have the
3960                remainder.  Notice that we compute also the final remainder
3961                value here, and return the result right away.  */
3962             if (target == 0 || GET_MODE (target) != compute_mode)
3963               target = gen_reg_rtx (compute_mode);
3964
3965             if (rem_flag)
3966               {
3967                 remainder = (REG_P (target)
3968                              ? target : gen_reg_rtx (compute_mode));
3969                 quotient = gen_reg_rtx (compute_mode);
3970               }
3971             else
3972               {
3973                 quotient = (REG_P (target)
3974                             ? target : gen_reg_rtx (compute_mode));
3975                 remainder = gen_reg_rtx (compute_mode);
3976               }
3977
3978             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
3979                                      remainder, 1))
3980               {
3981                 /* This could be computed with a branch-less sequence.
3982                    Save that for later.  */
3983                 rtx label = gen_label_rtx ();
3984                 do_cmp_and_jump (remainder, const0_rtx, EQ,
3985                                  compute_mode, label);
3986                 expand_inc (quotient, const1_rtx);
3987                 expand_dec (remainder, op1);
3988                 emit_label (label);
3989                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3990               }
3991
3992             /* No luck with division elimination or divmod.  Have to do it
3993                by conditionally adjusting op0 *and* the result.  */
3994             {
3995               rtx label1, label2;
3996               rtx adjusted_op0, tem;
3997
3998               quotient = gen_reg_rtx (compute_mode);
3999               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4000               label1 = gen_label_rtx ();
4001               label2 = gen_label_rtx ();
4002               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4003                                compute_mode, label1);
4004               emit_move_insn  (quotient, const0_rtx);
4005               emit_jump_insn (gen_jump (label2));
4006               emit_barrier ();
4007               emit_label (label1);
4008               expand_dec (adjusted_op0, const1_rtx);
4009               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4010                                   quotient, 1, OPTAB_LIB_WIDEN);
4011               if (tem != quotient)
4012                 emit_move_insn (quotient, tem);
4013               expand_inc (quotient, const1_rtx);
4014               emit_label (label2);
4015             }
4016           }
4017         else /* signed */
4018           {
4019             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4020                 && INTVAL (op1) >= 0)
4021               {
4022                 /* This is extremely similar to the code for the unsigned case
4023                    above.  For 2.7 we should merge these variants, but for
4024                    2.6.1 I don't want to touch the code for unsigned since that
4025                    get used in C.  The signed case will only be used by other
4026                    languages (Ada).  */
4027
4028                 rtx t1, t2, t3;
4029                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4030                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4031                                    build_int_2 (floor_log2 (d), 0),
4032                                    tquotient, 0);
4033                 t2 = expand_binop (compute_mode, and_optab, op0,
4034                                    GEN_INT (d - 1),
4035                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4036                 t3 = gen_reg_rtx (compute_mode);
4037                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4038                                       compute_mode, 1, 1);
4039                 if (t3 == 0)
4040                   {
4041                     rtx lab;
4042                     lab = gen_label_rtx ();
4043                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4044                     expand_inc (t1, const1_rtx);
4045                     emit_label (lab);
4046                     quotient = t1;
4047                   }
4048                 else
4049                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4050                                                           t1, t3),
4051                                             tquotient);
4052                 break;
4053               }
4054
4055             /* Try using an instruction that produces both the quotient and
4056                remainder, using truncation.  We can easily compensate the
4057                quotient or remainder to get ceiling rounding, once we have the
4058                remainder.  Notice that we compute also the final remainder
4059                value here, and return the result right away.  */
4060             if (target == 0 || GET_MODE (target) != compute_mode)
4061               target = gen_reg_rtx (compute_mode);
4062             if (rem_flag)
4063               {
4064                 remainder= (REG_P (target)
4065                             ? target : gen_reg_rtx (compute_mode));
4066                 quotient = gen_reg_rtx (compute_mode);
4067               }
4068             else
4069               {
4070                 quotient = (REG_P (target)
4071                             ? target : gen_reg_rtx (compute_mode));
4072                 remainder = gen_reg_rtx (compute_mode);
4073               }
4074
4075             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4076                                      remainder, 0))
4077               {
4078                 /* This could be computed with a branch-less sequence.
4079                    Save that for later.  */
4080                 rtx tem;
4081                 rtx label = gen_label_rtx ();
4082                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4083                                  compute_mode, label);
4084                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4085                                     NULL_RTX, 0, OPTAB_WIDEN);
4086                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4087                 expand_inc (quotient, const1_rtx);
4088                 expand_dec (remainder, op1);
4089                 emit_label (label);
4090                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4091               }
4092
4093             /* No luck with division elimination or divmod.  Have to do it
4094                by conditionally adjusting op0 *and* the result.  */
4095             {
4096               rtx label1, label2, label3, label4, label5;
4097               rtx adjusted_op0;
4098               rtx tem;
4099
4100               quotient = gen_reg_rtx (compute_mode);
4101               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4102               label1 = gen_label_rtx ();
4103               label2 = gen_label_rtx ();
4104               label3 = gen_label_rtx ();
4105               label4 = gen_label_rtx ();
4106               label5 = gen_label_rtx ();
4107               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4108               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4109                                compute_mode, label1);
4110               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4111                                   quotient, 0, OPTAB_LIB_WIDEN);
4112               if (tem != quotient)
4113                 emit_move_insn (quotient, tem);
4114               emit_jump_insn (gen_jump (label5));
4115               emit_barrier ();
4116               emit_label (label1);
4117               expand_dec (adjusted_op0, const1_rtx);
4118               emit_jump_insn (gen_jump (label4));
4119               emit_barrier ();
4120               emit_label (label2);
4121               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4122                                compute_mode, label3);
4123               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4124                                   quotient, 0, OPTAB_LIB_WIDEN);
4125               if (tem != quotient)
4126                 emit_move_insn (quotient, tem);
4127               emit_jump_insn (gen_jump (label5));
4128               emit_barrier ();
4129               emit_label (label3);
4130               expand_inc (adjusted_op0, const1_rtx);
4131               emit_label (label4);
4132               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4133                                   quotient, 0, OPTAB_LIB_WIDEN);
4134               if (tem != quotient)
4135                 emit_move_insn (quotient, tem);
4136               expand_inc (quotient, const1_rtx);
4137               emit_label (label5);
4138             }
4139           }
4140         break;
4141
4142       case EXACT_DIV_EXPR:
4143         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4144           {
4145             HOST_WIDE_INT d = INTVAL (op1);
4146             unsigned HOST_WIDE_INT ml;
4147             int pre_shift;
4148             rtx t1;
4149
4150             pre_shift = floor_log2 (d & -d);
4151             ml = invert_mod2n (d >> pre_shift, size);
4152             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4153                                build_int_2 (pre_shift, 0), NULL_RTX, unsignedp);
4154             quotient = expand_mult (compute_mode, t1,
4155                                     gen_int_mode (ml, compute_mode),
4156                                     NULL_RTX, 1);
4157
4158             insn = get_last_insn ();
4159             set_unique_reg_note (insn,
4160                                  REG_EQUAL,
4161                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4162                                                  compute_mode,
4163                                                  op0, op1));
4164           }
4165         break;
4166
4167       case ROUND_DIV_EXPR:
4168       case ROUND_MOD_EXPR:
4169         if (unsignedp)
4170           {
4171             rtx tem;
4172             rtx label;
4173             label = gen_label_rtx ();
4174             quotient = gen_reg_rtx (compute_mode);
4175             remainder = gen_reg_rtx (compute_mode);
4176             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4177               {
4178                 rtx tem;
4179                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4180                                          quotient, 1, OPTAB_LIB_WIDEN);
4181                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4182                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4183                                           remainder, 1, OPTAB_LIB_WIDEN);
4184               }
4185             tem = plus_constant (op1, -1);
4186             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4187                                 build_int_2 (1, 0), NULL_RTX, 1);
4188             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4189             expand_inc (quotient, const1_rtx);
4190             expand_dec (remainder, op1);
4191             emit_label (label);
4192           }
4193         else
4194           {
4195             rtx abs_rem, abs_op1, tem, mask;
4196             rtx label;
4197             label = gen_label_rtx ();
4198             quotient = gen_reg_rtx (compute_mode);
4199             remainder = gen_reg_rtx (compute_mode);
4200             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4201               {
4202                 rtx tem;
4203                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4204                                          quotient, 0, OPTAB_LIB_WIDEN);
4205                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4206                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4207                                           remainder, 0, OPTAB_LIB_WIDEN);
4208               }
4209             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4210             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4211             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4212                                 build_int_2 (1, 0), NULL_RTX, 1);
4213             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4214             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4215                                 NULL_RTX, 0, OPTAB_WIDEN);
4216             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4217                                 build_int_2 (size - 1, 0), NULL_RTX, 0);
4218             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4219                                 NULL_RTX, 0, OPTAB_WIDEN);
4220             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4221                                 NULL_RTX, 0, OPTAB_WIDEN);
4222             expand_inc (quotient, tem);
4223             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4224                                 NULL_RTX, 0, OPTAB_WIDEN);
4225             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4226                                 NULL_RTX, 0, OPTAB_WIDEN);
4227             expand_dec (remainder, tem);
4228             emit_label (label);
4229           }
4230         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4231
4232       default:
4233         abort ();
4234       }
4235
4236   if (quotient == 0)
4237     {
4238       if (target && GET_MODE (target) != compute_mode)
4239         target = 0;
4240
4241       if (rem_flag)
4242         {
4243           /* Try to produce the remainder without producing the quotient.
4244              If we seem to have a divmod pattern that does not require widening,
4245              don't try widening here.  We should really have a WIDEN argument
4246              to expand_twoval_binop, since what we'd really like to do here is
4247              1) try a mod insn in compute_mode
4248              2) try a divmod insn in compute_mode
4249              3) try a div insn in compute_mode and multiply-subtract to get
4250                 remainder
4251              4) try the same things with widening allowed.  */
4252           remainder
4253             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4254                                  op0, op1, target,
4255                                  unsignedp,
4256                                  ((optab2->handlers[compute_mode].insn_code
4257                                    != CODE_FOR_nothing)
4258                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4259           if (remainder == 0)
4260             {
4261               /* No luck there.  Can we do remainder and divide at once
4262                  without a library call?  */
4263               remainder = gen_reg_rtx (compute_mode);
4264               if (! expand_twoval_binop ((unsignedp
4265                                           ? udivmod_optab
4266                                           : sdivmod_optab),
4267                                          op0, op1,
4268                                          NULL_RTX, remainder, unsignedp))
4269                 remainder = 0;
4270             }
4271
4272           if (remainder)
4273             return gen_lowpart (mode, remainder);
4274         }
4275
4276       /* Produce the quotient.  Try a quotient insn, but not a library call.
4277          If we have a divmod in this mode, use it in preference to widening
4278          the div (for this test we assume it will not fail). Note that optab2
4279          is set to the one of the two optabs that the call below will use.  */
4280       quotient
4281         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4282                              op0, op1, rem_flag ? NULL_RTX : target,
4283                              unsignedp,
4284                              ((optab2->handlers[compute_mode].insn_code
4285                                != CODE_FOR_nothing)
4286                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4287
4288       if (quotient == 0)
4289         {
4290           /* No luck there.  Try a quotient-and-remainder insn,
4291              keeping the quotient alone.  */
4292           quotient = gen_reg_rtx (compute_mode);
4293           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4294                                      op0, op1,
4295                                      quotient, NULL_RTX, unsignedp))
4296             {
4297               quotient = 0;
4298               if (! rem_flag)
4299                 /* Still no luck.  If we are not computing the remainder,
4300                    use a library call for the quotient.  */
4301                 quotient = sign_expand_binop (compute_mode,
4302                                               udiv_optab, sdiv_optab,
4303                                               op0, op1, target,
4304                                               unsignedp, OPTAB_LIB_WIDEN);
4305             }
4306         }
4307     }
4308
4309   if (rem_flag)
4310     {
4311       if (target && GET_MODE (target) != compute_mode)
4312         target = 0;
4313
4314       if (quotient == 0)
4315         /* No divide instruction either.  Use library for remainder.  */
4316         remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4317                                        op0, op1, target,
4318                                        unsignedp, OPTAB_LIB_WIDEN);
4319       else
4320         {
4321           /* We divided.  Now finish doing X - Y * (X / Y).  */
4322           remainder = expand_mult (compute_mode, quotient, op1,
4323                                    NULL_RTX, unsignedp);
4324           remainder = expand_binop (compute_mode, sub_optab, op0,
4325                                     remainder, target, unsignedp,
4326                                     OPTAB_LIB_WIDEN);
4327         }
4328     }
4329
4330   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4331 }
4332 \f
4333 /* Return a tree node with data type TYPE, describing the value of X.
4334    Usually this is an VAR_DECL, if there is no obvious better choice.
4335    X may be an expression, however we only support those expressions
4336    generated by loop.c.  */
4337
4338 tree
4339 make_tree (tree type, rtx x)
4340 {
4341   tree t;
4342
4343   switch (GET_CODE (x))
4344     {
4345     case CONST_INT:
4346       t = build_int_2 (INTVAL (x),
4347                        (TYPE_UNSIGNED (type)
4348                         && (GET_MODE_BITSIZE (TYPE_MODE (type))
4349                             < HOST_BITS_PER_WIDE_INT))
4350                        || INTVAL (x) >= 0 ? 0 : -1);
4351       TREE_TYPE (t) = type;
4352       return t;
4353
4354     case CONST_DOUBLE:
4355       if (GET_MODE (x) == VOIDmode)
4356         {
4357           t = build_int_2 (CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4358           TREE_TYPE (t) = type;
4359         }
4360       else
4361         {
4362           REAL_VALUE_TYPE d;
4363
4364           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4365           t = build_real (type, d);
4366         }
4367
4368       return t;
4369
4370     case CONST_VECTOR:
4371       {
4372         int i, units;
4373         rtx elt;
4374         tree t = NULL_TREE;
4375
4376         units = CONST_VECTOR_NUNITS (x);
4377
4378         /* Build a tree with vector elements.  */
4379         for (i = units - 1; i >= 0; --i)
4380           {
4381             elt = CONST_VECTOR_ELT (x, i);
4382             t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4383           }
4384
4385         return build_vector (type, t);
4386       }
4387
4388     case PLUS:
4389       return fold (build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4390                            make_tree (type, XEXP (x, 1))));
4391
4392     case MINUS:
4393       return fold (build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4394                            make_tree (type, XEXP (x, 1))));
4395
4396     case NEG:
4397       return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
4398
4399     case MULT:
4400       return fold (build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4401                            make_tree (type, XEXP (x, 1))));
4402
4403     case ASHIFT:
4404       return fold (build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4405                            make_tree (type, XEXP (x, 1))));
4406
4407     case LSHIFTRT:
4408       t = lang_hooks.types.unsigned_type (type);
4409       return fold (convert (type,
4410                             build2 (RSHIFT_EXPR, t,
4411                                     make_tree (t, XEXP (x, 0)),
4412                                     make_tree (type, XEXP (x, 1)))));
4413
4414     case ASHIFTRT:
4415       t = lang_hooks.types.signed_type (type);
4416       return fold (convert (type,
4417                             build2 (RSHIFT_EXPR, t,
4418                                     make_tree (t, XEXP (x, 0)),
4419                                     make_tree (type, XEXP (x, 1)))));
4420
4421     case DIV:
4422       if (TREE_CODE (type) != REAL_TYPE)
4423         t = lang_hooks.types.signed_type (type);
4424       else
4425         t = type;
4426
4427       return fold (convert (type,
4428                             build2 (TRUNC_DIV_EXPR, t,
4429                                     make_tree (t, XEXP (x, 0)),
4430                                     make_tree (t, XEXP (x, 1)))));
4431     case UDIV:
4432       t = lang_hooks.types.unsigned_type (type);
4433       return fold (convert (type,
4434                             build2 (TRUNC_DIV_EXPR, t,
4435                                     make_tree (t, XEXP (x, 0)),
4436                                     make_tree (t, XEXP (x, 1)))));
4437
4438     case SIGN_EXTEND:
4439     case ZERO_EXTEND:
4440       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4441                                           GET_CODE (x) == ZERO_EXTEND);
4442       return fold (convert (type, make_tree (t, XEXP (x, 0))));
4443
4444     default:
4445       t = build_decl (VAR_DECL, NULL_TREE, type);
4446
4447       /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4448          ptr_mode.  So convert.  */
4449       if (POINTER_TYPE_P (type))
4450         x = convert_memory_address (TYPE_MODE (type), x);
4451
4452       /* Note that we do *not* use SET_DECL_RTL here, because we do not
4453          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
4454       t->decl.rtl = x;
4455
4456       return t;
4457     }
4458 }
4459
4460 /* Check whether the multiplication X * MULT + ADD overflows.
4461    X, MULT and ADD must be CONST_*.
4462    MODE is the machine mode for the computation.
4463    X and MULT must have mode MODE.  ADD may have a different mode.
4464    So can X (defaults to same as MODE).
4465    UNSIGNEDP is nonzero to do unsigned multiplication.  */
4466
4467 bool
4468 const_mult_add_overflow_p (rtx x, rtx mult, rtx add, enum machine_mode mode, int unsignedp)
4469 {
4470   tree type, mult_type, add_type, result;
4471
4472   type = lang_hooks.types.type_for_mode (mode, unsignedp);
4473
4474   /* In order to get a proper overflow indication from an unsigned
4475      type, we have to pretend that it's a sizetype.  */
4476   mult_type = type;
4477   if (unsignedp)
4478     {
4479       mult_type = copy_node (type);
4480       TYPE_IS_SIZETYPE (mult_type) = 1;
4481     }
4482
4483   add_type = (GET_MODE (add) == VOIDmode ? mult_type
4484               : lang_hooks.types.type_for_mode (GET_MODE (add), unsignedp));
4485
4486   result = fold (build2 (PLUS_EXPR, mult_type,
4487                          fold (build2 (MULT_EXPR, mult_type,
4488                                        make_tree (mult_type, x),
4489                                        make_tree (mult_type, mult))),
4490                          make_tree (add_type, add)));
4491
4492   return TREE_CONSTANT_OVERFLOW (result);
4493 }
4494
4495 /* Return an rtx representing the value of X * MULT + ADD.
4496    TARGET is a suggestion for where to store the result (an rtx).
4497    MODE is the machine mode for the computation.
4498    X and MULT must have mode MODE.  ADD may have a different mode.
4499    So can X (defaults to same as MODE).
4500    UNSIGNEDP is nonzero to do unsigned multiplication.
4501    This may emit insns.  */
4502
4503 rtx
4504 expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
4505                  int unsignedp)
4506 {
4507   tree type = lang_hooks.types.type_for_mode (mode, unsignedp);
4508   tree add_type = (GET_MODE (add) == VOIDmode
4509                    ? type: lang_hooks.types.type_for_mode (GET_MODE (add),
4510                                                            unsignedp));
4511   tree result =  fold (build2 (PLUS_EXPR, type,
4512                                fold (build2 (MULT_EXPR, type,
4513                                              make_tree (type, x),
4514                                              make_tree (type, mult))),
4515                                make_tree (add_type, add)));
4516
4517   return expand_expr (result, target, VOIDmode, 0);
4518 }
4519 \f
4520 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4521    and returning TARGET.
4522
4523    If TARGET is 0, a pseudo-register or constant is returned.  */
4524
4525 rtx
4526 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
4527 {
4528   rtx tem = 0;
4529
4530   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4531     tem = simplify_binary_operation (AND, mode, op0, op1);
4532   if (tem == 0)
4533     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4534
4535   if (target == 0)
4536     target = tem;
4537   else if (tem != target)
4538     emit_move_insn (target, tem);
4539   return target;
4540 }
4541 \f
4542 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
4543    and storing in TARGET.  Normally return TARGET.
4544    Return 0 if that cannot be done.
4545
4546    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
4547    it is VOIDmode, they cannot both be CONST_INT.
4548
4549    UNSIGNEDP is for the case where we have to widen the operands
4550    to perform the operation.  It says to use zero-extension.
4551
4552    NORMALIZEP is 1 if we should convert the result to be either zero
4553    or one.  Normalize is -1 if we should convert the result to be
4554    either zero or -1.  If NORMALIZEP is zero, the result will be left
4555    "raw" out of the scc insn.  */
4556
4557 rtx
4558 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
4559                  enum machine_mode mode, int unsignedp, int normalizep)
4560 {
4561   rtx subtarget;
4562   enum insn_code icode;
4563   enum machine_mode compare_mode;
4564   enum machine_mode target_mode = GET_MODE (target);
4565   rtx tem;
4566   rtx last = get_last_insn ();
4567   rtx pattern, comparison;
4568
4569   if (unsignedp)
4570     code = unsigned_condition (code);
4571
4572   /* If one operand is constant, make it the second one.  Only do this
4573      if the other operand is not constant as well.  */
4574
4575   if (swap_commutative_operands_p (op0, op1))
4576     {
4577       tem = op0;
4578       op0 = op1;
4579       op1 = tem;
4580       code = swap_condition (code);
4581     }
4582
4583   if (mode == VOIDmode)
4584     mode = GET_MODE (op0);
4585
4586   /* For some comparisons with 1 and -1, we can convert this to
4587      comparisons with zero.  This will often produce more opportunities for
4588      store-flag insns.  */
4589
4590   switch (code)
4591     {
4592     case LT:
4593       if (op1 == const1_rtx)
4594         op1 = const0_rtx, code = LE;
4595       break;
4596     case LE:
4597       if (op1 == constm1_rtx)
4598         op1 = const0_rtx, code = LT;
4599       break;
4600     case GE:
4601       if (op1 == const1_rtx)
4602         op1 = const0_rtx, code = GT;
4603       break;
4604     case GT:
4605       if (op1 == constm1_rtx)
4606         op1 = const0_rtx, code = GE;
4607       break;
4608     case GEU:
4609       if (op1 == const1_rtx)
4610         op1 = const0_rtx, code = NE;
4611       break;
4612     case LTU:
4613       if (op1 == const1_rtx)
4614         op1 = const0_rtx, code = EQ;
4615       break;
4616     default:
4617       break;
4618     }
4619
4620   /* If we are comparing a double-word integer with zero or -1, we can
4621      convert the comparison into one involving a single word.  */
4622   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
4623       && GET_MODE_CLASS (mode) == MODE_INT
4624       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
4625     {
4626       if ((code == EQ || code == NE)
4627           && (op1 == const0_rtx || op1 == constm1_rtx))
4628         {
4629           rtx op00, op01, op0both;
4630
4631           /* Do a logical OR or AND of the two words and compare the result.  */
4632           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
4633           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
4634           op0both = expand_binop (word_mode,
4635                                   op1 == const0_rtx ? ior_optab : and_optab,
4636                                   op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
4637
4638           if (op0both != 0)
4639             return emit_store_flag (target, code, op0both, op1, word_mode,
4640                                     unsignedp, normalizep);
4641         }
4642       else if ((code == LT || code == GE) && op1 == const0_rtx)
4643         {
4644           rtx op0h;
4645
4646           /* If testing the sign bit, can just test on high word.  */
4647           op0h = simplify_gen_subreg (word_mode, op0, mode,
4648                                       subreg_highpart_offset (word_mode, mode));
4649           return emit_store_flag (target, code, op0h, op1, word_mode,
4650                                   unsignedp, normalizep);
4651         }
4652     }
4653
4654   /* From now on, we won't change CODE, so set ICODE now.  */
4655   icode = setcc_gen_code[(int) code];
4656
4657   /* If this is A < 0 or A >= 0, we can do this by taking the ones
4658      complement of A (for GE) and shifting the sign bit to the low bit.  */
4659   if (op1 == const0_rtx && (code == LT || code == GE)
4660       && GET_MODE_CLASS (mode) == MODE_INT
4661       && (normalizep || STORE_FLAG_VALUE == 1
4662           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4663               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4664                   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
4665     {
4666       subtarget = target;
4667
4668       /* If the result is to be wider than OP0, it is best to convert it
4669          first.  If it is to be narrower, it is *incorrect* to convert it
4670          first.  */
4671       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
4672         {
4673           op0 = convert_modes (target_mode, mode, op0, 0);
4674           mode = target_mode;
4675         }
4676
4677       if (target_mode != mode)
4678         subtarget = 0;
4679
4680       if (code == GE)
4681         op0 = expand_unop (mode, one_cmpl_optab, op0,
4682                            ((STORE_FLAG_VALUE == 1 || normalizep)
4683                             ? 0 : subtarget), 0);
4684
4685       if (STORE_FLAG_VALUE == 1 || normalizep)
4686         /* If we are supposed to produce a 0/1 value, we want to do
4687            a logical shift from the sign bit to the low-order bit; for
4688            a -1/0 value, we do an arithmetic shift.  */
4689         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
4690                             size_int (GET_MODE_BITSIZE (mode) - 1),
4691                             subtarget, normalizep != -1);
4692
4693       if (mode != target_mode)
4694         op0 = convert_modes (target_mode, mode, op0, 0);
4695
4696       return op0;
4697     }
4698
4699   if (icode != CODE_FOR_nothing)
4700     {
4701       insn_operand_predicate_fn pred;
4702
4703       /* We think we may be able to do this with a scc insn.  Emit the
4704          comparison and then the scc insn.  */
4705
4706       do_pending_stack_adjust ();
4707       last = get_last_insn ();
4708
4709       comparison
4710         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
4711       if (CONSTANT_P (comparison))
4712         {
4713           if (GET_CODE (comparison) == CONST_INT)
4714             {
4715               if (comparison == const0_rtx)
4716                 return const0_rtx;
4717             }
4718 #ifdef FLOAT_STORE_FLAG_VALUE
4719           else if (GET_CODE (comparison) == CONST_DOUBLE)
4720             {
4721               if (comparison == CONST0_RTX (GET_MODE (comparison)))
4722                 return const0_rtx;
4723             }
4724 #endif
4725           else
4726             abort ();
4727           if (normalizep == 1)
4728             return const1_rtx;
4729           if (normalizep == -1)
4730             return constm1_rtx;
4731           return const_true_rtx;
4732         }
4733
4734       /* The code of COMPARISON may not match CODE if compare_from_rtx
4735          decided to swap its operands and reverse the original code.
4736
4737          We know that compare_from_rtx returns either a CONST_INT or
4738          a new comparison code, so it is safe to just extract the
4739          code from COMPARISON.  */
4740       code = GET_CODE (comparison);
4741
4742       /* Get a reference to the target in the proper mode for this insn.  */
4743       compare_mode = insn_data[(int) icode].operand[0].mode;
4744       subtarget = target;
4745       pred = insn_data[(int) icode].operand[0].predicate;
4746       if (preserve_subexpressions_p ()
4747           || ! (*pred) (subtarget, compare_mode))
4748         subtarget = gen_reg_rtx (compare_mode);
4749
4750       pattern = GEN_FCN (icode) (subtarget);
4751       if (pattern)
4752         {
4753           emit_insn (pattern);
4754
4755           /* If we are converting to a wider mode, first convert to
4756              TARGET_MODE, then normalize.  This produces better combining
4757              opportunities on machines that have a SIGN_EXTRACT when we are
4758              testing a single bit.  This mostly benefits the 68k.
4759
4760              If STORE_FLAG_VALUE does not have the sign bit set when
4761              interpreted in COMPARE_MODE, we can do this conversion as
4762              unsigned, which is usually more efficient.  */
4763           if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4764             {
4765               convert_move (target, subtarget,
4766                             (GET_MODE_BITSIZE (compare_mode)
4767                              <= HOST_BITS_PER_WIDE_INT)
4768                             && 0 == (STORE_FLAG_VALUE
4769                                      & ((HOST_WIDE_INT) 1
4770                                         << (GET_MODE_BITSIZE (compare_mode) -1))));
4771               op0 = target;
4772               compare_mode = target_mode;
4773             }
4774           else
4775             op0 = subtarget;
4776
4777           /* If we want to keep subexpressions around, don't reuse our
4778              last target.  */
4779
4780           if (preserve_subexpressions_p ())
4781             subtarget = 0;
4782
4783           /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
4784              we don't have to do anything.  */
4785           if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4786             ;
4787           /* STORE_FLAG_VALUE might be the most negative number, so write
4788              the comparison this way to avoid a compiler-time warning.  */
4789           else if (- normalizep == STORE_FLAG_VALUE)
4790             op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
4791
4792           /* We don't want to use STORE_FLAG_VALUE < 0 below since this
4793              makes it hard to use a value of just the sign bit due to
4794              ANSI integer constant typing rules.  */
4795           else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
4796                    && (STORE_FLAG_VALUE
4797                        & ((HOST_WIDE_INT) 1
4798                           << (GET_MODE_BITSIZE (compare_mode) - 1))))
4799             op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
4800                                 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
4801                                 subtarget, normalizep == 1);
4802           else if (STORE_FLAG_VALUE & 1)
4803             {
4804               op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
4805               if (normalizep == -1)
4806                 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
4807             }
4808           else
4809             abort ();
4810
4811           /* If we were converting to a smaller mode, do the
4812              conversion now.  */
4813           if (target_mode != compare_mode)
4814             {
4815               convert_move (target, op0, 0);
4816               return target;
4817             }
4818           else
4819             return op0;
4820         }
4821     }
4822
4823   delete_insns_since (last);
4824
4825   /* If expensive optimizations, use different pseudo registers for each
4826      insn, instead of reusing the same pseudo.  This leads to better CSE,
4827      but slows down the compiler, since there are more pseudos */
4828   subtarget = (!flag_expensive_optimizations
4829                && (target_mode == mode)) ? target : NULL_RTX;
4830
4831   /* If we reached here, we can't do this with a scc insn.  However, there
4832      are some comparisons that can be done directly.  For example, if
4833      this is an equality comparison of integers, we can try to exclusive-or
4834      (or subtract) the two operands and use a recursive call to try the
4835      comparison with zero.  Don't do any of these cases if branches are
4836      very cheap.  */
4837
4838   if (BRANCH_COST > 0
4839       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
4840       && op1 != const0_rtx)
4841     {
4842       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
4843                           OPTAB_WIDEN);
4844
4845       if (tem == 0)
4846         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
4847                             OPTAB_WIDEN);
4848       if (tem != 0)
4849         tem = emit_store_flag (target, code, tem, const0_rtx,
4850                                mode, unsignedp, normalizep);
4851       if (tem == 0)
4852         delete_insns_since (last);
4853       return tem;
4854     }
4855
4856   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
4857      the constant zero.  Reject all other comparisons at this point.  Only
4858      do LE and GT if branches are expensive since they are expensive on
4859      2-operand machines.  */
4860
4861   if (BRANCH_COST == 0
4862       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
4863       || (code != EQ && code != NE
4864           && (BRANCH_COST <= 1 || (code != LE && code != GT))))
4865     return 0;
4866
4867   /* See what we need to return.  We can only return a 1, -1, or the
4868      sign bit.  */
4869
4870   if (normalizep == 0)
4871     {
4872       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
4873         normalizep = STORE_FLAG_VALUE;
4874
4875       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4876                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4877                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
4878         ;
4879       else
4880         return 0;
4881     }
4882
4883   /* Try to put the result of the comparison in the sign bit.  Assume we can't
4884      do the necessary operation below.  */
4885
4886   tem = 0;
4887
4888   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
4889      the sign bit set.  */
4890
4891   if (code == LE)
4892     {
4893       /* This is destructive, so SUBTARGET can't be OP0.  */
4894       if (rtx_equal_p (subtarget, op0))
4895         subtarget = 0;
4896
4897       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
4898                           OPTAB_WIDEN);
4899       if (tem)
4900         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
4901                             OPTAB_WIDEN);
4902     }
4903
4904   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
4905      number of bits in the mode of OP0, minus one.  */
4906
4907   if (code == GT)
4908     {
4909       if (rtx_equal_p (subtarget, op0))
4910         subtarget = 0;
4911
4912       tem = expand_shift (RSHIFT_EXPR, mode, op0,
4913                           size_int (GET_MODE_BITSIZE (mode) - 1),
4914                           subtarget, 0);
4915       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
4916                           OPTAB_WIDEN);
4917     }
4918
4919   if (code == EQ || code == NE)
4920     {
4921       /* For EQ or NE, one way to do the comparison is to apply an operation
4922          that converts the operand into a positive number if it is nonzero
4923          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
4924          for NE we negate.  This puts the result in the sign bit.  Then we
4925          normalize with a shift, if needed.
4926
4927          Two operations that can do the above actions are ABS and FFS, so try
4928          them.  If that doesn't work, and MODE is smaller than a full word,
4929          we can use zero-extension to the wider mode (an unsigned conversion)
4930          as the operation.  */
4931
4932       /* Note that ABS doesn't yield a positive number for INT_MIN, but
4933          that is compensated by the subsequent overflow when subtracting
4934          one / negating.  */
4935
4936       if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
4937         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
4938       else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
4939         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
4940       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
4941         {
4942           tem = convert_modes (word_mode, mode, op0, 1);
4943           mode = word_mode;
4944         }
4945
4946       if (tem != 0)
4947         {
4948           if (code == EQ)
4949             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
4950                                 0, OPTAB_WIDEN);
4951           else
4952             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
4953         }
4954
4955       /* If we couldn't do it that way, for NE we can "or" the two's complement
4956          of the value with itself.  For EQ, we take the one's complement of
4957          that "or", which is an extra insn, so we only handle EQ if branches
4958          are expensive.  */
4959
4960       if (tem == 0 && (code == NE || BRANCH_COST > 1))
4961         {
4962           if (rtx_equal_p (subtarget, op0))
4963             subtarget = 0;
4964
4965           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
4966           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
4967                               OPTAB_WIDEN);
4968
4969           if (tem && code == EQ)
4970             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
4971         }
4972     }
4973
4974   if (tem && normalizep)
4975     tem = expand_shift (RSHIFT_EXPR, mode, tem,
4976                         size_int (GET_MODE_BITSIZE (mode) - 1),
4977                         subtarget, normalizep == 1);
4978
4979   if (tem)
4980     {
4981       if (GET_MODE (tem) != target_mode)
4982         {
4983           convert_move (target, tem, 0);
4984           tem = target;
4985         }
4986       else if (!subtarget)
4987         {
4988           emit_move_insn (target, tem);
4989           tem = target;
4990         }
4991     }
4992   else
4993     delete_insns_since (last);
4994
4995   return tem;
4996 }
4997
4998 /* Like emit_store_flag, but always succeeds.  */
4999
5000 rtx
5001 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5002                        enum machine_mode mode, int unsignedp, int normalizep)
5003 {
5004   rtx tem, label;
5005
5006   /* First see if emit_store_flag can do the job.  */
5007   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5008   if (tem != 0)
5009     return tem;
5010
5011   if (normalizep == 0)
5012     normalizep = 1;
5013
5014   /* If this failed, we have to do this with set/compare/jump/set code.  */
5015
5016   if (!REG_P (target)
5017       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5018     target = gen_reg_rtx (GET_MODE (target));
5019
5020   emit_move_insn (target, const1_rtx);
5021   label = gen_label_rtx ();
5022   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5023                            NULL_RTX, label);
5024
5025   emit_move_insn (target, const0_rtx);
5026   emit_label (label);
5027
5028   return target;
5029 }
5030 \f
5031 /* Perform possibly multi-word comparison and conditional jump to LABEL
5032    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
5033
5034    The algorithm is based on the code in expr.c:do_jump.
5035
5036    Note that this does not perform a general comparison.  Only variants
5037    generated within expmed.c are correctly handled, others abort (but could
5038    be handled if needed).  */
5039
5040 static void
5041 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5042                  rtx label)
5043 {
5044   /* If this mode is an integer too wide to compare properly,
5045      compare word by word.  Rely on cse to optimize constant cases.  */
5046
5047   if (GET_MODE_CLASS (mode) == MODE_INT
5048       && ! can_compare_p (op, mode, ccp_jump))
5049     {
5050       rtx label2 = gen_label_rtx ();
5051
5052       switch (op)
5053         {
5054         case LTU:
5055           do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
5056           break;
5057
5058         case LEU:
5059           do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
5060           break;
5061
5062         case LT:
5063           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
5064           break;
5065
5066         case GT:
5067           do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
5068           break;
5069
5070         case GE:
5071           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
5072           break;
5073
5074           /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
5075              that's the only equality operations we do */
5076         case EQ:
5077           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
5078             abort ();
5079           do_jump_by_parts_equality_rtx (arg1, label2, label);
5080           break;
5081
5082         case NE:
5083           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
5084             abort ();
5085           do_jump_by_parts_equality_rtx (arg1, label, label2);
5086           break;
5087
5088         default:
5089           abort ();
5090         }
5091
5092       emit_label (label2);
5093     }
5094   else
5095     emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
5096 }