gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 2, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING.  If not, write to the Free
  20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  21 02111-1307, USA.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "tm.h"
  28 #include "toplev.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "tm_p.h"
  32 #include "flags.h"
  33 #include "insn-config.h"
  34 #include "expr.h"
  35 #include "optabs.h"
  36 #include "real.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39
  40 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  41                                    unsigned HOST_WIDE_INT,
  42                                    unsigned HOST_WIDE_INT, rtx);
  43 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  44                                    unsigned HOST_WIDE_INT, rtx);
  45 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  46                                     unsigned HOST_WIDE_INT,
  47                                     unsigned HOST_WIDE_INT,
  48                                     unsigned HOST_WIDE_INT, rtx, int);
  49 static rtx mask_rtx (enum machine_mode, int, int, int);
  50 static rtx lshift_value (enum machine_mode, rtx, int, int);
  51 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  52                                     unsigned HOST_WIDE_INT, int);
  53 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  54 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  55 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  56
  57 /* Nonzero means divides or modulus operations are relatively cheap for
  58    powers of two, so don't use branches; emit the operation instead.
  59    Usually, this will mean that the MD file will emit non-branch
  60    sequences.  */
  61
  62 static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
  63 static bool smod_pow2_cheap[NUM_MACHINE_MODES];
  64
  65 #ifndef SLOW_UNALIGNED_ACCESS
  66 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  67 #endif
  68
  69 /* For compilers that support multiple targets with different word sizes,
  70    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  71    is the H8/300(H) compiler.  */
  72
  73 #ifndef MAX_BITS_PER_WORD
  74 #define MAX_BITS_PER_WORD BITS_PER_WORD
  75 #endif
  76
  77 /* Reduce conditional compilation elsewhere.  */
  78 #ifndef HAVE_insv
  79 #define HAVE_insv       0
  80 #define CODE_FOR_insv   CODE_FOR_nothing
  81 #define gen_insv(a,b,c,d) NULL_RTX
  82 #endif
  83 #ifndef HAVE_extv
  84 #define HAVE_extv       0
  85 #define CODE_FOR_extv   CODE_FOR_nothing
  86 #define gen_extv(a,b,c,d) NULL_RTX
  87 #endif
  88 #ifndef HAVE_extzv
  89 #define HAVE_extzv      0
  90 #define CODE_FOR_extzv  CODE_FOR_nothing
  91 #define gen_extzv(a,b,c,d) NULL_RTX
  92 #endif
  93
  94 /* Cost of various pieces of RTL.  Note that some of these are indexed by
  95    shift count and some by mode.  */
  96 static int zero_cost;
  97 static int add_cost[NUM_MACHINE_MODES];
  98 static int neg_cost[NUM_MACHINE_MODES];
  99 static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 100 static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 101 static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 102 static int mul_cost[NUM_MACHINE_MODES];
 103 static int div_cost[NUM_MACHINE_MODES];
 104 static int mul_widen_cost[NUM_MACHINE_MODES];
 105 static int mul_highpart_cost[NUM_MACHINE_MODES];
 106
 107 void
 108 init_expmed (void)
 109 {
 110   struct
 111   {
 112     struct rtx_def reg;         rtunion reg_fld[2];
 113     struct rtx_def plus;        rtunion plus_fld1;
 114     struct rtx_def neg;
 115     struct rtx_def udiv;        rtunion udiv_fld1;
 116     struct rtx_def mult;        rtunion mult_fld1;
 117     struct rtx_def div;         rtunion div_fld1;
 118     struct rtx_def mod;         rtunion mod_fld1;
 119     struct rtx_def zext;
 120     struct rtx_def wide_mult;   rtunion wide_mult_fld1;
 121     struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
 122     struct rtx_def wide_trunc;
 123     struct rtx_def shift;       rtunion shift_fld1;
 124     struct rtx_def shift_mult;  rtunion shift_mult_fld1;
 125     struct rtx_def shift_add;   rtunion shift_add_fld1;
 126     struct rtx_def shift_sub;   rtunion shift_sub_fld1;
 127   } all;
 128
 129   rtx pow2[MAX_BITS_PER_WORD];
 130   rtx cint[MAX_BITS_PER_WORD];
 131   int m, n;
 132   enum machine_mode mode, wider_mode;
 133
 134   zero_cost = rtx_cost (const0_rtx, 0);
 135
 136   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 137     {
 138       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 139       cint[m] = GEN_INT (m);
 140     }
 141
 142   memset (&all, 0, sizeof all);
 143
 144   PUT_CODE (&all.reg, REG);
 145   REGNO (&all.reg) = 10000;
 146
 147   PUT_CODE (&all.plus, PLUS);
 148   XEXP (&all.plus, 0) = &all.reg;
 149   XEXP (&all.plus, 1) = &all.reg;
 150
 151   PUT_CODE (&all.neg, NEG);
 152   XEXP (&all.neg, 0) = &all.reg;
 153
 154   PUT_CODE (&all.udiv, UDIV);
 155   XEXP (&all.udiv, 0) = &all.reg;
 156   XEXP (&all.udiv, 1) = &all.reg;
 157
 158   PUT_CODE (&all.mult, MULT);
 159   XEXP (&all.mult, 0) = &all.reg;
 160   XEXP (&all.mult, 1) = &all.reg;
 161
 162   PUT_CODE (&all.div, DIV);
 163   XEXP (&all.div, 0) = &all.reg;
 164   XEXP (&all.div, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
 165
 166   PUT_CODE (&all.mod, MOD);
 167   XEXP (&all.mod, 0) = &all.reg;
 168   XEXP (&all.mod, 1) = XEXP (&all.div, 1);
 169
 170   PUT_CODE (&all.zext, ZERO_EXTEND);
 171   XEXP (&all.zext, 0) = &all.reg;
 172
 173   PUT_CODE (&all.wide_mult, MULT);
 174   XEXP (&all.wide_mult, 0) = &all.zext;
 175   XEXP (&all.wide_mult, 1) = &all.zext;
 176
 177   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 178   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 179
 180   PUT_CODE (&all.wide_trunc, TRUNCATE);
 181   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 182
 183   PUT_CODE (&all.shift, ASHIFT);
 184   XEXP (&all.shift, 0) = &all.reg;
 185
 186   PUT_CODE (&all.shift_mult, MULT);
 187   XEXP (&all.shift_mult, 0) = &all.reg;
 188
 189   PUT_CODE (&all.shift_add, PLUS);
 190   XEXP (&all.shift_add, 0) = &all.shift_mult;
 191   XEXP (&all.shift_add, 1) = &all.reg;
 192
 193   PUT_CODE (&all.shift_sub, MINUS);
 194   XEXP (&all.shift_sub, 0) = &all.shift_mult;
 195   XEXP (&all.shift_sub, 1) = &all.reg;
 196
 197   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 198        mode != VOIDmode;
 199        mode = GET_MODE_WIDER_MODE (mode))
 200     {
 201       PUT_MODE (&all.reg, mode);
 202       PUT_MODE (&all.plus, mode);
 203       PUT_MODE (&all.neg, mode);
 204       PUT_MODE (&all.udiv, mode);
 205       PUT_MODE (&all.mult, mode);
 206       PUT_MODE (&all.div, mode);
 207       PUT_MODE (&all.mod, mode);
 208       PUT_MODE (&all.wide_trunc, mode);
 209       PUT_MODE (&all.shift, mode);
 210       PUT_MODE (&all.shift_mult, mode);
 211       PUT_MODE (&all.shift_add, mode);
 212       PUT_MODE (&all.shift_sub, mode);
 213
 214       add_cost[mode] = rtx_cost (&all.plus, SET);
 215       neg_cost[mode] = rtx_cost (&all.neg, SET);
 216       div_cost[mode] = rtx_cost (&all.udiv, SET);
 217       mul_cost[mode] = rtx_cost (&all.mult, SET);
 218
 219       sdiv_pow2_cheap[mode] = (rtx_cost (&all.div, SET) <= 2 * add_cost[mode]);
 220       smod_pow2_cheap[mode] = (rtx_cost (&all.mod, SET) <= 4 * add_cost[mode]);
 221
 222       wider_mode = GET_MODE_WIDER_MODE (mode);
 223       if (wider_mode != VOIDmode)
 224         {
 225           PUT_MODE (&all.zext, wider_mode);
 226           PUT_MODE (&all.wide_mult, wider_mode);
 227           PUT_MODE (&all.wide_lshr, wider_mode);
 228           XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
 229
 230           mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
 231           mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
 232         }
 233
 234       shift_cost[mode][0] = 0;
 235       shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
 236
 237       n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
 238       for (m = 1; m < n; m++)
 239         {
 240           XEXP (&all.shift, 1) = cint[m];
 241           XEXP (&all.shift_mult, 1) = pow2[m];
 242
 243           shift_cost[mode][m] = rtx_cost (&all.shift, SET);
 244           shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
 245           shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
 246         }
 247     }
 248 }
 249
 250 /* Return an rtx representing minus the value of X.
 251    MODE is the intended mode of the result,
 252    useful if X is a CONST_INT.  */
 253
 254 rtx
 255 negate_rtx (enum machine_mode mode, rtx x)
 256 {
 257   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 258
 259   if (result == 0)
 260     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 261
 262   return result;
 263 }
 264
 265 /* Report on the availability of insv/extv/extzv and the desired mode
 266    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 267    is false; else the mode of the specified operand.  If OPNO is -1,
 268    all the caller cares about is whether the insn is available.  */
 269 enum machine_mode
 270 mode_for_extraction (enum extraction_pattern pattern, int opno)
 271 {
 272   const struct insn_data *data;
 273
 274   switch (pattern)
 275     {
 276     case EP_insv:
 277       if (HAVE_insv)
 278         {
 279           data = &insn_data[CODE_FOR_insv];
 280           break;
 281         }
 282       return MAX_MACHINE_MODE;
 283
 284     case EP_extv:
 285       if (HAVE_extv)
 286         {
 287           data = &insn_data[CODE_FOR_extv];
 288           break;
 289         }
 290       return MAX_MACHINE_MODE;
 291
 292     case EP_extzv:
 293       if (HAVE_extzv)
 294         {
 295           data = &insn_data[CODE_FOR_extzv];
 296           break;
 297         }
 298       return MAX_MACHINE_MODE;
 299
 300     default:
 301       abort ();
 302     }
 303
 304   if (opno == -1)
 305     return VOIDmode;
 306
 307   /* Everyone who uses this function used to follow it with
 308      if (result == VOIDmode) result = word_mode; */
 309   if (data->operand[opno].mode == VOIDmode)
 310     return word_mode;
 311   return data->operand[opno].mode;
 312 }
 313
 314 \f
 315 /* Generate code to store value from rtx VALUE
 316    into a bit-field within structure STR_RTX
 317    containing BITSIZE bits starting at bit BITNUM.
 318    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
 319    ALIGN is the alignment that STR_RTX is known to have.
 320    TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */
 321
 322 /* ??? Note that there are two different ideas here for how
 323    to determine the size to count bits within, for a register.
 324    One is BITS_PER_WORD, and the other is the size of operand 3
 325    of the insv pattern.
 326
 327    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD
 328    else, we use the mode of operand 3.  */
 329
 330 rtx
 331 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 332                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 333                  rtx value)
 334 {
 335   unsigned int unit
 336     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
 337   unsigned HOST_WIDE_INT offset = bitnum / unit;
 338   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
 339   rtx op0 = str_rtx;
 340   int byte_offset;
 341
 342   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 343
 344   while (GET_CODE (op0) == SUBREG)
 345     {
 346       /* The following line once was done only if WORDS_BIG_ENDIAN,
 347          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 348          meaningful at a much higher level; when structures are copied
 349          between memory and regs, the higher-numbered regs
 350          always get higher addresses.  */
 351       offset += (SUBREG_BYTE (op0) / UNITS_PER_WORD);
 352       /* We used to adjust BITPOS here, but now we do the whole adjustment
 353          right after the loop.  */
 354       op0 = SUBREG_REG (op0);
 355     }
 356
 357   /* Use vec_set patterns for inserting parts of vectors whenever
 358      available.  */
 359   if (VECTOR_MODE_P (GET_MODE (op0))
 360       && !MEM_P (op0)
 361       && (vec_set_optab->handlers[GET_MODE (op0)].insn_code
 362           != CODE_FOR_nothing)
 363       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 364       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 365       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 366     {
 367       enum machine_mode outermode = GET_MODE (op0);
 368       enum machine_mode innermode = GET_MODE_INNER (outermode);
 369       int icode = (int) vec_set_optab->handlers[outermode].insn_code;
 370       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 371       rtx rtxpos = GEN_INT (pos);
 372       rtx src = value;
 373       rtx dest = op0;
 374       rtx pat, seq;
 375       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
 376       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
 377       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
 378
 379       start_sequence ();
 380
 381       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
 382         src = copy_to_mode_reg (mode1, src);
 383
 384       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 385         rtxpos = copy_to_mode_reg (mode1, rtxpos);
 386
 387       /* We could handle this, but we should always be called with a pseudo
 388          for our targets and all insns should take them as outputs.  */
 389       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0)
 390           || ! (*insn_data[icode].operand[1].predicate) (src, mode1)
 391           || ! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 392         abort ();
 393       pat = GEN_FCN (icode) (dest, src, rtxpos);
 394       seq = get_insns ();
 395       end_sequence ();
 396       if (pat)
 397         {
 398           emit_insn (seq);
 399           emit_insn (pat);
 400           return dest;
 401         }
 402     }
 403
 404   if (flag_force_mem)
 405     {
 406       int old_generating_concat_p = generating_concat_p;
 407       generating_concat_p = 0;
 408       value = force_not_mem (value);
 409       generating_concat_p = old_generating_concat_p;
 410     }
 411
 412   /* If the target is a register, overwriting the entire object, or storing
 413      a full-word or multi-word field can be done with just a SUBREG.
 414
 415      If the target is memory, storing any naturally aligned field can be
 416      done with a simple store.  For targets that support fast unaligned
 417      memory, any naturally sized, unit aligned field can be done directly.  */
 418
 419   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 420                 + (offset * UNITS_PER_WORD);
 421
 422   if (bitpos == 0
 423       && bitsize == GET_MODE_BITSIZE (fieldmode)
 424       && (!MEM_P (op0)
 425           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 426              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 427              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 428           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 429              || (offset * BITS_PER_UNIT % bitsize == 0
 430                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 431     {
 432       if (GET_MODE (op0) != fieldmode)
 433         {
 434           if (GET_CODE (op0) == SUBREG)
 435             {
 436               if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 437                   || GET_MODE_CLASS (fieldmode) == MODE_INT
 438                   || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 439                 op0 = SUBREG_REG (op0);
 440               else
 441                 /* Else we've got some float mode source being extracted into
 442                    a different float mode destination -- this combination of
 443                    subregs results in Severe Tire Damage.  */
 444                 abort ();
 445             }
 446           if (REG_P (op0))
 447             op0 = gen_rtx_SUBREG (fieldmode, op0, byte_offset);
 448           else
 449             op0 = adjust_address (op0, fieldmode, offset);
 450         }
 451       emit_move_insn (op0, value);
 452       return value;
 453     }
 454
 455   /* Make sure we are playing with integral modes.  Pun with subregs
 456      if we aren't.  This must come after the entire register case above,
 457      since that case is valid for any mode.  The following cases are only
 458      valid for integral modes.  */
 459   {
 460     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 461     if (imode != GET_MODE (op0))
 462       {
 463         if (MEM_P (op0))
 464           op0 = adjust_address (op0, imode, 0);
 465         else if (imode != BLKmode)
 466           op0 = gen_lowpart (imode, op0);
 467         else
 468           abort ();
 469       }
 470   }
 471
 472   /* We may be accessing data outside the field, which means
 473      we can alias adjacent data.  */
 474   if (MEM_P (op0))
 475     {
 476       op0 = shallow_copy_rtx (op0);
 477       set_mem_alias_set (op0, 0);
 478       set_mem_expr (op0, 0);
 479     }
 480
 481   /* If OP0 is a register, BITPOS must count within a word.
 482      But as we have it, it counts within whatever size OP0 now has.
 483      On a bigendian machine, these are not the same, so convert.  */
 484   if (BYTES_BIG_ENDIAN
 485       && !MEM_P (op0)
 486       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 487     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 488
 489   /* Storing an lsb-aligned field in a register
 490      can be done with a movestrict instruction.  */
 491
 492   if (!MEM_P (op0)
 493       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 494       && bitsize == GET_MODE_BITSIZE (fieldmode)
 495       && (movstrict_optab->handlers[fieldmode].insn_code
 496           != CODE_FOR_nothing))
 497     {
 498       int icode = movstrict_optab->handlers[fieldmode].insn_code;
 499
 500       /* Get appropriate low part of the value being stored.  */
 501       if (GET_CODE (value) == CONST_INT || REG_P (value))
 502         value = gen_lowpart (fieldmode, value);
 503       else if (!(GET_CODE (value) == SYMBOL_REF
 504                  || GET_CODE (value) == LABEL_REF
 505                  || GET_CODE (value) == CONST))
 506         value = convert_to_mode (fieldmode, value, 0);
 507
 508       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 509         value = copy_to_mode_reg (fieldmode, value);
 510
 511       if (GET_CODE (op0) == SUBREG)
 512         {
 513           if (GET_MODE (SUBREG_REG (op0)) == fieldmode
 514               || GET_MODE_CLASS (fieldmode) == MODE_INT
 515               || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
 516             op0 = SUBREG_REG (op0);
 517           else
 518             /* Else we've got some float mode source being extracted into
 519                a different float mode destination -- this combination of
 520                subregs results in Severe Tire Damage.  */
 521             abort ();
 522         }
 523
 524       emit_insn (GEN_FCN (icode)
 525                  (gen_rtx_SUBREG (fieldmode, op0,
 526                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 527                                   + (offset * UNITS_PER_WORD)),
 528                                   value));
 529
 530       return value;
 531     }
 532
 533   /* Handle fields bigger than a word.  */
 534
 535   if (bitsize > BITS_PER_WORD)
 536     {
 537       /* Here we transfer the words of the field
 538          in the order least significant first.
 539          This is because the most significant word is the one which may
 540          be less than full.
 541          However, only do that if the value is not BLKmode.  */
 542
 543       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 544       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 545       unsigned int i;
 546
 547       /* This is the mode we must force value to, so that there will be enough
 548          subwords to extract.  Note that fieldmode will often (always?) be
 549          VOIDmode, because that is what store_field uses to indicate that this
 550          is a bit field, but passing VOIDmode to operand_subword_force will
 551          result in an abort.  */
 552       fieldmode = GET_MODE (value);
 553       if (fieldmode == VOIDmode)
 554         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 555
 556       for (i = 0; i < nwords; i++)
 557         {
 558           /* If I is 0, use the low-order word in both field and target;
 559              if I is 1, use the next to lowest word; and so on.  */
 560           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 561           unsigned int bit_offset = (backwards
 562                                      ? MAX ((int) bitsize - ((int) i + 1)
 563                                             * BITS_PER_WORD,
 564                                             0)
 565                                      : (int) i * BITS_PER_WORD);
 566
 567           store_bit_field (op0, MIN (BITS_PER_WORD,
 568                                      bitsize - i * BITS_PER_WORD),
 569                            bitnum + bit_offset, word_mode,
 570                            operand_subword_force (value, wordnum, fieldmode));
 571         }
 572       return value;
 573     }
 574
 575   /* From here on we can assume that the field to be stored in is
 576      a full-word (whatever type that is), since it is shorter than a word.  */
 577
 578   /* OFFSET is the number of words or bytes (UNIT says which)
 579      from STR_RTX to the first word or byte containing part of the field.  */
 580
 581   if (!MEM_P (op0))
 582     {
 583       if (offset != 0
 584           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 585         {
 586           if (!REG_P (op0))
 587             {
 588               /* Since this is a destination (lvalue), we can't copy it to a
 589                  pseudo.  We can trivially remove a SUBREG that does not
 590                  change the size of the operand.  Such a SUBREG may have been
 591                  added above.  Otherwise, abort.  */
 592               if (GET_CODE (op0) == SUBREG
 593                   && (GET_MODE_SIZE (GET_MODE (op0))
 594                       == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))))
 595                 op0 = SUBREG_REG (op0);
 596               else
 597                 abort ();
 598             }
 599           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 600                                 op0, (offset * UNITS_PER_WORD));
 601         }
 602       offset = 0;
 603     }
 604
 605   /* If VALUE is a floating-point mode, access it as an integer of the
 606      corresponding size.  This can occur on a machine with 64 bit registers
 607      that uses SFmode for float.  This can also occur for unaligned float
 608      structure fields.  */
 609   if (GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 610       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 611     value = gen_lowpart ((GET_MODE (value) == VOIDmode
 612                           ? word_mode : int_mode_for_mode (GET_MODE (value))),
 613                          value);
 614
 615   /* Now OFFSET is nonzero only if OP0 is memory
 616      and is therefore always measured in bytes.  */
 617
 618   if (HAVE_insv
 619       && GET_MODE (value) != BLKmode
 620       && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
 621       /* Ensure insv's size is wide enough for this field.  */
 622       && (GET_MODE_BITSIZE (op_mode) >= bitsize)
 623       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
 624             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
 625     {
 626       int xbitpos = bitpos;
 627       rtx value1;
 628       rtx xop0 = op0;
 629       rtx last = get_last_insn ();
 630       rtx pat;
 631       enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
 632       int save_volatile_ok = volatile_ok;
 633
 634       volatile_ok = 1;
 635
 636       /* If this machine's insv can only insert into a register, copy OP0
 637          into a register and save it back later.  */
 638       /* This used to check flag_force_mem, but that was a serious
 639          de-optimization now that flag_force_mem is enabled by -O2.  */
 640       if (MEM_P (op0)
 641           && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
 642                 (op0, VOIDmode)))
 643         {
 644           rtx tempreg;
 645           enum machine_mode bestmode;
 646
 647           /* Get the mode to use for inserting into this field.  If OP0 is
 648              BLKmode, get the smallest mode consistent with the alignment. If
 649              OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
 650              mode. Otherwise, use the smallest mode containing the field.  */
 651
 652           if (GET_MODE (op0) == BLKmode
 653               || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
 654             bestmode
 655               = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
 656                                MEM_VOLATILE_P (op0));
 657           else
 658             bestmode = GET_MODE (op0);
 659
 660           if (bestmode == VOIDmode
 661               || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 662                   && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 663             goto insv_loses;
 664
 665           /* Adjust address to point to the containing unit of that mode.
 666              Compute offset as multiple of this unit, counting in bytes.  */
 667           unit = GET_MODE_BITSIZE (bestmode);
 668           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 669           bitpos = bitnum % unit;
 670           op0 = adjust_address (op0, bestmode,  offset);
 671
 672           /* Fetch that unit, store the bitfield in it, then store
 673              the unit.  */
 674           tempreg = copy_to_reg (op0);
 675           store_bit_field (tempreg, bitsize, bitpos, fieldmode, value);
 676           emit_move_insn (op0, tempreg);
 677           return value;
 678         }
 679       volatile_ok = save_volatile_ok;
 680
 681       /* Add OFFSET into OP0's address.  */
 682       if (MEM_P (xop0))
 683         xop0 = adjust_address (xop0, byte_mode, offset);
 684
 685       /* If xop0 is a register, we need it in MAXMODE
 686          to make it acceptable to the format of insv.  */
 687       if (GET_CODE (xop0) == SUBREG)
 688         /* We can't just change the mode, because this might clobber op0,
 689            and we will need the original value of op0 if insv fails.  */
 690         xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 691       if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
 692         xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
 693
 694       /* On big-endian machines, we count bits from the most significant.
 695          If the bit field insn does not, we must invert.  */
 696
 697       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 698         xbitpos = unit - bitsize - xbitpos;
 699
 700       /* We have been counting XBITPOS within UNIT.
 701          Count instead within the size of the register.  */
 702       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
 703         xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
 704
 705       unit = GET_MODE_BITSIZE (maxmode);
 706
 707       /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
 708       value1 = value;
 709       if (GET_MODE (value) != maxmode)
 710         {
 711           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 712             {
 713               /* Optimization: Don't bother really extending VALUE
 714                  if it has all the bits we will actually use.  However,
 715                  if we must narrow it, be sure we do it correctly.  */
 716
 717               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
 718                 {
 719                   rtx tmp;
 720
 721                   tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
 722                   if (! tmp)
 723                     tmp = simplify_gen_subreg (maxmode,
 724                                                force_reg (GET_MODE (value),
 725                                                           value1),
 726                                                GET_MODE (value), 0);
 727                   value1 = tmp;
 728                 }
 729               else
 730                 value1 = gen_lowpart (maxmode, value1);
 731             }
 732           else if (GET_CODE (value) == CONST_INT)
 733             value1 = gen_int_mode (INTVAL (value), maxmode);
 734           else if (!CONSTANT_P (value))
 735             /* Parse phase is supposed to make VALUE's data type
 736                match that of the component reference, which is a type
 737                at least as wide as the field; so VALUE should have
 738                a mode that corresponds to that type.  */
 739             abort ();
 740         }
 741
 742       /* If this machine's insv insists on a register,
 743          get VALUE1 into a register.  */
 744       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 745              (value1, maxmode)))
 746         value1 = force_reg (maxmode, value1);
 747
 748       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 749       if (pat)
 750         emit_insn (pat);
 751       else
 752         {
 753           delete_insns_since (last);
 754           store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 755         }
 756     }
 757   else
 758     insv_loses:
 759     /* Insv is not available; store using shifts and boolean ops.  */
 760     store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 761   return value;
 762 }
 763 \f
 764 /* Use shifts and boolean operations to store VALUE
 765    into a bit field of width BITSIZE
 766    in a memory location specified by OP0 except offset by OFFSET bytes.
 767      (OFFSET must be 0 if OP0 is a register.)
 768    The field starts at position BITPOS within the byte.
 769     (If OP0 is a register, it may be a full word or a narrower mode,
 770      but BITPOS still counts within a full word,
 771      which is significant on bigendian machines.)  */
 772
 773 static void
 774 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 775                        unsigned HOST_WIDE_INT bitsize,
 776                        unsigned HOST_WIDE_INT bitpos, rtx value)
 777 {
 778   enum machine_mode mode;
 779   unsigned int total_bits = BITS_PER_WORD;
 780   rtx subtarget, temp;
 781   int all_zero = 0;
 782   int all_one = 0;
 783
 784   /* There is a case not handled here:
 785      a structure with a known alignment of just a halfword
 786      and a field split across two aligned halfwords within the structure.
 787      Or likewise a structure with a known alignment of just a byte
 788      and a field split across two bytes.
 789      Such cases are not supposed to be able to occur.  */
 790
 791   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 792     {
 793       if (offset != 0)
 794         abort ();
 795       /* Special treatment for a bit field split across two registers.  */
 796       if (bitsize + bitpos > BITS_PER_WORD)
 797         {
 798           store_split_bit_field (op0, bitsize, bitpos, value);
 799           return;
 800         }
 801     }
 802   else
 803     {
 804       /* Get the proper mode to use for this field.  We want a mode that
 805          includes the entire field.  If such a mode would be larger than
 806          a word, we won't be doing the extraction the normal way.
 807          We don't want a mode bigger than the destination.  */
 808
 809       mode = GET_MODE (op0);
 810       if (GET_MODE_BITSIZE (mode) == 0
 811           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 812         mode = word_mode;
 813       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 814                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 815
 816       if (mode == VOIDmode)
 817         {
 818           /* The only way this should occur is if the field spans word
 819              boundaries.  */
 820           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 821                                  value);
 822           return;
 823         }
 824
 825       total_bits = GET_MODE_BITSIZE (mode);
 826
 827       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 828          be in the range 0 to total_bits-1, and put any excess bytes in
 829          OFFSET.  */
 830       if (bitpos >= total_bits)
 831         {
 832           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 833           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 834                      * BITS_PER_UNIT);
 835         }
 836
 837       /* Get ref to an aligned byte, halfword, or word containing the field.
 838          Adjust BITPOS to be position within a word,
 839          and OFFSET to be the offset of that word.
 840          Then alter OP0 to refer to that word.  */
 841       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 842       offset -= (offset % (total_bits / BITS_PER_UNIT));
 843       op0 = adjust_address (op0, mode, offset);
 844     }
 845
 846   mode = GET_MODE (op0);
 847
 848   /* Now MODE is either some integral mode for a MEM as OP0,
 849      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 850      The bit field is contained entirely within OP0.
 851      BITPOS is the starting bit number within OP0.
 852      (OP0's mode may actually be narrower than MODE.)  */
 853
 854   if (BYTES_BIG_ENDIAN)
 855       /* BITPOS is the distance between our msb
 856          and that of the containing datum.
 857          Convert it to the distance from the lsb.  */
 858       bitpos = total_bits - bitsize - bitpos;
 859
 860   /* Now BITPOS is always the distance between our lsb
 861      and that of OP0.  */
 862
 863   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 864      we must first convert its mode to MODE.  */
 865
 866   if (GET_CODE (value) == CONST_INT)
 867     {
 868       HOST_WIDE_INT v = INTVAL (value);
 869
 870       if (bitsize < HOST_BITS_PER_WIDE_INT)
 871         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 872
 873       if (v == 0)
 874         all_zero = 1;
 875       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 876                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 877                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 878         all_one = 1;
 879
 880       value = lshift_value (mode, value, bitpos, bitsize);
 881     }
 882   else
 883     {
 884       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 885                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 886
 887       if (GET_MODE (value) != mode)
 888         {
 889           if ((REG_P (value) || GET_CODE (value) == SUBREG)
 890               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 891             value = gen_lowpart (mode, value);
 892           else
 893             value = convert_to_mode (mode, value, 1);
 894         }
 895
 896       if (must_and)
 897         value = expand_binop (mode, and_optab, value,
 898                               mask_rtx (mode, 0, bitsize, 0),
 899                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 900       if (bitpos > 0)
 901         value = expand_shift (LSHIFT_EXPR, mode, value,
 902                               build_int_2 (bitpos, 0), NULL_RTX, 1);
 903     }
 904
 905   /* Now clear the chosen bits in OP0,
 906      except that if VALUE is -1 we need not bother.  */
 907
 908   subtarget = (REG_P (op0) || ! flag_force_mem) ? op0 : 0;
 909
 910   if (! all_one)
 911     {
 912       temp = expand_binop (mode, and_optab, op0,
 913                            mask_rtx (mode, bitpos, bitsize, 1),
 914                            subtarget, 1, OPTAB_LIB_WIDEN);
 915       subtarget = temp;
 916     }
 917   else
 918     temp = op0;
 919
 920   /* Now logical-or VALUE into OP0, unless it is zero.  */
 921
 922   if (! all_zero)
 923     temp = expand_binop (mode, ior_optab, temp, value,
 924                          subtarget, 1, OPTAB_LIB_WIDEN);
 925   if (op0 != temp)
 926     emit_move_insn (op0, temp);
 927 }
 928 \f
 929 /* Store a bit field that is split across multiple accessible memory objects.
 930
 931    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 932    BITSIZE is the field width; BITPOS the position of its first bit
 933    (within the word).
 934    VALUE is the value to store.
 935
 936    This does not yet handle fields wider than BITS_PER_WORD.  */
 937
 938 static void
 939 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 940                        unsigned HOST_WIDE_INT bitpos, rtx value)
 941 {
 942   unsigned int unit;
 943   unsigned int bitsdone = 0;
 944
 945   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
 946      much at a time.  */
 947   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 948     unit = BITS_PER_WORD;
 949   else
 950     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
 951
 952   /* If VALUE is a constant other than a CONST_INT, get it into a register in
 953      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
 954      that VALUE might be a floating-point constant.  */
 955   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
 956     {
 957       rtx word = gen_lowpart_common (word_mode, value);
 958
 959       if (word && (value != word))
 960         value = word;
 961       else
 962         value = gen_lowpart_common (word_mode,
 963                                     force_reg (GET_MODE (value) != VOIDmode
 964                                                ? GET_MODE (value)
 965                                                : word_mode, value));
 966     }
 967
 968   while (bitsdone < bitsize)
 969     {
 970       unsigned HOST_WIDE_INT thissize;
 971       rtx part, word;
 972       unsigned HOST_WIDE_INT thispos;
 973       unsigned HOST_WIDE_INT offset;
 974
 975       offset = (bitpos + bitsdone) / unit;
 976       thispos = (bitpos + bitsdone) % unit;
 977
 978       /* THISSIZE must not overrun a word boundary.  Otherwise,
 979          store_fixed_bit_field will call us again, and we will mutually
 980          recurse forever.  */
 981       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
 982       thissize = MIN (thissize, unit - thispos);
 983
 984       if (BYTES_BIG_ENDIAN)
 985         {
 986           int total_bits;
 987
 988           /* We must do an endian conversion exactly the same way as it is
 989              done in extract_bit_field, so that the two calls to
 990              extract_fixed_bit_field will have comparable arguments.  */
 991           if (!MEM_P (value) || GET_MODE (value) == BLKmode)
 992             total_bits = BITS_PER_WORD;
 993           else
 994             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
 995
 996           /* Fetch successively less significant portions.  */
 997           if (GET_CODE (value) == CONST_INT)
 998             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
 999                              >> (bitsize - bitsdone - thissize))
1000                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1001           else
1002             /* The args are chosen so that the last part includes the
1003                lsb.  Give extract_bit_field the value it needs (with
1004                endianness compensation) to fetch the piece we want.  */
1005             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1006                                             total_bits - bitsize + bitsdone,
1007                                             NULL_RTX, 1);
1008         }
1009       else
1010         {
1011           /* Fetch successively more significant portions.  */
1012           if (GET_CODE (value) == CONST_INT)
1013             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1014                              >> bitsdone)
1015                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1016           else
1017             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1018                                             bitsdone, NULL_RTX, 1);
1019         }
1020
1021       /* If OP0 is a register, then handle OFFSET here.
1022
1023          When handling multiword bitfields, extract_bit_field may pass
1024          down a word_mode SUBREG of a larger REG for a bitfield that actually
1025          crosses a word boundary.  Thus, for a SUBREG, we must find
1026          the current word starting from the base register.  */
1027       if (GET_CODE (op0) == SUBREG)
1028         {
1029           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1030           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1031                                         GET_MODE (SUBREG_REG (op0)));
1032           offset = 0;
1033         }
1034       else if (REG_P (op0))
1035         {
1036           word = operand_subword_force (op0, offset, GET_MODE (op0));
1037           offset = 0;
1038         }
1039       else
1040         word = op0;
1041
1042       /* OFFSET is in UNITs, and UNIT is in bits.
1043          store_fixed_bit_field wants offset in bytes.  */
1044       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1045                              thispos, part);
1046       bitsdone += thissize;
1047     }
1048 }
1049 \f
1050 /* Generate code to extract a byte-field from STR_RTX
1051    containing BITSIZE bits, starting at BITNUM,
1052    and put it in TARGET if possible (if TARGET is nonzero).
1053    Regardless of TARGET, we return the rtx for where the value is placed.
1054
1055    STR_RTX is the structure containing the byte (a REG or MEM).
1056    UNSIGNEDP is nonzero if this is an unsigned bit field.
1057    MODE is the natural mode of the field value once extracted.
1058    TMODE is the mode the caller would like the value to have;
1059    but the value may be returned with type MODE instead.
1060
1061    TOTAL_SIZE is the size in bytes of the containing structure,
1062    or -1 if varying.
1063
1064    If a TARGET is specified and we can store in it at no extra cost,
1065    we do so, and return TARGET.
1066    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1067    if they are equally easy.  */
1068
1069 rtx
1070 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1071                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1072                    enum machine_mode mode, enum machine_mode tmode)
1073 {
1074   unsigned int unit
1075     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1076   unsigned HOST_WIDE_INT offset = bitnum / unit;
1077   unsigned HOST_WIDE_INT bitpos = bitnum % unit;
1078   rtx op0 = str_rtx;
1079   rtx spec_target = target;
1080   rtx spec_target_subreg = 0;
1081   enum machine_mode int_mode;
1082   enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1083   enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1084   enum machine_mode mode1;
1085   int byte_offset;
1086
1087   if (tmode == VOIDmode)
1088     tmode = mode;
1089
1090   while (GET_CODE (op0) == SUBREG)
1091     {
1092       bitpos += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1093       if (bitpos > unit)
1094         {
1095           offset += (bitpos / unit);
1096           bitpos %= unit;
1097         }
1098       op0 = SUBREG_REG (op0);
1099     }
1100
1101   if (REG_P (op0)
1102       && mode == GET_MODE (op0)
1103       && bitnum == 0
1104       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1105     {
1106       /* We're trying to extract a full register from itself.  */
1107       return op0;
1108     }
1109
1110   /* Use vec_extract patterns for extracting parts of vectors whenever
1111      available.  */
1112   if (VECTOR_MODE_P (GET_MODE (op0))
1113       && !MEM_P (op0)
1114       && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
1115           != CODE_FOR_nothing)
1116       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1117           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1118     {
1119       enum machine_mode outermode = GET_MODE (op0);
1120       enum machine_mode innermode = GET_MODE_INNER (outermode);
1121       int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
1122       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1123       rtx rtxpos = GEN_INT (pos);
1124       rtx src = op0;
1125       rtx dest = NULL, pat, seq;
1126       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1127       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1128       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1129
1130       if (innermode == tmode || innermode == mode)
1131         dest = target;
1132
1133       if (!dest)
1134         dest = gen_reg_rtx (innermode);
1135
1136       start_sequence ();
1137
1138       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1139         dest = copy_to_mode_reg (mode0, dest);
1140
1141       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1142         src = copy_to_mode_reg (mode1, src);
1143
1144       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1145         rtxpos = copy_to_mode_reg (mode1, rtxpos);
1146
1147       /* We could handle this, but we should always be called with a pseudo
1148          for our targets and all insns should take them as outputs.  */
1149       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0)
1150           || ! (*insn_data[icode].operand[1].predicate) (src, mode1)
1151           || ! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1152         abort ();
1153
1154       pat = GEN_FCN (icode) (dest, src, rtxpos);
1155       seq = get_insns ();
1156       end_sequence ();
1157       if (pat)
1158         {
1159           emit_insn (seq);
1160           emit_insn (pat);
1161           return dest;
1162         }
1163     }
1164
1165   /* Make sure we are playing with integral modes.  Pun with subregs
1166      if we aren't.  */
1167   {
1168     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1169     if (imode != GET_MODE (op0))
1170       {
1171         if (MEM_P (op0))
1172           op0 = adjust_address (op0, imode, 0);
1173         else if (imode != BLKmode)
1174           op0 = gen_lowpart (imode, op0);
1175         else
1176           abort ();
1177       }
1178   }
1179
1180   /* We may be accessing data outside the field, which means
1181      we can alias adjacent data.  */
1182   if (MEM_P (op0))
1183     {
1184       op0 = shallow_copy_rtx (op0);
1185       set_mem_alias_set (op0, 0);
1186       set_mem_expr (op0, 0);
1187     }
1188
1189   /* Extraction of a full-word or multi-word value from a structure
1190      in a register or aligned memory can be done with just a SUBREG.
1191      A subword value in the least significant part of a register
1192      can also be extracted with a SUBREG.  For this, we need the
1193      byte offset of the value in op0.  */
1194
1195   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1196
1197   /* If OP0 is a register, BITPOS must count within a word.
1198      But as we have it, it counts within whatever size OP0 now has.
1199      On a bigendian machine, these are not the same, so convert.  */
1200   if (BYTES_BIG_ENDIAN
1201       && !MEM_P (op0)
1202       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1203     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1204
1205   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1206      If that's wrong, the solution is to test for it and set TARGET to 0
1207      if needed.  */
1208
1209   /* Only scalar integer modes can be converted via subregs.  There is an
1210      additional problem for FP modes here in that they can have a precision
1211      which is different from the size.  mode_for_size uses precision, but
1212      we want a mode based on the size, so we must avoid calling it for FP
1213      modes.  */
1214   mode1  = (SCALAR_INT_MODE_P (tmode)
1215             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1216             : mode);
1217
1218   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1219         && bitpos % BITS_PER_WORD == 0)
1220        || (mode1 != BLKmode
1221            /* ??? The big endian test here is wrong.  This is correct
1222               if the value is in a register, and if mode_for_size is not
1223               the same mode as op0.  This causes us to get unnecessarily
1224               inefficient code from the Thumb port when -mbig-endian.  */
1225            && (BYTES_BIG_ENDIAN
1226                ? bitpos + bitsize == BITS_PER_WORD
1227                : bitpos == 0)))
1228       && ((!MEM_P (op0)
1229            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1230                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1231            && GET_MODE_SIZE (mode1) != 0
1232            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1233           || (MEM_P (op0)
1234               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1235                   || (offset * BITS_PER_UNIT % bitsize == 0
1236                       && MEM_ALIGN (op0) % bitsize == 0)))))
1237     {
1238       if (mode1 != GET_MODE (op0))
1239         {
1240           if (GET_CODE (op0) == SUBREG)
1241             {
1242               if (GET_MODE (SUBREG_REG (op0)) == mode1
1243                   || GET_MODE_CLASS (mode1) == MODE_INT
1244                   || GET_MODE_CLASS (mode1) == MODE_PARTIAL_INT)
1245                 op0 = SUBREG_REG (op0);
1246               else
1247                 /* Else we've got some float mode source being extracted into
1248                    a different float mode destination -- this combination of
1249                    subregs results in Severe Tire Damage.  */
1250                 goto no_subreg_mode_swap;
1251             }
1252           if (REG_P (op0))
1253             op0 = gen_rtx_SUBREG (mode1, op0, byte_offset);
1254           else
1255             op0 = adjust_address (op0, mode1, offset);
1256         }
1257       if (mode1 != mode)
1258         return convert_to_mode (tmode, op0, unsignedp);
1259       return op0;
1260     }
1261  no_subreg_mode_swap:
1262
1263   /* Handle fields bigger than a word.  */
1264
1265   if (bitsize > BITS_PER_WORD)
1266     {
1267       /* Here we transfer the words of the field
1268          in the order least significant first.
1269          This is because the most significant word is the one which may
1270          be less than full.  */
1271
1272       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1273       unsigned int i;
1274
1275       if (target == 0 || !REG_P (target))
1276         target = gen_reg_rtx (mode);
1277
1278       /* Indicate for flow that the entire target reg is being set.  */
1279       emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1280
1281       for (i = 0; i < nwords; i++)
1282         {
1283           /* If I is 0, use the low-order word in both field and target;
1284              if I is 1, use the next to lowest word; and so on.  */
1285           /* Word number in TARGET to use.  */
1286           unsigned int wordnum
1287             = (WORDS_BIG_ENDIAN
1288                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1289                : i);
1290           /* Offset from start of field in OP0.  */
1291           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1292                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1293                                                 * (int) BITS_PER_WORD))
1294                                      : (int) i * BITS_PER_WORD);
1295           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1296           rtx result_part
1297             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1298                                            bitsize - i * BITS_PER_WORD),
1299                                  bitnum + bit_offset, 1, target_part, mode,
1300                                  word_mode);
1301
1302           if (target_part == 0)
1303             abort ();
1304
1305           if (result_part != target_part)
1306             emit_move_insn (target_part, result_part);
1307         }
1308
1309       if (unsignedp)
1310         {
1311           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1312              need to be zero'd out.  */
1313           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1314             {
1315               unsigned int i, total_words;
1316
1317               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1318               for (i = nwords; i < total_words; i++)
1319                 emit_move_insn
1320                   (operand_subword (target,
1321                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1322                                     1, VOIDmode),
1323                    const0_rtx);
1324             }
1325           return target;
1326         }
1327
1328       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1329       target = expand_shift (LSHIFT_EXPR, mode, target,
1330                              build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1331                              NULL_RTX, 0);
1332       return expand_shift (RSHIFT_EXPR, mode, target,
1333                            build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1334                            NULL_RTX, 0);
1335     }
1336
1337   /* From here on we know the desired field is smaller than a word.  */
1338
1339   /* Check if there is a correspondingly-sized integer field, so we can
1340      safely extract it as one size of integer, if necessary; then
1341      truncate or extend to the size that is wanted; then use SUBREGs or
1342      convert_to_mode to get one of the modes we really wanted.  */
1343
1344   int_mode = int_mode_for_mode (tmode);
1345   if (int_mode == BLKmode)
1346     int_mode = int_mode_for_mode (mode);
1347   if (int_mode == BLKmode)
1348     abort ();    /* Should probably push op0 out to memory and then
1349                     do a load.  */
1350
1351   /* OFFSET is the number of words or bytes (UNIT says which)
1352      from STR_RTX to the first word or byte containing part of the field.  */
1353
1354   if (!MEM_P (op0))
1355     {
1356       if (offset != 0
1357           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1358         {
1359           if (!REG_P (op0))
1360             op0 = copy_to_reg (op0);
1361           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1362                                 op0, (offset * UNITS_PER_WORD));
1363         }
1364       offset = 0;
1365     }
1366
1367   /* Now OFFSET is nonzero only for memory operands.  */
1368
1369   if (unsignedp)
1370     {
1371       if (HAVE_extzv
1372           && (GET_MODE_BITSIZE (extzv_mode) >= bitsize)
1373           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1374                 && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1375         {
1376           unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1377           rtx bitsize_rtx, bitpos_rtx;
1378           rtx last = get_last_insn ();
1379           rtx xop0 = op0;
1380           rtx xtarget = target;
1381           rtx xspec_target = spec_target;
1382           rtx xspec_target_subreg = spec_target_subreg;
1383           rtx pat;
1384           enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1385
1386           if (MEM_P (xop0))
1387             {
1388               int save_volatile_ok = volatile_ok;
1389               volatile_ok = 1;
1390
1391               /* Is the memory operand acceptable?  */
1392               if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1393                      (xop0, GET_MODE (xop0))))
1394                 {
1395                   /* No, load into a reg and extract from there.  */
1396                   enum machine_mode bestmode;
1397
1398                   /* Get the mode to use for inserting into this field.  If
1399                      OP0 is BLKmode, get the smallest mode consistent with the
1400                      alignment. If OP0 is a non-BLKmode object that is no
1401                      wider than MAXMODE, use its mode. Otherwise, use the
1402                      smallest mode containing the field.  */
1403
1404                   if (GET_MODE (xop0) == BLKmode
1405                       || (GET_MODE_SIZE (GET_MODE (op0))
1406                           > GET_MODE_SIZE (maxmode)))
1407                     bestmode = get_best_mode (bitsize, bitnum,
1408                                               MEM_ALIGN (xop0), maxmode,
1409                                               MEM_VOLATILE_P (xop0));
1410                   else
1411                     bestmode = GET_MODE (xop0);
1412
1413                   if (bestmode == VOIDmode
1414                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1415                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1416                     goto extzv_loses;
1417
1418                   /* Compute offset as multiple of this unit,
1419                      counting in bytes.  */
1420                   unit = GET_MODE_BITSIZE (bestmode);
1421                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1422                   xbitpos = bitnum % unit;
1423                   xop0 = adjust_address (xop0, bestmode, xoffset);
1424
1425                   /* Fetch it to a register in that size.  */
1426                   xop0 = force_reg (bestmode, xop0);
1427
1428                   /* XBITPOS counts within UNIT, which is what is expected.  */
1429                 }
1430               else
1431                 /* Get ref to first byte containing part of the field.  */
1432                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1433
1434               volatile_ok = save_volatile_ok;
1435             }
1436
1437           /* If op0 is a register, we need it in MAXMODE (which is usually
1438              SImode). to make it acceptable to the format of extzv.  */
1439           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1440             goto extzv_loses;
1441           if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1442             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1443
1444           /* On big-endian machines, we count bits from the most significant.
1445              If the bit field insn does not, we must invert.  */
1446           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1447             xbitpos = unit - bitsize - xbitpos;
1448
1449           /* Now convert from counting within UNIT to counting in MAXMODE.  */
1450           if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1451             xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1452
1453           unit = GET_MODE_BITSIZE (maxmode);
1454
1455           if (xtarget == 0
1456               || (flag_force_mem && MEM_P (xtarget)))
1457             xtarget = xspec_target = gen_reg_rtx (tmode);
1458
1459           if (GET_MODE (xtarget) != maxmode)
1460             {
1461               if (REG_P (xtarget))
1462                 {
1463                   int wider = (GET_MODE_SIZE (maxmode)
1464                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1465                   xtarget = gen_lowpart (maxmode, xtarget);
1466                   if (wider)
1467                     xspec_target_subreg = xtarget;
1468                 }
1469               else
1470                 xtarget = gen_reg_rtx (maxmode);
1471             }
1472
1473           /* If this machine's extzv insists on a register target,
1474              make sure we have one.  */
1475           if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1476                  (xtarget, maxmode)))
1477             xtarget = gen_reg_rtx (maxmode);
1478
1479           bitsize_rtx = GEN_INT (bitsize);
1480           bitpos_rtx = GEN_INT (xbitpos);
1481
1482           pat = gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1483           if (pat)
1484             {
1485               emit_insn (pat);
1486               target = xtarget;
1487               spec_target = xspec_target;
1488               spec_target_subreg = xspec_target_subreg;
1489             }
1490           else
1491             {
1492               delete_insns_since (last);
1493               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1494                                                 bitpos, target, 1);
1495             }
1496         }
1497       else
1498       extzv_loses:
1499         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1500                                           bitpos, target, 1);
1501     }
1502   else
1503     {
1504       if (HAVE_extv
1505           && (GET_MODE_BITSIZE (extv_mode) >= bitsize)
1506           && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1507                 && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1508         {
1509           int xbitpos = bitpos, xoffset = offset;
1510           rtx bitsize_rtx, bitpos_rtx;
1511           rtx last = get_last_insn ();
1512           rtx xop0 = op0, xtarget = target;
1513           rtx xspec_target = spec_target;
1514           rtx xspec_target_subreg = spec_target_subreg;
1515           rtx pat;
1516           enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1517
1518           if (MEM_P (xop0))
1519             {
1520               /* Is the memory operand acceptable?  */
1521               if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1522                      (xop0, GET_MODE (xop0))))
1523                 {
1524                   /* No, load into a reg and extract from there.  */
1525                   enum machine_mode bestmode;
1526
1527                   /* Get the mode to use for inserting into this field.  If
1528                      OP0 is BLKmode, get the smallest mode consistent with the
1529                      alignment. If OP0 is a non-BLKmode object that is no
1530                      wider than MAXMODE, use its mode. Otherwise, use the
1531                      smallest mode containing the field.  */
1532
1533                   if (GET_MODE (xop0) == BLKmode
1534                       || (GET_MODE_SIZE (GET_MODE (op0))
1535                           > GET_MODE_SIZE (maxmode)))
1536                     bestmode = get_best_mode (bitsize, bitnum,
1537                                               MEM_ALIGN (xop0), maxmode,
1538                                               MEM_VOLATILE_P (xop0));
1539                   else
1540                     bestmode = GET_MODE (xop0);
1541
1542                   if (bestmode == VOIDmode
1543                       || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1544                           && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1545                     goto extv_loses;
1546
1547                   /* Compute offset as multiple of this unit,
1548                      counting in bytes.  */
1549                   unit = GET_MODE_BITSIZE (bestmode);
1550                   xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1551                   xbitpos = bitnum % unit;
1552                   xop0 = adjust_address (xop0, bestmode, xoffset);
1553
1554                   /* Fetch it to a register in that size.  */
1555                   xop0 = force_reg (bestmode, xop0);
1556
1557                   /* XBITPOS counts within UNIT, which is what is expected.  */
1558                 }
1559               else
1560                 /* Get ref to first byte containing part of the field.  */
1561                 xop0 = adjust_address (xop0, byte_mode, xoffset);
1562             }
1563
1564           /* If op0 is a register, we need it in MAXMODE (which is usually
1565              SImode) to make it acceptable to the format of extv.  */
1566           if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1567             goto extv_loses;
1568           if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1569             xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1570
1571           /* On big-endian machines, we count bits from the most significant.
1572              If the bit field insn does not, we must invert.  */
1573           if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1574             xbitpos = unit - bitsize - xbitpos;
1575
1576           /* XBITPOS counts within a size of UNIT.
1577              Adjust to count within a size of MAXMODE.  */
1578           if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1579             xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1580
1581           unit = GET_MODE_BITSIZE (maxmode);
1582
1583           if (xtarget == 0
1584               || (flag_force_mem && MEM_P (xtarget)))
1585             xtarget = xspec_target = gen_reg_rtx (tmode);
1586
1587           if (GET_MODE (xtarget) != maxmode)
1588             {
1589               if (REG_P (xtarget))
1590                 {
1591                   int wider = (GET_MODE_SIZE (maxmode)
1592                                > GET_MODE_SIZE (GET_MODE (xtarget)));
1593                   xtarget = gen_lowpart (maxmode, xtarget);
1594                   if (wider)
1595                     xspec_target_subreg = xtarget;
1596                 }
1597               else
1598                 xtarget = gen_reg_rtx (maxmode);
1599             }
1600
1601           /* If this machine's extv insists on a register target,
1602              make sure we have one.  */
1603           if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1604                  (xtarget, maxmode)))
1605             xtarget = gen_reg_rtx (maxmode);
1606
1607           bitsize_rtx = GEN_INT (bitsize);
1608           bitpos_rtx = GEN_INT (xbitpos);
1609
1610           pat = gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1611           if (pat)
1612             {
1613               emit_insn (pat);
1614               target = xtarget;
1615               spec_target = xspec_target;
1616               spec_target_subreg = xspec_target_subreg;
1617             }
1618           else
1619             {
1620               delete_insns_since (last);
1621               target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1622                                                 bitpos, target, 0);
1623             }
1624         }
1625       else
1626       extv_loses:
1627         target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1628                                           bitpos, target, 0);
1629     }
1630   if (target == spec_target)
1631     return target;
1632   if (target == spec_target_subreg)
1633     return spec_target;
1634   if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1635     {
1636       /* If the target mode is floating-point, first convert to the
1637          integer mode of that size and then access it as a floating-point
1638          value via a SUBREG.  */
1639       if (GET_MODE_CLASS (tmode) != MODE_INT
1640           && GET_MODE_CLASS (tmode) != MODE_PARTIAL_INT)
1641         {
1642           target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
1643                                                    MODE_INT, 0),
1644                                     target, unsignedp);
1645           return gen_lowpart (tmode, target);
1646         }
1647       else
1648         return convert_to_mode (tmode, target, unsignedp);
1649     }
1650   return target;
1651 }
1652 \f
1653 /* Extract a bit field using shifts and boolean operations
1654    Returns an rtx to represent the value.
1655    OP0 addresses a register (word) or memory (byte).
1656    BITPOS says which bit within the word or byte the bit field starts in.
1657    OFFSET says how many bytes farther the bit field starts;
1658     it is 0 if OP0 is a register.
1659    BITSIZE says how many bits long the bit field is.
1660     (If OP0 is a register, it may be narrower than a full word,
1661      but BITPOS still counts within a full word,
1662      which is significant on bigendian machines.)
1663
1664    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1665    If TARGET is nonzero, attempts to store the value there
1666    and return TARGET, but this is not guaranteed.
1667    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1668
1669 static rtx
1670 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1671                          unsigned HOST_WIDE_INT offset,
1672                          unsigned HOST_WIDE_INT bitsize,
1673                          unsigned HOST_WIDE_INT bitpos, rtx target,
1674                          int unsignedp)
1675 {
1676   unsigned int total_bits = BITS_PER_WORD;
1677   enum machine_mode mode;
1678
1679   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1680     {
1681       /* Special treatment for a bit field split across two registers.  */
1682       if (bitsize + bitpos > BITS_PER_WORD)
1683         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1684     }
1685   else
1686     {
1687       /* Get the proper mode to use for this field.  We want a mode that
1688          includes the entire field.  If such a mode would be larger than
1689          a word, we won't be doing the extraction the normal way.  */
1690
1691       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1692                             MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1693
1694       if (mode == VOIDmode)
1695         /* The only way this should occur is if the field spans word
1696            boundaries.  */
1697         return extract_split_bit_field (op0, bitsize,
1698                                         bitpos + offset * BITS_PER_UNIT,
1699                                         unsignedp);
1700
1701       total_bits = GET_MODE_BITSIZE (mode);
1702
1703       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1704          be in the range 0 to total_bits-1, and put any excess bytes in
1705          OFFSET.  */
1706       if (bitpos >= total_bits)
1707         {
1708           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1709           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1710                      * BITS_PER_UNIT);
1711         }
1712
1713       /* Get ref to an aligned byte, halfword, or word containing the field.
1714          Adjust BITPOS to be position within a word,
1715          and OFFSET to be the offset of that word.
1716          Then alter OP0 to refer to that word.  */
1717       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1718       offset -= (offset % (total_bits / BITS_PER_UNIT));
1719       op0 = adjust_address (op0, mode, offset);
1720     }
1721
1722   mode = GET_MODE (op0);
1723
1724   if (BYTES_BIG_ENDIAN)
1725     /* BITPOS is the distance between our msb and that of OP0.
1726        Convert it to the distance from the lsb.  */
1727     bitpos = total_bits - bitsize - bitpos;
1728
1729   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1730      We have reduced the big-endian case to the little-endian case.  */
1731
1732   if (unsignedp)
1733     {
1734       if (bitpos)
1735         {
1736           /* If the field does not already start at the lsb,
1737              shift it so it does.  */
1738           tree amount = build_int_2 (bitpos, 0);
1739           /* Maybe propagate the target for the shift.  */
1740           /* But not if we will return it--could confuse integrate.c.  */
1741           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1742           if (tmode != mode) subtarget = 0;
1743           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1744         }
1745       /* Convert the value to the desired mode.  */
1746       if (mode != tmode)
1747         op0 = convert_to_mode (tmode, op0, 1);
1748
1749       /* Unless the msb of the field used to be the msb when we shifted,
1750          mask out the upper bits.  */
1751
1752       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1753         return expand_binop (GET_MODE (op0), and_optab, op0,
1754                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1755                              target, 1, OPTAB_LIB_WIDEN);
1756       return op0;
1757     }
1758
1759   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1760      then arithmetic-shift its lsb to the lsb of the word.  */
1761   op0 = force_reg (mode, op0);
1762   if (mode != tmode)
1763     target = 0;
1764
1765   /* Find the narrowest integer mode that contains the field.  */
1766
1767   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1768        mode = GET_MODE_WIDER_MODE (mode))
1769     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1770       {
1771         op0 = convert_to_mode (mode, op0, 0);
1772         break;
1773       }
1774
1775   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1776     {
1777       tree amount
1778         = build_int_2 (GET_MODE_BITSIZE (mode) - (bitsize + bitpos), 0);
1779       /* Maybe propagate the target for the shift.  */
1780       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1781       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1782     }
1783
1784   return expand_shift (RSHIFT_EXPR, mode, op0,
1785                        build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
1786                        target, 0);
1787 }
1788 \f
1789 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1790    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1791    complement of that if COMPLEMENT.  The mask is truncated if
1792    necessary to the width of mode MODE.  The mask is zero-extended if
1793    BITSIZE+BITPOS is too small for MODE.  */
1794
1795 static rtx
1796 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1797 {
1798   HOST_WIDE_INT masklow, maskhigh;
1799
1800   if (bitsize == 0)
1801     masklow = 0;
1802   else if (bitpos < HOST_BITS_PER_WIDE_INT)
1803     masklow = (HOST_WIDE_INT) -1 << bitpos;
1804   else
1805     masklow = 0;
1806
1807   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1808     masklow &= ((unsigned HOST_WIDE_INT) -1
1809                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1810
1811   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1812     maskhigh = -1;
1813   else
1814     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1815
1816   if (bitsize == 0)
1817     maskhigh = 0;
1818   else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1819     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1820                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1821   else
1822     maskhigh = 0;
1823
1824   if (complement)
1825     {
1826       maskhigh = ~maskhigh;
1827       masklow = ~masklow;
1828     }
1829
1830   return immed_double_const (masklow, maskhigh, mode);
1831 }
1832
1833 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1834    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1835
1836 static rtx
1837 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1838 {
1839   unsigned HOST_WIDE_INT v = INTVAL (value);
1840   HOST_WIDE_INT low, high;
1841
1842   if (bitsize < HOST_BITS_PER_WIDE_INT)
1843     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1844
1845   if (bitpos < HOST_BITS_PER_WIDE_INT)
1846     {
1847       low = v << bitpos;
1848       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1849     }
1850   else
1851     {
1852       low = 0;
1853       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1854     }
1855
1856   return immed_double_const (low, high, mode);
1857 }
1858 \f
1859 /* Extract a bit field that is split across two words
1860    and return an RTX for the result.
1861
1862    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1863    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1864    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1865
1866 static rtx
1867 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1868                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1869 {
1870   unsigned int unit;
1871   unsigned int bitsdone = 0;
1872   rtx result = NULL_RTX;
1873   int first = 1;
1874
1875   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1876      much at a time.  */
1877   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1878     unit = BITS_PER_WORD;
1879   else
1880     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1881
1882   while (bitsdone < bitsize)
1883     {
1884       unsigned HOST_WIDE_INT thissize;
1885       rtx part, word;
1886       unsigned HOST_WIDE_INT thispos;
1887       unsigned HOST_WIDE_INT offset;
1888
1889       offset = (bitpos + bitsdone) / unit;
1890       thispos = (bitpos + bitsdone) % unit;
1891
1892       /* THISSIZE must not overrun a word boundary.  Otherwise,
1893          extract_fixed_bit_field will call us again, and we will mutually
1894          recurse forever.  */
1895       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1896       thissize = MIN (thissize, unit - thispos);
1897
1898       /* If OP0 is a register, then handle OFFSET here.
1899
1900          When handling multiword bitfields, extract_bit_field may pass
1901          down a word_mode SUBREG of a larger REG for a bitfield that actually
1902          crosses a word boundary.  Thus, for a SUBREG, we must find
1903          the current word starting from the base register.  */
1904       if (GET_CODE (op0) == SUBREG)
1905         {
1906           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1907           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1908                                         GET_MODE (SUBREG_REG (op0)));
1909           offset = 0;
1910         }
1911       else if (REG_P (op0))
1912         {
1913           word = operand_subword_force (op0, offset, GET_MODE (op0));
1914           offset = 0;
1915         }
1916       else
1917         word = op0;
1918
1919       /* Extract the parts in bit-counting order,
1920          whose meaning is determined by BYTES_PER_UNIT.
1921          OFFSET is in UNITs, and UNIT is in bits.
1922          extract_fixed_bit_field wants offset in bytes.  */
1923       part = extract_fixed_bit_field (word_mode, word,
1924                                       offset * unit / BITS_PER_UNIT,
1925                                       thissize, thispos, 0, 1);
1926       bitsdone += thissize;
1927
1928       /* Shift this part into place for the result.  */
1929       if (BYTES_BIG_ENDIAN)
1930         {
1931           if (bitsize != bitsdone)
1932             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1933                                  build_int_2 (bitsize - bitsdone, 0), 0, 1);
1934         }
1935       else
1936         {
1937           if (bitsdone != thissize)
1938             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1939                                  build_int_2 (bitsdone - thissize, 0), 0, 1);
1940         }
1941
1942       if (first)
1943         result = part;
1944       else
1945         /* Combine the parts with bitwise or.  This works
1946            because we extracted each part as an unsigned bit field.  */
1947         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1948                                OPTAB_LIB_WIDEN);
1949
1950       first = 0;
1951     }
1952
1953   /* Unsigned bit field: we are done.  */
1954   if (unsignedp)
1955     return result;
1956   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1957   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1958                          build_int_2 (BITS_PER_WORD - bitsize, 0),
1959                          NULL_RTX, 0);
1960   return expand_shift (RSHIFT_EXPR, word_mode, result,
1961                        build_int_2 (BITS_PER_WORD - bitsize, 0), NULL_RTX, 0);
1962 }
1963 \f
1964 /* Add INC into TARGET.  */
1965
1966 void
1967 expand_inc (rtx target, rtx inc)
1968 {
1969   rtx value = expand_binop (GET_MODE (target), add_optab,
1970                             target, inc,
1971                             target, 0, OPTAB_LIB_WIDEN);
1972   if (value != target)
1973     emit_move_insn (target, value);
1974 }
1975
1976 /* Subtract DEC from TARGET.  */
1977
1978 void
1979 expand_dec (rtx target, rtx dec)
1980 {
1981   rtx value = expand_binop (GET_MODE (target), sub_optab,
1982                             target, dec,
1983                             target, 0, OPTAB_LIB_WIDEN);
1984   if (value != target)
1985     emit_move_insn (target, value);
1986 }
1987 \f
1988 /* Output a shift instruction for expression code CODE,
1989    with SHIFTED being the rtx for the value to shift,
1990    and AMOUNT the tree for the amount to shift by.
1991    Store the result in the rtx TARGET, if that is convenient.
1992    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
1993    Return the rtx for where the value is.  */
1994
1995 rtx
1996 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
1997               tree amount, rtx target, int unsignedp)
1998 {
1999   rtx op1, temp = 0;
2000   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2001   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2002   int try;
2003
2004   /* Previously detected shift-counts computed by NEGATE_EXPR
2005      and shifted in the other direction; but that does not work
2006      on all machines.  */
2007
2008   op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
2009
2010   if (SHIFT_COUNT_TRUNCATED)
2011     {
2012       if (GET_CODE (op1) == CONST_INT
2013           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2014               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2015         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2016                        % GET_MODE_BITSIZE (mode));
2017       else if (GET_CODE (op1) == SUBREG
2018                && subreg_lowpart_p (op1))
2019         op1 = SUBREG_REG (op1);
2020     }
2021
2022   if (op1 == const0_rtx)
2023     return shifted;
2024
2025   /* Check whether its cheaper to implement a left shift by a constant
2026      bit count by a sequence of additions.  */
2027   if (code == LSHIFT_EXPR
2028       && GET_CODE (op1) == CONST_INT
2029       && INTVAL (op1) > 0
2030       && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2031       && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode])
2032     {
2033       int i;
2034       for (i = 0; i < INTVAL (op1); i++)
2035         {
2036           temp = force_reg (mode, shifted);
2037           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2038                                   unsignedp, OPTAB_LIB_WIDEN);
2039         }
2040       return shifted;
2041     }
2042
2043   for (try = 0; temp == 0 && try < 3; try++)
2044     {
2045       enum optab_methods methods;
2046
2047       if (try == 0)
2048         methods = OPTAB_DIRECT;
2049       else if (try == 1)
2050         methods = OPTAB_WIDEN;
2051       else
2052         methods = OPTAB_LIB_WIDEN;
2053
2054       if (rotate)
2055         {
2056           /* Widening does not work for rotation.  */
2057           if (methods == OPTAB_WIDEN)
2058             continue;
2059           else if (methods == OPTAB_LIB_WIDEN)
2060             {
2061               /* If we have been unable to open-code this by a rotation,
2062                  do it as the IOR of two shifts.  I.e., to rotate A
2063                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2064                  where C is the bitsize of A.
2065
2066                  It is theoretically possible that the target machine might
2067                  not be able to perform either shift and hence we would
2068                  be making two libcalls rather than just the one for the
2069                  shift (similarly if IOR could not be done).  We will allow
2070                  this extremely unlikely lossage to avoid complicating the
2071                  code below.  */
2072
2073               rtx subtarget = target == shifted ? 0 : target;
2074               rtx temp1;
2075               tree type = TREE_TYPE (amount);
2076               tree new_amount = make_tree (type, op1);
2077               tree other_amount
2078                 = fold (build2 (MINUS_EXPR, type,
2079                                 convert (type,
2080                                          build_int_2 (GET_MODE_BITSIZE (mode),
2081                                                       0)),
2082                                 amount));
2083
2084               shifted = force_reg (mode, shifted);
2085
2086               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2087                                    mode, shifted, new_amount, subtarget, 1);
2088               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2089                                     mode, shifted, other_amount, 0, 1);
2090               return expand_binop (mode, ior_optab, temp, temp1, target,
2091                                    unsignedp, methods);
2092             }
2093
2094           temp = expand_binop (mode,
2095                                left ? rotl_optab : rotr_optab,
2096                                shifted, op1, target, unsignedp, methods);
2097
2098           /* If we don't have the rotate, but we are rotating by a constant
2099              that is in range, try a rotate in the opposite direction.  */
2100
2101           if (temp == 0 && GET_CODE (op1) == CONST_INT
2102               && INTVAL (op1) > 0
2103               && (unsigned int) INTVAL (op1) < GET_MODE_BITSIZE (mode))
2104             temp = expand_binop (mode,
2105                                  left ? rotr_optab : rotl_optab,
2106                                  shifted,
2107                                  GEN_INT (GET_MODE_BITSIZE (mode)
2108                                           - INTVAL (op1)),
2109                                  target, unsignedp, methods);
2110         }
2111       else if (unsignedp)
2112         temp = expand_binop (mode,
2113                              left ? ashl_optab : lshr_optab,
2114                              shifted, op1, target, unsignedp, methods);
2115
2116       /* Do arithmetic shifts.
2117          Also, if we are going to widen the operand, we can just as well
2118          use an arithmetic right-shift instead of a logical one.  */
2119       if (temp == 0 && ! rotate
2120           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2121         {
2122           enum optab_methods methods1 = methods;
2123
2124           /* If trying to widen a log shift to an arithmetic shift,
2125              don't accept an arithmetic shift of the same size.  */
2126           if (unsignedp)
2127             methods1 = OPTAB_MUST_WIDEN;
2128
2129           /* Arithmetic shift */
2130
2131           temp = expand_binop (mode,
2132                                left ? ashl_optab : ashr_optab,
2133                                shifted, op1, target, unsignedp, methods1);
2134         }
2135
2136       /* We used to try extzv here for logical right shifts, but that was
2137          only useful for one machine, the VAX, and caused poor code
2138          generation there for lshrdi3, so the code was deleted and a
2139          define_expand for lshrsi3 was added to vax.md.  */
2140     }
2141
2142   if (temp == 0)
2143     abort ();
2144   return temp;
2145 }
2146 \f
2147 enum alg_code { alg_zero, alg_m, alg_shift,
2148                   alg_add_t_m2, alg_sub_t_m2,
2149                   alg_add_factor, alg_sub_factor,
2150                   alg_add_t2_m, alg_sub_t2_m,
2151                   alg_add, alg_subtract, alg_factor, alg_shiftop };
2152
2153 /* This structure records a sequence of operations.
2154    `ops' is the number of operations recorded.
2155    `cost' is their total cost.
2156    The operations are stored in `op' and the corresponding
2157    logarithms of the integer coefficients in `log'.
2158
2159    These are the operations:
2160    alg_zero             total := 0;
2161    alg_m                total := multiplicand;
2162    alg_shift            total := total * coeff
2163    alg_add_t_m2         total := total + multiplicand * coeff;
2164    alg_sub_t_m2         total := total - multiplicand * coeff;
2165    alg_add_factor       total := total * coeff + total;
2166    alg_sub_factor       total := total * coeff - total;
2167    alg_add_t2_m         total := total * coeff + multiplicand;
2168    alg_sub_t2_m         total := total * coeff - multiplicand;
2169
2170    The first operand must be either alg_zero or alg_m.  */
2171
2172 struct algorithm
2173 {
2174   short cost;
2175   short ops;
2176   /* The size of the OP and LOG fields are not directly related to the
2177      word size, but the worst-case algorithms will be if we have few
2178      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2179      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2180      in total wordsize operations.  */
2181   enum alg_code op[MAX_BITS_PER_WORD];
2182   char log[MAX_BITS_PER_WORD];
2183 };
2184
2185 /* Indicates the type of fixup needed after a constant multiplication.
2186    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2187    the result should be negated, and ADD_VARIANT means that the
2188    multiplicand should be added to the result.  */
2189 enum mult_variant {basic_variant, negate_variant, add_variant};
2190
2191 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2192                         int, enum machine_mode mode);
2193 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2194                                  struct algorithm *, enum mult_variant *, int);
2195 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2196                               const struct algorithm *, enum mult_variant);
2197 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2198                                                  int, unsigned HOST_WIDE_INT *,
2199                                                  int *, int *);
2200 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2201 static rtx extract_high_half (enum machine_mode, rtx);
2202 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2203                                        int, int);
2204 /* Compute and return the best algorithm for multiplying by T.
2205    The algorithm must cost less than cost_limit
2206    If retval.cost >= COST_LIMIT, no algorithm was found and all
2207    other field of the returned struct are undefined.
2208    MODE is the machine mode of the multiplication.  */
2209
2210 static void
2211 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2212             int cost_limit, enum machine_mode mode)
2213 {
2214   int m;
2215   struct algorithm *alg_in, *best_alg;
2216   int cost;
2217   unsigned HOST_WIDE_INT q;
2218   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2219
2220   /* Indicate that no algorithm is yet found.  If no algorithm
2221      is found, this value will be returned and indicate failure.  */
2222   alg_out->cost = cost_limit;
2223
2224   if (cost_limit <= 0)
2225     return;
2226
2227   /* Restrict the bits of "t" to the multiplication's mode.  */
2228   t &= GET_MODE_MASK (mode);
2229
2230   /* t == 1 can be done in zero cost.  */
2231   if (t == 1)
2232     {
2233       alg_out->ops = 1;
2234       alg_out->cost = 0;
2235       alg_out->op[0] = alg_m;
2236       return;
2237     }
2238
2239   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2240      fail now.  */
2241   if (t == 0)
2242     {
2243       if (zero_cost >= cost_limit)
2244         return;
2245       else
2246         {
2247           alg_out->ops = 1;
2248           alg_out->cost = zero_cost;
2249           alg_out->op[0] = alg_zero;
2250           return;
2251         }
2252     }
2253
2254   /* We'll be needing a couple extra algorithm structures now.  */
2255
2256   alg_in = alloca (sizeof (struct algorithm));
2257   best_alg = alloca (sizeof (struct algorithm));
2258
2259   /* If we have a group of zero bits at the low-order part of T, try
2260      multiplying by the remaining bits and then doing a shift.  */
2261
2262   if ((t & 1) == 0)
2263     {
2264       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2265       if (m < maxm)
2266         {
2267           q = t >> m;
2268           /* The function expand_shift will choose between a shift and
2269              a sequence of additions, so the observed cost is given as
2270              MIN (m * add_cost[mode], shift_cost[mode][m]).  */
2271           cost = m * add_cost[mode];
2272           if (shift_cost[mode][m] < cost)
2273             cost = shift_cost[mode][m];
2274           synth_mult (alg_in, q, cost_limit - cost, mode);
2275
2276           cost += alg_in->cost;
2277           if (cost < cost_limit)
2278             {
2279               struct algorithm *x;
2280               x = alg_in, alg_in = best_alg, best_alg = x;
2281               best_alg->log[best_alg->ops] = m;
2282               best_alg->op[best_alg->ops] = alg_shift;
2283               cost_limit = cost;
2284             }
2285         }
2286     }
2287
2288   /* If we have an odd number, add or subtract one.  */
2289   if ((t & 1) != 0)
2290     {
2291       unsigned HOST_WIDE_INT w;
2292
2293       for (w = 1; (w & t) != 0; w <<= 1)
2294         ;
2295       /* If T was -1, then W will be zero after the loop.  This is another
2296          case where T ends with ...111.  Handling this with (T + 1) and
2297          subtract 1 produces slightly better code and results in algorithm
2298          selection much faster than treating it like the ...0111 case
2299          below.  */
2300       if (w == 0
2301           || (w > 2
2302               /* Reject the case where t is 3.
2303                  Thus we prefer addition in that case.  */
2304               && t != 3))
2305         {
2306           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2307
2308           cost = add_cost[mode];
2309           synth_mult (alg_in, t + 1, cost_limit - cost, mode);
2310
2311           cost += alg_in->cost;
2312           if (cost < cost_limit)
2313             {
2314               struct algorithm *x;
2315               x = alg_in, alg_in = best_alg, best_alg = x;
2316               best_alg->log[best_alg->ops] = 0;
2317               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2318               cost_limit = cost;
2319             }
2320         }
2321       else
2322         {
2323           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2324
2325           cost = add_cost[mode];
2326           synth_mult (alg_in, t - 1, cost_limit - cost, mode);
2327
2328           cost += alg_in->cost;
2329           if (cost < cost_limit)
2330             {
2331               struct algorithm *x;
2332               x = alg_in, alg_in = best_alg, best_alg = x;
2333               best_alg->log[best_alg->ops] = 0;
2334               best_alg->op[best_alg->ops] = alg_add_t_m2;
2335               cost_limit = cost;
2336             }
2337         }
2338     }
2339
2340   /* Look for factors of t of the form
2341      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2342      If we find such a factor, we can multiply by t using an algorithm that
2343      multiplies by q, shift the result by m and add/subtract it to itself.
2344
2345      We search for large factors first and loop down, even if large factors
2346      are less probable than small; if we find a large factor we will find a
2347      good sequence quickly, and therefore be able to prune (by decreasing
2348      COST_LIMIT) the search.  */
2349
2350   for (m = floor_log2 (t - 1); m >= 2; m--)
2351     {
2352       unsigned HOST_WIDE_INT d;
2353
2354       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2355       if (t % d == 0 && t > d && m < maxm)
2356         {
2357           cost = add_cost[mode] + shift_cost[mode][m];
2358           if (shiftadd_cost[mode][m] < cost)
2359             cost = shiftadd_cost[mode][m];
2360           synth_mult (alg_in, t / d, cost_limit - cost, mode);
2361
2362           cost += alg_in->cost;
2363           if (cost < cost_limit)
2364             {
2365               struct algorithm *x;
2366               x = alg_in, alg_in = best_alg, best_alg = x;
2367               best_alg->log[best_alg->ops] = m;
2368               best_alg->op[best_alg->ops] = alg_add_factor;
2369               cost_limit = cost;
2370             }
2371           /* Other factors will have been taken care of in the recursion.  */
2372           break;
2373         }
2374
2375       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2376       if (t % d == 0 && t > d && m < maxm)
2377         {
2378           cost = add_cost[mode] + shift_cost[mode][m];
2379           if (shiftsub_cost[mode][m] < cost)
2380             cost = shiftsub_cost[mode][m];
2381           synth_mult (alg_in, t / d, cost_limit - cost, mode);
2382
2383           cost += alg_in->cost;
2384           if (cost < cost_limit)
2385             {
2386               struct algorithm *x;
2387               x = alg_in, alg_in = best_alg, best_alg = x;
2388               best_alg->log[best_alg->ops] = m;
2389               best_alg->op[best_alg->ops] = alg_sub_factor;
2390               cost_limit = cost;
2391             }
2392           break;
2393         }
2394     }
2395
2396   /* Try shift-and-add (load effective address) instructions,
2397      i.e. do a*3, a*5, a*9.  */
2398   if ((t & 1) != 0)
2399     {
2400       q = t - 1;
2401       q = q & -q;
2402       m = exact_log2 (q);
2403       if (m >= 0 && m < maxm)
2404         {
2405           cost = shiftadd_cost[mode][m];
2406           synth_mult (alg_in, (t - 1) >> m, cost_limit - cost, mode);
2407
2408           cost += alg_in->cost;
2409           if (cost < cost_limit)
2410             {
2411               struct algorithm *x;
2412               x = alg_in, alg_in = best_alg, best_alg = x;
2413               best_alg->log[best_alg->ops] = m;
2414               best_alg->op[best_alg->ops] = alg_add_t2_m;
2415               cost_limit = cost;
2416             }
2417         }
2418
2419       q = t + 1;
2420       q = q & -q;
2421       m = exact_log2 (q);
2422       if (m >= 0 && m < maxm)
2423         {
2424           cost = shiftsub_cost[mode][m];
2425           synth_mult (alg_in, (t + 1) >> m, cost_limit - cost, mode);
2426
2427           cost += alg_in->cost;
2428           if (cost < cost_limit)
2429             {
2430               struct algorithm *x;
2431               x = alg_in, alg_in = best_alg, best_alg = x;
2432               best_alg->log[best_alg->ops] = m;
2433               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2434               cost_limit = cost;
2435             }
2436         }
2437     }
2438
2439   /* If cost_limit has not decreased since we stored it in alg_out->cost,
2440      we have not found any algorithm.  */
2441   if (cost_limit == alg_out->cost)
2442     return;
2443
2444   /* If we are getting a too long sequence for `struct algorithm'
2445      to record, make this search fail.  */
2446   if (best_alg->ops == MAX_BITS_PER_WORD)
2447     return;
2448
2449   /* Copy the algorithm from temporary space to the space at alg_out.
2450      We avoid using structure assignment because the majority of
2451      best_alg is normally undefined, and this is a critical function.  */
2452   alg_out->ops = best_alg->ops + 1;
2453   alg_out->cost = cost_limit;
2454   memcpy (alg_out->op, best_alg->op,
2455           alg_out->ops * sizeof *alg_out->op);
2456   memcpy (alg_out->log, best_alg->log,
2457           alg_out->ops * sizeof *alg_out->log);
2458 }
2459 \f
2460 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2461    Try three variations:
2462
2463        - a shift/add sequence based on VAL itself
2464        - a shift/add sequence based on -VAL, followed by a negation
2465        - a shift/add sequence based on VAL - 1, followed by an addition.
2466
2467    Return true if the cheapest of these cost less than MULT_COST,
2468    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2469
2470 static bool
2471 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2472                      struct algorithm *alg, enum mult_variant *variant,
2473                      int mult_cost)
2474 {
2475   struct algorithm alg2;
2476
2477   *variant = basic_variant;
2478   synth_mult (alg, val, mult_cost, mode);
2479
2480   /* This works only if the inverted value actually fits in an
2481      `unsigned int' */
2482   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2483     {
2484       synth_mult (&alg2, -val, MIN (alg->cost, mult_cost) - neg_cost[mode],
2485                   mode);
2486       alg2.cost += neg_cost[mode];
2487       if (alg2.cost < alg->cost)
2488         *alg = alg2, *variant = negate_variant;
2489     }
2490
2491   /* This proves very useful for division-by-constant.  */
2492   synth_mult (&alg2, val - 1, MIN (alg->cost, mult_cost) - add_cost[mode],
2493               mode);
2494   alg2.cost += add_cost[mode];
2495   if (alg2.cost < alg->cost)
2496     *alg = alg2, *variant = add_variant;
2497
2498   return alg->cost < mult_cost;
2499 }
2500
2501 /* A subroutine of expand_mult, used for constant multiplications.
2502    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2503    convenient.  Use the shift/add sequence described by ALG and apply
2504    the final fixup specified by VARIANT.  */
2505
2506 static rtx
2507 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2508                    rtx target, const struct algorithm *alg,
2509                    enum mult_variant variant)
2510 {
2511   HOST_WIDE_INT val_so_far;
2512   rtx insn, accum, tem;
2513   int opno;
2514   enum machine_mode nmode;
2515
2516   /* Avoid referencing memory over and over.
2517      For speed, but also for correctness when mem is volatile.  */
2518   if (MEM_P (op0))
2519     op0 = force_reg (mode, op0);
2520
2521   /* ACCUM starts out either as OP0 or as a zero, depending on
2522      the first operation.  */
2523
2524   if (alg->op[0] == alg_zero)
2525     {
2526       accum = copy_to_mode_reg (mode, const0_rtx);
2527       val_so_far = 0;
2528     }
2529   else if (alg->op[0] == alg_m)
2530     {
2531       accum = copy_to_mode_reg (mode, op0);
2532       val_so_far = 1;
2533     }
2534   else
2535     abort ();
2536
2537   for (opno = 1; opno < alg->ops; opno++)
2538     {
2539       int log = alg->log[opno];
2540       int preserve = preserve_subexpressions_p ();
2541       rtx shift_subtarget = preserve ? 0 : accum;
2542       rtx add_target
2543         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2544            && ! preserve)
2545           ? target : 0;
2546       rtx accum_target = preserve ? 0 : accum;
2547
2548       switch (alg->op[opno])
2549         {
2550         case alg_shift:
2551           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2552                                 build_int_2 (log, 0), NULL_RTX, 0);
2553           val_so_far <<= log;
2554           break;
2555
2556         case alg_add_t_m2:
2557           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2558                               build_int_2 (log, 0), NULL_RTX, 0);
2559           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2560                                  add_target ? add_target : accum_target);
2561           val_so_far += (HOST_WIDE_INT) 1 << log;
2562           break;
2563
2564         case alg_sub_t_m2:
2565           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2566                               build_int_2 (log, 0), NULL_RTX, 0);
2567           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2568                                  add_target ? add_target : accum_target);
2569           val_so_far -= (HOST_WIDE_INT) 1 << log;
2570           break;
2571
2572         case alg_add_t2_m:
2573           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2574                                 build_int_2 (log, 0), shift_subtarget,
2575                                 0);
2576           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2577                                  add_target ? add_target : accum_target);
2578           val_so_far = (val_so_far << log) + 1;
2579           break;
2580
2581         case alg_sub_t2_m:
2582           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2583                                 build_int_2 (log, 0), shift_subtarget, 0);
2584           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2585                                  add_target ? add_target : accum_target);
2586           val_so_far = (val_so_far << log) - 1;
2587           break;
2588
2589         case alg_add_factor:
2590           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2591                               build_int_2 (log, 0), NULL_RTX, 0);
2592           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2593                                  add_target ? add_target : accum_target);
2594           val_so_far += val_so_far << log;
2595           break;
2596
2597         case alg_sub_factor:
2598           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2599                               build_int_2 (log, 0), NULL_RTX, 0);
2600           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2601                                  (add_target ? add_target
2602                                   : preserve ? 0 : tem));
2603           val_so_far = (val_so_far << log) - val_so_far;
2604           break;
2605
2606         default:
2607           abort ();
2608         }
2609
2610       /* Write a REG_EQUAL note on the last insn so that we can cse
2611          multiplication sequences.  Note that if ACCUM is a SUBREG,
2612          we've set the inner register and must properly indicate
2613          that.  */
2614
2615       tem = op0, nmode = mode;
2616       if (GET_CODE (accum) == SUBREG)
2617         {
2618           nmode = GET_MODE (SUBREG_REG (accum));
2619           tem = gen_lowpart (nmode, op0);
2620         }
2621
2622       insn = get_last_insn ();
2623       set_unique_reg_note (insn, REG_EQUAL,
2624                            gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
2625     }
2626
2627   if (variant == negate_variant)
2628     {
2629       val_so_far = -val_so_far;
2630       accum = expand_unop (mode, neg_optab, accum, target, 0);
2631     }
2632   else if (variant == add_variant)
2633     {
2634       val_so_far = val_so_far + 1;
2635       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2636     }
2637
2638   /* Compare only the bits of val and val_so_far that are significant
2639      in the result mode, to avoid sign-/zero-extension confusion.  */
2640   val &= GET_MODE_MASK (mode);
2641   val_so_far &= GET_MODE_MASK (mode);
2642   if (val != val_so_far)
2643     abort ();
2644
2645   return accum;
2646 }
2647
2648 /* Perform a multiplication and return an rtx for the result.
2649    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2650    TARGET is a suggestion for where to store the result (an rtx).
2651
2652    We check specially for a constant integer as OP1.
2653    If you want this check for OP0 as well, then before calling
2654    you should swap the two operands if OP0 would be constant.  */
2655
2656 rtx
2657 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
2658              int unsignedp)
2659 {
2660   rtx const_op1 = op1;
2661   enum mult_variant variant;
2662   struct algorithm algorithm;
2663
2664   /* synth_mult does an `unsigned int' multiply.  As long as the mode is
2665      less than or equal in size to `unsigned int' this doesn't matter.
2666      If the mode is larger than `unsigned int', then synth_mult works only
2667      if the constant value exactly fits in an `unsigned int' without any
2668      truncation.  This means that multiplying by negative values does
2669      not work; results are off by 2^32 on a 32 bit machine.  */
2670
2671   /* If we are multiplying in DImode, it may still be a win
2672      to try to work with shifts and adds.  */
2673   if (GET_CODE (op1) == CONST_DOUBLE
2674       && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2675       && HOST_BITS_PER_INT >= BITS_PER_WORD
2676       && CONST_DOUBLE_HIGH (op1) == 0)
2677     const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2678   else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2679            && GET_CODE (op1) == CONST_INT
2680            && INTVAL (op1) < 0)
2681     const_op1 = 0;
2682
2683   /* We used to test optimize here, on the grounds that it's better to
2684      produce a smaller program when -O is not used.
2685      But this causes such a terrible slowdown sometimes
2686      that it seems better to use synth_mult always.  */
2687
2688   if (const_op1 && GET_CODE (const_op1) == CONST_INT
2689       && (unsignedp || !flag_trapv))
2690     {
2691       int mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
2692
2693       if (choose_mult_variant (mode, INTVAL (const_op1), &algorithm, &variant,
2694                                mult_cost))
2695         return expand_mult_const (mode, op0, INTVAL (const_op1), target,
2696                                   &algorithm, variant);
2697     }
2698
2699   if (GET_CODE (op0) == CONST_DOUBLE)
2700     {
2701       rtx temp = op0;
2702       op0 = op1;
2703       op1 = temp;
2704     }
2705
2706   /* Expand x*2.0 as x+x.  */
2707   if (GET_CODE (op1) == CONST_DOUBLE
2708       && GET_MODE_CLASS (mode) == MODE_FLOAT)
2709     {
2710       REAL_VALUE_TYPE d;
2711       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
2712
2713       if (REAL_VALUES_EQUAL (d, dconst2))
2714         {
2715           op0 = force_reg (GET_MODE (op0), op0);
2716           return expand_binop (mode, add_optab, op0, op0,
2717                                target, unsignedp, OPTAB_LIB_WIDEN);
2718         }
2719     }
2720
2721   /* This used to use umul_optab if unsigned, but for non-widening multiply
2722      there is no difference between signed and unsigned.  */
2723   op0 = expand_binop (mode,
2724                       ! unsignedp
2725                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
2726                       ? smulv_optab : smul_optab,
2727                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2728   if (op0 == 0)
2729     abort ();
2730   return op0;
2731 }
2732 \f
2733 /* Return the smallest n such that 2**n >= X.  */
2734
2735 int
2736 ceil_log2 (unsigned HOST_WIDE_INT x)
2737 {
2738   return floor_log2 (x - 1) + 1;
2739 }
2740
2741 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2742    replace division by D, and put the least significant N bits of the result
2743    in *MULTIPLIER_PTR and return the most significant bit.
2744
2745    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
2746    needed precision is in PRECISION (should be <= N).
2747
2748    PRECISION should be as small as possible so this function can choose
2749    multiplier more freely.
2750
2751    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
2752    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2753
2754    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2755    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
2756
2757 static
2758 unsigned HOST_WIDE_INT
2759 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
2760                    unsigned HOST_WIDE_INT *multiplier_ptr,
2761                    int *post_shift_ptr, int *lgup_ptr)
2762 {
2763   HOST_WIDE_INT mhigh_hi, mlow_hi;
2764   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
2765   int lgup, post_shift;
2766   int pow, pow2;
2767   unsigned HOST_WIDE_INT nl, dummy1;
2768   HOST_WIDE_INT nh, dummy2;
2769
2770   /* lgup = ceil(log2(divisor)); */
2771   lgup = ceil_log2 (d);
2772
2773   if (lgup > n)
2774     abort ();
2775
2776   pow = n + lgup;
2777   pow2 = n + lgup - precision;
2778
2779   if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2780     {
2781       /* We could handle this with some effort, but this case is much better
2782          handled directly with a scc insn, so rely on caller using that.  */
2783       abort ();
2784     }
2785
2786   /* mlow = 2^(N + lgup)/d */
2787  if (pow >= HOST_BITS_PER_WIDE_INT)
2788     {
2789       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2790       nl = 0;
2791     }
2792   else
2793     {
2794       nh = 0;
2795       nl = (unsigned HOST_WIDE_INT) 1 << pow;
2796     }
2797   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2798                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2799
2800   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
2801   if (pow2 >= HOST_BITS_PER_WIDE_INT)
2802     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2803   else
2804     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2805   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2806                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2807
2808   if (mhigh_hi && nh - d >= d)
2809     abort ();
2810   if (mhigh_hi > 1 || mlow_hi > 1)
2811     abort ();
2812   /* Assert that mlow < mhigh.  */
2813   if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2814     abort ();
2815
2816   /* If precision == N, then mlow, mhigh exceed 2^N
2817      (but they do not exceed 2^(N+1)).  */
2818
2819   /* Reduce to lowest terms.  */
2820   for (post_shift = lgup; post_shift > 0; post_shift--)
2821     {
2822       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2823       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2824       if (ml_lo >= mh_lo)
2825         break;
2826
2827       mlow_hi = 0;
2828       mlow_lo = ml_lo;
2829       mhigh_hi = 0;
2830       mhigh_lo = mh_lo;
2831     }
2832
2833   *post_shift_ptr = post_shift;
2834   *lgup_ptr = lgup;
2835   if (n < HOST_BITS_PER_WIDE_INT)
2836     {
2837       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2838       *multiplier_ptr = mhigh_lo & mask;
2839       return mhigh_lo >= mask;
2840     }
2841   else
2842     {
2843       *multiplier_ptr = mhigh_lo;
2844       return mhigh_hi;
2845     }
2846 }
2847
2848 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
2849    congruent to 1 (mod 2**N).  */
2850
2851 static unsigned HOST_WIDE_INT
2852 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
2853 {
2854   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
2855
2856   /* The algorithm notes that the choice y = x satisfies
2857      x*y == 1 mod 2^3, since x is assumed odd.
2858      Each iteration doubles the number of bits of significance in y.  */
2859
2860   unsigned HOST_WIDE_INT mask;
2861   unsigned HOST_WIDE_INT y = x;
2862   int nbit = 3;
2863
2864   mask = (n == HOST_BITS_PER_WIDE_INT
2865           ? ~(unsigned HOST_WIDE_INT) 0
2866           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2867
2868   while (nbit < n)
2869     {
2870       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
2871       nbit *= 2;
2872     }
2873   return y;
2874 }
2875
2876 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
2877    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
2878    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
2879    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
2880    become signed.
2881
2882    The result is put in TARGET if that is convenient.
2883
2884    MODE is the mode of operation.  */
2885
2886 rtx
2887 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
2888                              rtx op1, rtx target, int unsignedp)
2889 {
2890   rtx tem;
2891   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
2892
2893   tem = expand_shift (RSHIFT_EXPR, mode, op0,
2894                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2895                       NULL_RTX, 0);
2896   tem = expand_and (mode, tem, op1, NULL_RTX);
2897   adj_operand
2898     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2899                      adj_operand);
2900
2901   tem = expand_shift (RSHIFT_EXPR, mode, op1,
2902                       build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2903                       NULL_RTX, 0);
2904   tem = expand_and (mode, tem, op0, NULL_RTX);
2905   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2906                           target);
2907
2908   return target;
2909 }
2910
2911 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
2912
2913 static rtx
2914 extract_high_half (enum machine_mode mode, rtx op)
2915 {
2916   enum machine_mode wider_mode;
2917
2918   if (mode == word_mode)
2919     return gen_highpart (mode, op);
2920
2921   wider_mode = GET_MODE_WIDER_MODE (mode);
2922   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
2923                      build_int_2 (GET_MODE_BITSIZE (mode), 0), 0, 1);
2924   return convert_modes (mode, wider_mode, op, 0);
2925 }
2926
2927 /* Like expand_mult_highpart, but only consider using a multiplication
2928    optab.  OP1 is an rtx for the constant operand.  */
2929
2930 static rtx
2931 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
2932                             rtx target, int unsignedp, int max_cost)
2933 {
2934   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
2935   enum machine_mode wider_mode;
2936   optab moptab;
2937   rtx tem;
2938   int size;
2939
2940   wider_mode = GET_MODE_WIDER_MODE (mode);
2941   size = GET_MODE_BITSIZE (mode);
2942
2943   /* Firstly, try using a multiplication insn that only generates the needed
2944      high part of the product, and in the sign flavor of unsignedp.  */
2945   if (mul_highpart_cost[mode] < max_cost)
2946     {
2947       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
2948       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
2949                           unsignedp, OPTAB_DIRECT);
2950       if (tem)
2951         return tem;
2952     }
2953
2954   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
2955      Need to adjust the result after the multiplication.  */
2956   if (size - 1 < BITS_PER_WORD
2957       && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
2958           + 4 * add_cost[mode] < max_cost))
2959     {
2960       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
2961       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
2962                           unsignedp, OPTAB_DIRECT);
2963       if (tem)
2964         /* We used the wrong signedness.  Adjust the result.  */
2965         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
2966                                             tem, unsignedp);
2967     }
2968
2969   /* Try widening multiplication.  */
2970   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
2971   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
2972       && mul_widen_cost[wider_mode] < max_cost)
2973     {
2974       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
2975                           unsignedp, OPTAB_WIDEN);
2976       if (tem)
2977         return extract_high_half (mode, tem);
2978     }
2979
2980   /* Try widening the mode and perform a non-widening multiplication.  */
2981   moptab = smul_optab;
2982   if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
2983       && size - 1 < BITS_PER_WORD
2984       && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
2985     {
2986       tem = expand_binop (wider_mode, moptab, op0, op1, 0,
2987                           unsignedp, OPTAB_WIDEN);
2988       if (tem)
2989         return extract_high_half (mode, tem);
2990     }
2991
2992   /* Try widening multiplication of opposite signedness, and adjust.  */
2993   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
2994   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
2995       && size - 1 < BITS_PER_WORD
2996       && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
2997           + 4 * add_cost[mode] < max_cost))
2998     {
2999       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3000                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3001       if (tem != 0)
3002         {
3003           tem = extract_high_half (mode, tem);
3004           /* We used the wrong signedness.  Adjust the result.  */
3005           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3006                                               target, unsignedp);
3007         }
3008     }
3009
3010   return 0;
3011 }
3012
3013 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
3014    in TARGET if that is convenient, and return where the result is.  If the
3015    operation can not be performed, 0 is returned.
3016
3017    MODE is the mode of operation and result.
3018
3019    UNSIGNEDP nonzero means unsigned multiply.
3020
3021    MAX_COST is the total allowed cost for the expanded RTL.  */
3022
3023 rtx
3024 expand_mult_highpart (enum machine_mode mode, rtx op0,
3025                       unsigned HOST_WIDE_INT cnst1, rtx target,
3026                       int unsignedp, int max_cost)
3027 {
3028   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3029   int extra_cost;
3030   bool sign_adjust = false;
3031   enum mult_variant variant;
3032   struct algorithm alg;
3033   rtx op1, tem;
3034
3035   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3036   if (GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3037     abort ();
3038
3039   op1 = gen_int_mode (cnst1, wider_mode);
3040   cnst1 &= GET_MODE_MASK (mode);
3041
3042   /* We can't optimize modes wider than BITS_PER_WORD.
3043      ??? We might be able to perform double-word arithmetic if
3044      mode == word_mode, however all the cost calculations in
3045      synth_mult etc. assume single-word operations.  */
3046   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3047     return expand_mult_highpart_optab (mode, op0, op1, target,
3048                                        unsignedp, max_cost);
3049
3050   extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3051
3052   /* Check whether we try to multiply by a negative constant.  */
3053   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3054     {
3055       sign_adjust = true;
3056       extra_cost += add_cost[mode];
3057     }
3058
3059   /* See whether shift/add multiplication is cheap enough.  */
3060   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3061                            max_cost - extra_cost))
3062     {
3063       /* See whether the specialized multiplication optabs are
3064          cheaper than the shift/add version.  */
3065       tem = expand_mult_highpart_optab (mode, op0, op1, target,
3066                                         unsignedp, alg.cost + extra_cost);
3067       if (tem)
3068         return tem;
3069
3070       tem = convert_to_mode (wider_mode, op0, unsignedp);
3071       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3072       tem = extract_high_half (mode, tem);
3073
3074       /* Adjust result for signedness.  */
3075       if (sign_adjust)
3076         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3077
3078       return tem;
3079     }
3080   return expand_mult_highpart_optab (mode, op0, op1, target,
3081                                      unsignedp, max_cost);
3082 }
3083
3084
3085 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3086
3087 static rtx
3088 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3089 {
3090   unsigned HOST_WIDE_INT mask;
3091   rtx result, temp, shift, label;
3092   int logd;
3093
3094   logd = floor_log2 (d);
3095   result = gen_reg_rtx (mode);
3096
3097   /* Avoid conditional branches when they're expensive.  */
3098   if (BRANCH_COST >= 2
3099       && !optimize_size)
3100     {
3101       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3102                                       mode, 0, -1);
3103       if (signmask)
3104         {
3105           signmask = force_reg (mode, signmask);
3106           mask = ((HOST_WIDE_INT) 1 << logd) - 1;
3107           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3108
3109           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3110              which instruction sequence to use.  If logical right shifts
3111              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3112              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3113
3114           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3115           if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing
3116               || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3117             {
3118               temp = expand_binop (mode, xor_optab, op0, signmask,
3119                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3120               temp = expand_binop (mode, sub_optab, temp, signmask,
3121                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3122               temp = expand_binop (mode, and_optab, temp, GEN_INT (mask),
3123                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3124               temp = expand_binop (mode, xor_optab, temp, signmask,
3125                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3126               temp = expand_binop (mode, sub_optab, temp, signmask,
3127                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3128             }
3129           else
3130             {
3131               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3132                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3133               signmask = force_reg (mode, signmask);
3134
3135               temp = expand_binop (mode, add_optab, op0, signmask,
3136                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3137               temp = expand_binop (mode, and_optab, temp, GEN_INT (mask),
3138                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3139               temp = expand_binop (mode, sub_optab, temp, signmask,
3140                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3141             }
3142           return temp;
3143         }
3144     }
3145
3146   /* Mask contains the mode's signbit and the significant bits of the
3147      modulus.  By including the signbit in the operation, many targets
3148      can avoid an explicit compare operation in the following comparison
3149      against zero.  */
3150
3151   mask = (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1)
3152          | (((HOST_WIDE_INT) 1 << logd) - 1);
3153
3154   temp = expand_binop (mode, and_optab, op0, GEN_INT (mask), result,
3155                        1, OPTAB_LIB_WIDEN);
3156   if (temp != result)
3157     emit_move_insn (result, temp);
3158
3159   label = gen_label_rtx ();
3160   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3161
3162   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3163                        0, OPTAB_LIB_WIDEN);
3164   mask = (HOST_WIDE_INT) -1 << logd;
3165   temp = expand_binop (mode, ior_optab, temp, GEN_INT (mask), result,
3166                        1, OPTAB_LIB_WIDEN);
3167   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3168                        0, OPTAB_LIB_WIDEN);
3169   if (temp != result)
3170     emit_move_insn (result, temp);
3171   emit_label (label);
3172   return result;
3173 }
3174
3175 /* Expand signed division of OP0 by a power of two D in mode MODE.
3176    This routine is only called for positive values of D.  */
3177
3178 static rtx
3179 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3180 {
3181   rtx temp, label;
3182   tree shift;
3183   int logd;
3184
3185   logd = floor_log2 (d);
3186   shift = build_int_2 (logd, 0);
3187
3188   if (d == 2 && BRANCH_COST >= 1)
3189     {
3190       temp = gen_reg_rtx (mode);
3191       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3192       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3193                            0, OPTAB_LIB_WIDEN);
3194       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3195     }
3196
3197 #ifdef HAVE_conditional_move
3198   if (BRANCH_COST >= 2)
3199     {
3200       rtx temp2;
3201
3202       start_sequence ();
3203       temp2 = copy_to_mode_reg (mode, op0);
3204       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3205                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3206       temp = force_reg (mode, temp);
3207
3208       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3209       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3210                                      mode, temp, temp2, mode, 0);
3211       if (temp2)
3212         {
3213           rtx seq = get_insns ();
3214           end_sequence ();
3215           emit_insn (seq);
3216           return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3217         }
3218       end_sequence ();
3219     }
3220 #endif
3221
3222   if (BRANCH_COST >= 2)
3223     {
3224       int ushift = GET_MODE_BITSIZE (mode) - logd;
3225
3226       temp = gen_reg_rtx (mode);
3227       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3228       if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
3229         temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3230                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3231       else
3232         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3233                              build_int_2 (ushift, 0), NULL_RTX, 1);
3234       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3235                            0, OPTAB_LIB_WIDEN);
3236       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3237     }
3238
3239   label = gen_label_rtx ();
3240   temp = copy_to_mode_reg (mode, op0);
3241   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3242   expand_inc (temp, GEN_INT (d - 1));
3243   emit_label (label);
3244   return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3245 }
3246 \f
3247 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3248    if that is convenient, and returning where the result is.
3249    You may request either the quotient or the remainder as the result;
3250    specify REM_FLAG nonzero to get the remainder.
3251
3252    CODE is the expression code for which kind of division this is;
3253    it controls how rounding is done.  MODE is the machine mode to use.
3254    UNSIGNEDP nonzero means do unsigned division.  */
3255
3256 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3257    and then correct it by or'ing in missing high bits
3258    if result of ANDI is nonzero.
3259    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3260    This could optimize to a bfexts instruction.
3261    But C doesn't use these operations, so their optimizations are
3262    left for later.  */
3263 /* ??? For modulo, we don't actually need the highpart of the first product,
3264    the low part will do nicely.  And for small divisors, the second multiply
3265    can also be a low-part only multiply or even be completely left out.
3266    E.g. to calculate the remainder of a division by 3 with a 32 bit
3267    multiply, multiply with 0x55555556 and extract the upper two bits;
3268    the result is exact for inputs up to 0x1fffffff.
3269    The input range can be reduced by using cross-sum rules.
3270    For odd divisors >= 3, the following table gives right shift counts
3271    so that if a number is shifted by an integer multiple of the given
3272    amount, the remainder stays the same:
3273    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3274    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3275    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3276    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3277    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3278
3279    Cross-sum rules for even numbers can be derived by leaving as many bits
3280    to the right alone as the divisor has zeros to the right.
3281    E.g. if x is an unsigned 32 bit number:
3282    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3283    */
3284
3285 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
3286
3287 rtx
3288 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3289                rtx op0, rtx op1, rtx target, int unsignedp)
3290 {
3291   enum machine_mode compute_mode;
3292   rtx tquotient;
3293   rtx quotient = 0, remainder = 0;
3294   rtx last;
3295   int size;
3296   rtx insn, set;
3297   optab optab1, optab2;
3298   int op1_is_constant, op1_is_pow2 = 0;
3299   int max_cost, extra_cost;
3300   static HOST_WIDE_INT last_div_const = 0;
3301   static HOST_WIDE_INT ext_op1;
3302
3303   op1_is_constant = GET_CODE (op1) == CONST_INT;
3304   if (op1_is_constant)
3305     {
3306       ext_op1 = INTVAL (op1);
3307       if (unsignedp)
3308         ext_op1 &= GET_MODE_MASK (mode);
3309       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3310                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3311     }
3312
3313   /*
3314      This is the structure of expand_divmod:
3315
3316      First comes code to fix up the operands so we can perform the operations
3317      correctly and efficiently.
3318
3319      Second comes a switch statement with code specific for each rounding mode.
3320      For some special operands this code emits all RTL for the desired
3321      operation, for other cases, it generates only a quotient and stores it in
3322      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3323      to indicate that it has not done anything.
3324
3325      Last comes code that finishes the operation.  If QUOTIENT is set and
3326      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3327      QUOTIENT is not set, it is computed using trunc rounding.
3328
3329      We try to generate special code for division and remainder when OP1 is a
3330      constant.  If |OP1| = 2**n we can use shifts and some other fast
3331      operations.  For other values of OP1, we compute a carefully selected
3332      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3333      by m.
3334
3335      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3336      half of the product.  Different strategies for generating the product are
3337      implemented in expand_mult_highpart.
3338
3339      If what we actually want is the remainder, we generate that by another
3340      by-constant multiplication and a subtraction.  */
3341
3342   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3343      code below will malfunction if we are, so check here and handle
3344      the special case if so.  */
3345   if (op1 == const1_rtx)
3346     return rem_flag ? const0_rtx : op0;
3347
3348     /* When dividing by -1, we could get an overflow.
3349      negv_optab can handle overflows.  */
3350   if (! unsignedp && op1 == constm1_rtx)
3351     {
3352       if (rem_flag)
3353         return const0_rtx;
3354       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3355                           ? negv_optab : neg_optab, op0, target, 0);
3356     }
3357
3358   if (target
3359       /* Don't use the function value register as a target
3360          since we have to read it as well as write it,
3361          and function-inlining gets confused by this.  */
3362       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3363           /* Don't clobber an operand while doing a multi-step calculation.  */
3364           || ((rem_flag || op1_is_constant)
3365               && (reg_mentioned_p (target, op0)
3366                   || (MEM_P (op0) && MEM_P (target))))
3367           || reg_mentioned_p (target, op1)
3368           || (MEM_P (op1) && MEM_P (target))))
3369     target = 0;
3370
3371   /* Get the mode in which to perform this computation.  Normally it will
3372      be MODE, but sometimes we can't do the desired operation in MODE.
3373      If so, pick a wider mode in which we can do the operation.  Convert
3374      to that mode at the start to avoid repeated conversions.
3375
3376      First see what operations we need.  These depend on the expression
3377      we are evaluating.  (We assume that divxx3 insns exist under the
3378      same conditions that modxx3 insns and that these insns don't normally
3379      fail.  If these assumptions are not correct, we may generate less
3380      efficient code in some cases.)
3381
3382      Then see if we find a mode in which we can open-code that operation
3383      (either a division, modulus, or shift).  Finally, check for the smallest
3384      mode for which we can do the operation with a library call.  */
3385
3386   /* We might want to refine this now that we have division-by-constant
3387      optimization.  Since expand_mult_highpart tries so many variants, it is
3388      not straightforward to generalize this.  Maybe we should make an array
3389      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3390
3391   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3392             ? (unsignedp ? lshr_optab : ashr_optab)
3393             : (unsignedp ? udiv_optab : sdiv_optab));
3394   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3395             ? optab1
3396             : (unsignedp ? udivmod_optab : sdivmod_optab));
3397
3398   for (compute_mode = mode; compute_mode != VOIDmode;
3399        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3400     if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
3401         || optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
3402       break;
3403
3404   if (compute_mode == VOIDmode)
3405     for (compute_mode = mode; compute_mode != VOIDmode;
3406          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3407       if (optab1->handlers[compute_mode].libfunc
3408           || optab2->handlers[compute_mode].libfunc)
3409         break;
3410
3411   /* If we still couldn't find a mode, use MODE, but we'll probably abort
3412      in expand_binop.  */
3413   if (compute_mode == VOIDmode)
3414     compute_mode = mode;
3415
3416   if (target && GET_MODE (target) == compute_mode)
3417     tquotient = target;
3418   else
3419     tquotient = gen_reg_rtx (compute_mode);
3420
3421   size = GET_MODE_BITSIZE (compute_mode);
3422 #if 0
3423   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3424      (mode), and thereby get better code when OP1 is a constant.  Do that
3425      later.  It will require going over all usages of SIZE below.  */
3426   size = GET_MODE_BITSIZE (mode);
3427 #endif
3428
3429   /* Only deduct something for a REM if the last divide done was
3430      for a different constant.   Then set the constant of the last
3431      divide.  */
3432   max_cost = div_cost[compute_mode]
3433     - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3434                       && INTVAL (op1) == last_div_const)
3435        ? mul_cost[compute_mode] + add_cost[compute_mode]
3436        : 0);
3437
3438   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3439
3440   /* Now convert to the best mode to use.  */
3441   if (compute_mode != mode)
3442     {
3443       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3444       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3445
3446       /* convert_modes may have placed op1 into a register, so we
3447          must recompute the following.  */
3448       op1_is_constant = GET_CODE (op1) == CONST_INT;
3449       op1_is_pow2 = (op1_is_constant
3450                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3451                           || (! unsignedp
3452                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3453     }
3454
3455   /* If one of the operands is a volatile MEM, copy it into a register.  */
3456
3457   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3458     op0 = force_reg (compute_mode, op0);
3459   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3460     op1 = force_reg (compute_mode, op1);
3461
3462   /* If we need the remainder or if OP1 is constant, we need to
3463      put OP0 in a register in case it has any queued subexpressions.  */
3464   if (rem_flag || op1_is_constant)
3465     op0 = force_reg (compute_mode, op0);
3466
3467   last = get_last_insn ();
3468
3469   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3470   if (unsignedp)
3471     {
3472       if (code == FLOOR_DIV_EXPR)
3473         code = TRUNC_DIV_EXPR;
3474       if (code == FLOOR_MOD_EXPR)
3475         code = TRUNC_MOD_EXPR;
3476       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3477         code = TRUNC_DIV_EXPR;
3478     }
3479
3480   if (op1 != const0_rtx)
3481     switch (code)
3482       {
3483       case TRUNC_MOD_EXPR:
3484       case TRUNC_DIV_EXPR:
3485         if (op1_is_constant)
3486           {
3487             if (unsignedp)
3488               {
3489                 unsigned HOST_WIDE_INT mh, ml;
3490                 int pre_shift, post_shift;
3491                 int dummy;
3492                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3493                                             & GET_MODE_MASK (compute_mode));
3494
3495                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3496                   {
3497                     pre_shift = floor_log2 (d);
3498                     if (rem_flag)
3499                       {
3500                         remainder
3501                           = expand_binop (compute_mode, and_optab, op0,
3502                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3503                                           remainder, 1,
3504                                           OPTAB_LIB_WIDEN);
3505                         if (remainder)
3506                           return gen_lowpart (mode, remainder);
3507                       }
3508                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3509                                              build_int_2 (pre_shift, 0),
3510                                              tquotient, 1);
3511                   }
3512                 else if (size <= HOST_BITS_PER_WIDE_INT)
3513                   {
3514                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3515                       {
3516                         /* Most significant bit of divisor is set; emit an scc
3517                            insn.  */
3518                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3519                                                     compute_mode, 1, 1);
3520                         if (quotient == 0)
3521                           goto fail1;
3522                       }
3523                     else
3524                       {
3525                         /* Find a suitable multiplier and right shift count
3526                            instead of multiplying with D.  */
3527
3528                         mh = choose_multiplier (d, size, size,
3529                                                 &ml, &post_shift, &dummy);
3530
3531                         /* If the suggested multiplier is more than SIZE bits,
3532                            we can do better for even divisors, using an
3533                            initial right shift.  */
3534                         if (mh != 0 && (d & 1) == 0)
3535                           {
3536                             pre_shift = floor_log2 (d & -d);
3537                             mh = choose_multiplier (d >> pre_shift, size,
3538                                                     size - pre_shift,
3539                                                     &ml, &post_shift, &dummy);
3540                             if (mh)
3541                               abort ();
3542                           }
3543                         else
3544                           pre_shift = 0;
3545
3546                         if (mh != 0)
3547                           {
3548                             rtx t1, t2, t3, t4;
3549
3550                             if (post_shift - 1 >= BITS_PER_WORD)
3551                               goto fail1;
3552
3553                             extra_cost
3554                               = (shift_cost[compute_mode][post_shift - 1]
3555                                  + shift_cost[compute_mode][1]
3556                                  + 2 * add_cost[compute_mode]);
3557                             t1 = expand_mult_highpart (compute_mode, op0, ml,
3558                                                        NULL_RTX, 1,
3559                                                        max_cost - extra_cost);
3560                             if (t1 == 0)
3561                               goto fail1;
3562                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
3563                                                                op0, t1),
3564                                                 NULL_RTX);
3565                             t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3566                                                build_int_2 (1, 0), NULL_RTX,1);
3567                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
3568                                                               t1, t3),
3569                                                 NULL_RTX);
3570                             quotient
3571                               = expand_shift (RSHIFT_EXPR, compute_mode, t4,
3572                                               build_int_2 (post_shift - 1, 0),
3573                                               tquotient, 1);
3574                           }
3575                         else
3576                           {
3577                             rtx t1, t2;
3578
3579                             if (pre_shift >= BITS_PER_WORD
3580                                 || post_shift >= BITS_PER_WORD)
3581                               goto fail1;
3582
3583                             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3584                                                build_int_2 (pre_shift, 0),
3585                                                NULL_RTX, 1);
3586                             extra_cost
3587                               = (shift_cost[compute_mode][pre_shift]
3588                                  + shift_cost[compute_mode][post_shift]);
3589                             t2 = expand_mult_highpart (compute_mode, t1, ml,
3590                                                        NULL_RTX, 1,
3591                                                        max_cost - extra_cost);
3592                             if (t2 == 0)
3593                               goto fail1;
3594                             quotient
3595                               = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3596                                               build_int_2 (post_shift, 0),
3597                                               tquotient, 1);
3598                           }
3599                       }
3600                   }
3601                 else            /* Too wide mode to use tricky code */
3602                   break;
3603
3604                 insn = get_last_insn ();
3605                 if (insn != last
3606                     && (set = single_set (insn)) != 0
3607                     && SET_DEST (set) == quotient)
3608                   set_unique_reg_note (insn,
3609                                        REG_EQUAL,
3610                                        gen_rtx_UDIV (compute_mode, op0, op1));
3611               }
3612             else                /* TRUNC_DIV, signed */
3613               {
3614                 unsigned HOST_WIDE_INT ml;
3615                 int lgup, post_shift;
3616                 HOST_WIDE_INT d = INTVAL (op1);
3617                 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3618
3619                 /* n rem d = n rem -d */
3620                 if (rem_flag && d < 0)
3621                   {
3622                     d = abs_d;
3623                     op1 = gen_int_mode (abs_d, compute_mode);
3624                   }
3625
3626                 if (d == 1)
3627                   quotient = op0;
3628                 else if (d == -1)
3629                   quotient = expand_unop (compute_mode, neg_optab, op0,
3630                                           tquotient, 0);
3631                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3632                   {
3633                     /* This case is not handled correctly below.  */
3634                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
3635                                                 compute_mode, 1, 1);
3636                     if (quotient == 0)
3637                       goto fail1;
3638                   }
3639                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3640                          && (rem_flag ? smod_pow2_cheap[compute_mode]
3641                                       : sdiv_pow2_cheap[compute_mode])
3642                          /* We assume that cheap metric is true if the
3643                             optab has an expander for this mode.  */
3644                          && (((rem_flag ? smod_optab : sdiv_optab)
3645                               ->handlers[compute_mode].insn_code
3646                               != CODE_FOR_nothing)
3647                              || (sdivmod_optab->handlers[compute_mode]
3648                                  .insn_code != CODE_FOR_nothing)))
3649                   ;
3650                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3651                   {
3652                     if (rem_flag)
3653                       {
3654                         remainder = expand_smod_pow2 (compute_mode, op0, d);
3655                         if (remainder)
3656                           return gen_lowpart (mode, remainder);
3657                       }
3658                     quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
3659
3660                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
3661                        negate the quotient.  */
3662                     if (d < 0)
3663                       {
3664                         insn = get_last_insn ();
3665                         if (insn != last
3666                             && (set = single_set (insn)) != 0
3667                             && SET_DEST (set) == quotient
3668                             && abs_d < ((unsigned HOST_WIDE_INT) 1
3669                                         << (HOST_BITS_PER_WIDE_INT - 1)))
3670                           set_unique_reg_note (insn,
3671                                                REG_EQUAL,
3672                                                gen_rtx_DIV (compute_mode,
3673                                                             op0,
3674                                                             GEN_INT
3675                                                             (trunc_int_for_mode
3676                                                              (abs_d,
3677                                                               compute_mode))));
3678
3679                         quotient = expand_unop (compute_mode, neg_optab,
3680                                                 quotient, quotient, 0);
3681                       }
3682                   }
3683                 else if (size <= HOST_BITS_PER_WIDE_INT)
3684                   {
3685                     choose_multiplier (abs_d, size, size - 1,
3686                                        &ml, &post_shift, &lgup);
3687                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3688                       {
3689                         rtx t1, t2, t3;
3690
3691                         if (post_shift >= BITS_PER_WORD
3692                             || size - 1 >= BITS_PER_WORD)
3693                           goto fail1;
3694
3695                         extra_cost = (shift_cost[compute_mode][post_shift]
3696                                       + shift_cost[compute_mode][size - 1]
3697                                       + add_cost[compute_mode]);
3698                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3699                                                    NULL_RTX, 0,
3700                                                    max_cost - extra_cost);
3701                         if (t1 == 0)
3702                           goto fail1;
3703                         t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3704                                            build_int_2 (post_shift, 0), NULL_RTX, 0);
3705                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3706                                            build_int_2 (size - 1, 0), NULL_RTX, 0);
3707                         if (d < 0)
3708                           quotient
3709                             = force_operand (gen_rtx_MINUS (compute_mode,
3710                                                             t3, t2),
3711                                              tquotient);
3712                         else
3713                           quotient
3714                             = force_operand (gen_rtx_MINUS (compute_mode,
3715                                                             t2, t3),
3716                                              tquotient);
3717                       }
3718                     else
3719                       {
3720                         rtx t1, t2, t3, t4;
3721
3722                         if (post_shift >= BITS_PER_WORD
3723                             || size - 1 >= BITS_PER_WORD)
3724                           goto fail1;
3725
3726                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3727                         extra_cost = (shift_cost[compute_mode][post_shift]
3728                                       + shift_cost[compute_mode][size - 1]
3729                                       + 2 * add_cost[compute_mode]);
3730                         t1 = expand_mult_highpart (compute_mode, op0, ml,
3731                                                    NULL_RTX, 0,
3732                                                    max_cost - extra_cost);
3733                         if (t1 == 0)
3734                           goto fail1;
3735                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
3736                                                           t1, op0),
3737                                             NULL_RTX);
3738                         t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3739                                            build_int_2 (post_shift, 0),
3740                                            NULL_RTX, 0);
3741                         t4 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3742                                            build_int_2 (size - 1, 0),
3743                                            NULL_RTX, 0);
3744                         if (d < 0)
3745                           quotient
3746                             = force_operand (gen_rtx_MINUS (compute_mode,
3747                                                             t4, t3),
3748                                              tquotient);
3749                         else
3750                           quotient
3751                             = force_operand (gen_rtx_MINUS (compute_mode,
3752                                                             t3, t4),
3753                                              tquotient);
3754                       }
3755                   }
3756                 else            /* Too wide mode to use tricky code */
3757                   break;
3758
3759                 insn = get_last_insn ();
3760                 if (insn != last
3761                     && (set = single_set (insn)) != 0
3762                     && SET_DEST (set) == quotient)
3763                   set_unique_reg_note (insn,
3764                                        REG_EQUAL,
3765                                        gen_rtx_DIV (compute_mode, op0, op1));
3766               }
3767             break;
3768           }
3769       fail1:
3770         delete_insns_since (last);
3771         break;
3772
3773       case FLOOR_DIV_EXPR:
3774       case FLOOR_MOD_EXPR:
3775       /* We will come here only for signed operations.  */
3776         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3777           {
3778             unsigned HOST_WIDE_INT mh, ml;
3779             int pre_shift, lgup, post_shift;
3780             HOST_WIDE_INT d = INTVAL (op1);
3781
3782             if (d > 0)
3783               {
3784                 /* We could just as easily deal with negative constants here,
3785                    but it does not seem worth the trouble for GCC 2.6.  */
3786                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3787                   {
3788                     pre_shift = floor_log2 (d);
3789                     if (rem_flag)
3790                       {
3791                         remainder = expand_binop (compute_mode, and_optab, op0,
3792                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3793                                                   remainder, 0, OPTAB_LIB_WIDEN);
3794                         if (remainder)
3795                           return gen_lowpart (mode, remainder);
3796                       }
3797                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3798                                              build_int_2 (pre_shift, 0),
3799                                              tquotient, 0);
3800                   }
3801                 else
3802                   {
3803                     rtx t1, t2, t3, t4;
3804
3805                     mh = choose_multiplier (d, size, size - 1,
3806                                             &ml, &post_shift, &lgup);
3807                     if (mh)
3808                       abort ();
3809
3810                     if (post_shift < BITS_PER_WORD
3811                         && size - 1 < BITS_PER_WORD)
3812                       {
3813                         t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3814                                            build_int_2 (size - 1, 0),
3815                                            NULL_RTX, 0);
3816                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3817                                            NULL_RTX, 0, OPTAB_WIDEN);
3818                         extra_cost = (shift_cost[compute_mode][post_shift]
3819                                       + shift_cost[compute_mode][size - 1]
3820                                       + 2 * add_cost[compute_mode]);
3821                         t3 = expand_mult_highpart (compute_mode, t2, ml,
3822                                                    NULL_RTX, 1,
3823                                                    max_cost - extra_cost);
3824                         if (t3 != 0)
3825                           {
3826                             t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3827                                                build_int_2 (post_shift, 0),
3828                                                NULL_RTX, 1);
3829                             quotient = expand_binop (compute_mode, xor_optab,
3830                                                      t4, t1, tquotient, 0,
3831                                                      OPTAB_WIDEN);
3832                           }
3833                       }
3834                   }
3835               }
3836             else
3837               {
3838                 rtx nsign, t1, t2, t3, t4;
3839                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
3840                                                   op0, constm1_rtx), NULL_RTX);
3841                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3842                                    0, OPTAB_WIDEN);
3843                 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3844                                       build_int_2 (size - 1, 0), NULL_RTX, 0);
3845                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
3846                                     NULL_RTX);
3847                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3848                                     NULL_RTX, 0);
3849                 if (t4)
3850                   {
3851                     rtx t5;
3852                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3853                                       NULL_RTX, 0);
3854                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
3855                                                             t4, t5),
3856                                               tquotient);
3857                   }
3858               }
3859           }
3860
3861         if (quotient != 0)
3862           break;
3863         delete_insns_since (last);
3864
3865         /* Try using an instruction that produces both the quotient and
3866            remainder, using truncation.  We can easily compensate the quotient
3867            or remainder to get floor rounding, once we have the remainder.
3868            Notice that we compute also the final remainder value here,
3869            and return the result right away.  */
3870         if (target == 0 || GET_MODE (target) != compute_mode)
3871           target = gen_reg_rtx (compute_mode);
3872
3873         if (rem_flag)
3874           {
3875             remainder
3876               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
3877             quotient = gen_reg_rtx (compute_mode);
3878           }
3879         else
3880           {
3881             quotient
3882               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
3883             remainder = gen_reg_rtx (compute_mode);
3884           }
3885
3886         if (expand_twoval_binop (sdivmod_optab, op0, op1,
3887                                  quotient, remainder, 0))
3888           {
3889             /* This could be computed with a branch-less sequence.
3890                Save that for later.  */
3891             rtx tem;
3892             rtx label = gen_label_rtx ();
3893             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
3894             tem = expand_binop (compute_mode, xor_optab, op0, op1,
3895                                 NULL_RTX, 0, OPTAB_WIDEN);
3896             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
3897             expand_dec (quotient, const1_rtx);
3898             expand_inc (remainder, op1);
3899             emit_label (label);
3900             return gen_lowpart (mode, rem_flag ? remainder : quotient);
3901           }
3902
3903         /* No luck with division elimination or divmod.  Have to do it
3904            by conditionally adjusting op0 *and* the result.  */
3905         {
3906           rtx label1, label2, label3, label4, label5;
3907           rtx adjusted_op0;
3908           rtx tem;
3909
3910           quotient = gen_reg_rtx (compute_mode);
3911           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3912           label1 = gen_label_rtx ();
3913           label2 = gen_label_rtx ();
3914           label3 = gen_label_rtx ();
3915           label4 = gen_label_rtx ();
3916           label5 = gen_label_rtx ();
3917           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3918           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
3919           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3920                               quotient, 0, OPTAB_LIB_WIDEN);
3921           if (tem != quotient)
3922             emit_move_insn (quotient, tem);
3923           emit_jump_insn (gen_jump (label5));
3924           emit_barrier ();
3925           emit_label (label1);
3926           expand_inc (adjusted_op0, const1_rtx);
3927           emit_jump_insn (gen_jump (label4));
3928           emit_barrier ();
3929           emit_label (label2);
3930           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
3931           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3932                               quotient, 0, OPTAB_LIB_WIDEN);
3933           if (tem != quotient)
3934             emit_move_insn (quotient, tem);
3935           emit_jump_insn (gen_jump (label5));
3936           emit_barrier ();
3937           emit_label (label3);
3938           expand_dec (adjusted_op0, const1_rtx);
3939           emit_label (label4);
3940           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3941                               quotient, 0, OPTAB_LIB_WIDEN);
3942           if (tem != quotient)
3943             emit_move_insn (quotient, tem);
3944           expand_dec (quotient, const1_rtx);
3945           emit_label (label5);
3946         }
3947         break;
3948
3949       case CEIL_DIV_EXPR:
3950       case CEIL_MOD_EXPR:
3951         if (unsignedp)
3952           {
3953             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
3954               {
3955                 rtx t1, t2, t3;
3956                 unsigned HOST_WIDE_INT d = INTVAL (op1);
3957                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3958                                    build_int_2 (floor_log2 (d), 0),
3959                                    tquotient, 1);
3960                 t2 = expand_binop (compute_mode, and_optab, op0,
3961                                    GEN_INT (d - 1),
3962                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3963                 t3 = gen_reg_rtx (compute_mode);
3964                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3965                                       compute_mode, 1, 1);
3966                 if (t3 == 0)
3967                   {
3968                     rtx lab;
3969                     lab = gen_label_rtx ();
3970                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3971                     expand_inc (t1, const1_rtx);
3972                     emit_label (lab);
3973                     quotient = t1;
3974                   }
3975                 else
3976                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
3977                                                           t1, t3),
3978                                             tquotient);
3979                 break;
3980               }
3981
3982             /* Try using an instruction that produces both the quotient and
3983                remainder, using truncation.  We can easily compensate the
3984                quotient or remainder to get ceiling rounding, once we have the
3985                remainder.  Notice that we compute also the final remainder
3986                value here, and return the result right away.  */
3987             if (target == 0 || GET_MODE (target) != compute_mode)
3988               target = gen_reg_rtx (compute_mode);
3989
3990             if (rem_flag)
3991               {
3992                 remainder = (REG_P (target)
3993                              ? target : gen_reg_rtx (compute_mode));
3994                 quotient = gen_reg_rtx (compute_mode);
3995               }
3996             else
3997               {
3998                 quotient = (REG_P (target)
3999                             ? target : gen_reg_rtx (compute_mode));
4000                 remainder = gen_reg_rtx (compute_mode);
4001               }
4002
4003             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4004                                      remainder, 1))
4005               {
4006                 /* This could be computed with a branch-less sequence.
4007                    Save that for later.  */
4008                 rtx label = gen_label_rtx ();
4009                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4010                                  compute_mode, label);
4011                 expand_inc (quotient, const1_rtx);
4012                 expand_dec (remainder, op1);
4013                 emit_label (label);
4014                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4015               }
4016
4017             /* No luck with division elimination or divmod.  Have to do it
4018                by conditionally adjusting op0 *and* the result.  */
4019             {
4020               rtx label1, label2;
4021               rtx adjusted_op0, tem;
4022
4023               quotient = gen_reg_rtx (compute_mode);
4024               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4025               label1 = gen_label_rtx ();
4026               label2 = gen_label_rtx ();
4027               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4028                                compute_mode, label1);
4029               emit_move_insn  (quotient, const0_rtx);
4030               emit_jump_insn (gen_jump (label2));
4031               emit_barrier ();
4032               emit_label (label1);
4033               expand_dec (adjusted_op0, const1_rtx);
4034               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4035                                   quotient, 1, OPTAB_LIB_WIDEN);
4036               if (tem != quotient)
4037                 emit_move_insn (quotient, tem);
4038               expand_inc (quotient, const1_rtx);
4039               emit_label (label2);
4040             }
4041           }
4042         else /* signed */
4043           {
4044             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4045                 && INTVAL (op1) >= 0)
4046               {
4047                 /* This is extremely similar to the code for the unsigned case
4048                    above.  For 2.7 we should merge these variants, but for
4049                    2.6.1 I don't want to touch the code for unsigned since that
4050                    get used in C.  The signed case will only be used by other
4051                    languages (Ada).  */
4052
4053                 rtx t1, t2, t3;
4054                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4055                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4056                                    build_int_2 (floor_log2 (d), 0),
4057                                    tquotient, 0);
4058                 t2 = expand_binop (compute_mode, and_optab, op0,
4059                                    GEN_INT (d - 1),
4060                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4061                 t3 = gen_reg_rtx (compute_mode);
4062                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4063                                       compute_mode, 1, 1);
4064                 if (t3 == 0)
4065                   {
4066                     rtx lab;
4067                     lab = gen_label_rtx ();
4068                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4069                     expand_inc (t1, const1_rtx);
4070                     emit_label (lab);
4071                     quotient = t1;
4072                   }
4073                 else
4074                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4075                                                           t1, t3),
4076                                             tquotient);
4077                 break;
4078               }
4079
4080             /* Try using an instruction that produces both the quotient and
4081                remainder, using truncation.  We can easily compensate the
4082                quotient or remainder to get ceiling rounding, once we have the
4083                remainder.  Notice that we compute also the final remainder
4084                value here, and return the result right away.  */
4085             if (target == 0 || GET_MODE (target) != compute_mode)
4086               target = gen_reg_rtx (compute_mode);
4087             if (rem_flag)
4088               {
4089                 remainder= (REG_P (target)
4090                             ? target : gen_reg_rtx (compute_mode));
4091                 quotient = gen_reg_rtx (compute_mode);
4092               }
4093             else
4094               {
4095                 quotient = (REG_P (target)
4096                             ? target : gen_reg_rtx (compute_mode));
4097                 remainder = gen_reg_rtx (compute_mode);
4098               }
4099
4100             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4101                                      remainder, 0))
4102               {
4103                 /* This could be computed with a branch-less sequence.
4104                    Save that for later.  */
4105                 rtx tem;
4106                 rtx label = gen_label_rtx ();
4107                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4108                                  compute_mode, label);
4109                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4110                                     NULL_RTX, 0, OPTAB_WIDEN);
4111                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4112                 expand_inc (quotient, const1_rtx);
4113                 expand_dec (remainder, op1);
4114                 emit_label (label);
4115                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4116               }
4117
4118             /* No luck with division elimination or divmod.  Have to do it
4119                by conditionally adjusting op0 *and* the result.  */
4120             {
4121               rtx label1, label2, label3, label4, label5;
4122               rtx adjusted_op0;
4123               rtx tem;
4124
4125               quotient = gen_reg_rtx (compute_mode);
4126               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4127               label1 = gen_label_rtx ();
4128               label2 = gen_label_rtx ();
4129               label3 = gen_label_rtx ();
4130               label4 = gen_label_rtx ();
4131               label5 = gen_label_rtx ();
4132               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4133               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4134                                compute_mode, label1);
4135               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4136                                   quotient, 0, OPTAB_LIB_WIDEN);
4137               if (tem != quotient)
4138                 emit_move_insn (quotient, tem);
4139               emit_jump_insn (gen_jump (label5));
4140               emit_barrier ();
4141               emit_label (label1);
4142               expand_dec (adjusted_op0, const1_rtx);
4143               emit_jump_insn (gen_jump (label4));
4144               emit_barrier ();
4145               emit_label (label2);
4146               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4147                                compute_mode, label3);
4148               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4149                                   quotient, 0, OPTAB_LIB_WIDEN);
4150               if (tem != quotient)
4151                 emit_move_insn (quotient, tem);
4152               emit_jump_insn (gen_jump (label5));
4153               emit_barrier ();
4154               emit_label (label3);
4155               expand_inc (adjusted_op0, const1_rtx);
4156               emit_label (label4);
4157               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4158                                   quotient, 0, OPTAB_LIB_WIDEN);
4159               if (tem != quotient)
4160                 emit_move_insn (quotient, tem);
4161               expand_inc (quotient, const1_rtx);
4162               emit_label (label5);
4163             }
4164           }
4165         break;
4166
4167       case EXACT_DIV_EXPR:
4168         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4169           {
4170             HOST_WIDE_INT d = INTVAL (op1);
4171             unsigned HOST_WIDE_INT ml;
4172             int pre_shift;
4173             rtx t1;
4174
4175             pre_shift = floor_log2 (d & -d);
4176             ml = invert_mod2n (d >> pre_shift, size);
4177             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4178                                build_int_2 (pre_shift, 0), NULL_RTX, unsignedp);
4179             quotient = expand_mult (compute_mode, t1,
4180                                     gen_int_mode (ml, compute_mode),
4181                                     NULL_RTX, 1);
4182
4183             insn = get_last_insn ();
4184             set_unique_reg_note (insn,
4185                                  REG_EQUAL,
4186                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4187                                                  compute_mode,
4188                                                  op0, op1));
4189           }
4190         break;
4191
4192       case ROUND_DIV_EXPR:
4193       case ROUND_MOD_EXPR:
4194         if (unsignedp)
4195           {
4196             rtx tem;
4197             rtx label;
4198             label = gen_label_rtx ();
4199             quotient = gen_reg_rtx (compute_mode);
4200             remainder = gen_reg_rtx (compute_mode);
4201             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4202               {
4203                 rtx tem;
4204                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4205                                          quotient, 1, OPTAB_LIB_WIDEN);
4206                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4207                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4208                                           remainder, 1, OPTAB_LIB_WIDEN);
4209               }
4210             tem = plus_constant (op1, -1);
4211             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4212                                 build_int_2 (1, 0), NULL_RTX, 1);
4213             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4214             expand_inc (quotient, const1_rtx);
4215             expand_dec (remainder, op1);
4216             emit_label (label);
4217           }
4218         else
4219           {
4220             rtx abs_rem, abs_op1, tem, mask;
4221             rtx label;
4222             label = gen_label_rtx ();
4223             quotient = gen_reg_rtx (compute_mode);
4224             remainder = gen_reg_rtx (compute_mode);
4225             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4226               {
4227                 rtx tem;
4228                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4229                                          quotient, 0, OPTAB_LIB_WIDEN);
4230                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4231                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4232                                           remainder, 0, OPTAB_LIB_WIDEN);
4233               }
4234             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4235             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4236             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4237                                 build_int_2 (1, 0), NULL_RTX, 1);
4238             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4239             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4240                                 NULL_RTX, 0, OPTAB_WIDEN);
4241             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4242                                 build_int_2 (size - 1, 0), NULL_RTX, 0);
4243             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4244                                 NULL_RTX, 0, OPTAB_WIDEN);
4245             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4246                                 NULL_RTX, 0, OPTAB_WIDEN);
4247             expand_inc (quotient, tem);
4248             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4249                                 NULL_RTX, 0, OPTAB_WIDEN);
4250             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4251                                 NULL_RTX, 0, OPTAB_WIDEN);
4252             expand_dec (remainder, tem);
4253             emit_label (label);
4254           }
4255         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4256
4257       default:
4258         abort ();
4259       }
4260
4261   if (quotient == 0)
4262     {
4263       if (target && GET_MODE (target) != compute_mode)
4264         target = 0;
4265
4266       if (rem_flag)
4267         {
4268           /* Try to produce the remainder without producing the quotient.
4269              If we seem to have a divmod pattern that does not require widening,
4270              don't try widening here.  We should really have a WIDEN argument
4271              to expand_twoval_binop, since what we'd really like to do here is
4272              1) try a mod insn in compute_mode
4273              2) try a divmod insn in compute_mode
4274              3) try a div insn in compute_mode and multiply-subtract to get
4275                 remainder
4276              4) try the same things with widening allowed.  */
4277           remainder
4278             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4279                                  op0, op1, target,
4280                                  unsignedp,
4281                                  ((optab2->handlers[compute_mode].insn_code
4282                                    != CODE_FOR_nothing)
4283                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4284           if (remainder == 0)
4285             {
4286               /* No luck there.  Can we do remainder and divide at once
4287                  without a library call?  */
4288               remainder = gen_reg_rtx (compute_mode);
4289               if (! expand_twoval_binop ((unsignedp
4290                                           ? udivmod_optab
4291                                           : sdivmod_optab),
4292                                          op0, op1,
4293                                          NULL_RTX, remainder, unsignedp))
4294                 remainder = 0;
4295             }
4296
4297           if (remainder)
4298             return gen_lowpart (mode, remainder);
4299         }
4300
4301       /* Produce the quotient.  Try a quotient insn, but not a library call.
4302          If we have a divmod in this mode, use it in preference to widening
4303          the div (for this test we assume it will not fail). Note that optab2
4304          is set to the one of the two optabs that the call below will use.  */
4305       quotient
4306         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4307                              op0, op1, rem_flag ? NULL_RTX : target,
4308                              unsignedp,
4309                              ((optab2->handlers[compute_mode].insn_code
4310                                != CODE_FOR_nothing)
4311                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4312
4313       if (quotient == 0)
4314         {
4315           /* No luck there.  Try a quotient-and-remainder insn,
4316              keeping the quotient alone.  */
4317           quotient = gen_reg_rtx (compute_mode);
4318           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4319                                      op0, op1,
4320                                      quotient, NULL_RTX, unsignedp))
4321             {
4322               quotient = 0;
4323               if (! rem_flag)
4324                 /* Still no luck.  If we are not computing the remainder,
4325                    use a library call for the quotient.  */
4326                 quotient = sign_expand_binop (compute_mode,
4327                                               udiv_optab, sdiv_optab,
4328                                               op0, op1, target,
4329                                               unsignedp, OPTAB_LIB_WIDEN);
4330             }
4331         }
4332     }
4333
4334   if (rem_flag)
4335     {
4336       if (target && GET_MODE (target) != compute_mode)
4337         target = 0;
4338
4339       if (quotient == 0)
4340         /* No divide instruction either.  Use library for remainder.  */
4341         remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4342                                        op0, op1, target,
4343                                        unsignedp, OPTAB_LIB_WIDEN);
4344       else
4345         {
4346           /* We divided.  Now finish doing X - Y * (X / Y).  */
4347           remainder = expand_mult (compute_mode, quotient, op1,
4348                                    NULL_RTX, unsignedp);
4349           remainder = expand_binop (compute_mode, sub_optab, op0,
4350                                     remainder, target, unsignedp,
4351                                     OPTAB_LIB_WIDEN);
4352         }
4353     }
4354
4355   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4356 }
4357 \f
4358 /* Return a tree node with data type TYPE, describing the value of X.
4359    Usually this is an VAR_DECL, if there is no obvious better choice.
4360    X may be an expression, however we only support those expressions
4361    generated by loop.c.  */
4362
4363 tree
4364 make_tree (tree type, rtx x)
4365 {
4366   tree t;
4367
4368   switch (GET_CODE (x))
4369     {
4370     case CONST_INT:
4371       t = build_int_2 (INTVAL (x),
4372                        (TYPE_UNSIGNED (type)
4373                         && (GET_MODE_BITSIZE (TYPE_MODE (type))
4374                             < HOST_BITS_PER_WIDE_INT))
4375                        || INTVAL (x) >= 0 ? 0 : -1);
4376       TREE_TYPE (t) = type;
4377       return t;
4378
4379     case CONST_DOUBLE:
4380       if (GET_MODE (x) == VOIDmode)
4381         {
4382           t = build_int_2 (CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4383           TREE_TYPE (t) = type;
4384         }
4385       else
4386         {
4387           REAL_VALUE_TYPE d;
4388
4389           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4390           t = build_real (type, d);
4391         }
4392
4393       return t;
4394
4395     case CONST_VECTOR:
4396       {
4397         int i, units;
4398         rtx elt;
4399         tree t = NULL_TREE;
4400
4401         units = CONST_VECTOR_NUNITS (x);
4402
4403         /* Build a tree with vector elements.  */
4404         for (i = units - 1; i >= 0; --i)
4405           {
4406             elt = CONST_VECTOR_ELT (x, i);
4407             t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4408           }
4409
4410         return build_vector (type, t);
4411       }
4412
4413     case PLUS:
4414       return fold (build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4415                            make_tree (type, XEXP (x, 1))));
4416
4417     case MINUS:
4418       return fold (build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4419                            make_tree (type, XEXP (x, 1))));
4420
4421     case NEG:
4422       return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
4423
4424     case MULT:
4425       return fold (build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4426                            make_tree (type, XEXP (x, 1))));
4427
4428     case ASHIFT:
4429       return fold (build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4430                            make_tree (type, XEXP (x, 1))));
4431
4432     case LSHIFTRT:
4433       t = lang_hooks.types.unsigned_type (type);
4434       return fold (convert (type,
4435                             build2 (RSHIFT_EXPR, t,
4436                                     make_tree (t, XEXP (x, 0)),
4437                                     make_tree (type, XEXP (x, 1)))));
4438
4439     case ASHIFTRT:
4440       t = lang_hooks.types.signed_type (type);
4441       return fold (convert (type,
4442                             build2 (RSHIFT_EXPR, t,
4443                                     make_tree (t, XEXP (x, 0)),
4444                                     make_tree (type, XEXP (x, 1)))));
4445
4446     case DIV:
4447       if (TREE_CODE (type) != REAL_TYPE)
4448         t = lang_hooks.types.signed_type (type);
4449       else
4450         t = type;
4451
4452       return fold (convert (type,
4453                             build2 (TRUNC_DIV_EXPR, t,
4454                                     make_tree (t, XEXP (x, 0)),
4455                                     make_tree (t, XEXP (x, 1)))));
4456     case UDIV:
4457       t = lang_hooks.types.unsigned_type (type);
4458       return fold (convert (type,
4459                             build2 (TRUNC_DIV_EXPR, t,
4460                                     make_tree (t, XEXP (x, 0)),
4461                                     make_tree (t, XEXP (x, 1)))));
4462
4463     case SIGN_EXTEND:
4464     case ZERO_EXTEND:
4465       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4466                                           GET_CODE (x) == ZERO_EXTEND);
4467       return fold (convert (type, make_tree (t, XEXP (x, 0))));
4468
4469     default:
4470       t = build_decl (VAR_DECL, NULL_TREE, type);
4471
4472       /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4473          ptr_mode.  So convert.  */
4474       if (POINTER_TYPE_P (type))
4475         x = convert_memory_address (TYPE_MODE (type), x);
4476
4477       /* Note that we do *not* use SET_DECL_RTL here, because we do not
4478          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
4479       t->decl.rtl = x;
4480
4481       return t;
4482     }
4483 }
4484
4485 /* Check whether the multiplication X * MULT + ADD overflows.
4486    X, MULT and ADD must be CONST_*.
4487    MODE is the machine mode for the computation.
4488    X and MULT must have mode MODE.  ADD may have a different mode.
4489    So can X (defaults to same as MODE).
4490    UNSIGNEDP is nonzero to do unsigned multiplication.  */
4491
4492 bool
4493 const_mult_add_overflow_p (rtx x, rtx mult, rtx add, enum machine_mode mode, int unsignedp)
4494 {
4495   tree type, mult_type, add_type, result;
4496
4497   type = lang_hooks.types.type_for_mode (mode, unsignedp);
4498
4499   /* In order to get a proper overflow indication from an unsigned
4500      type, we have to pretend that it's a sizetype.  */
4501   mult_type = type;
4502   if (unsignedp)
4503     {
4504       mult_type = copy_node (type);
4505       TYPE_IS_SIZETYPE (mult_type) = 1;
4506     }
4507
4508   add_type = (GET_MODE (add) == VOIDmode ? mult_type
4509               : lang_hooks.types.type_for_mode (GET_MODE (add), unsignedp));
4510
4511   result = fold (build2 (PLUS_EXPR, mult_type,
4512                          fold (build2 (MULT_EXPR, mult_type,
4513                                        make_tree (mult_type, x),
4514                                        make_tree (mult_type, mult))),
4515                          make_tree (add_type, add)));
4516
4517   return TREE_CONSTANT_OVERFLOW (result);
4518 }
4519
4520 /* Return an rtx representing the value of X * MULT + ADD.
4521    TARGET is a suggestion for where to store the result (an rtx).
4522    MODE is the machine mode for the computation.
4523    X and MULT must have mode MODE.  ADD may have a different mode.
4524    So can X (defaults to same as MODE).
4525    UNSIGNEDP is nonzero to do unsigned multiplication.
4526    This may emit insns.  */
4527
4528 rtx
4529 expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
4530                  int unsignedp)
4531 {
4532   tree type = lang_hooks.types.type_for_mode (mode, unsignedp);
4533   tree add_type = (GET_MODE (add) == VOIDmode
4534                    ? type: lang_hooks.types.type_for_mode (GET_MODE (add),
4535                                                            unsignedp));
4536   tree result =  fold (build2 (PLUS_EXPR, type,
4537                                fold (build2 (MULT_EXPR, type,
4538                                              make_tree (type, x),
4539                                              make_tree (type, mult))),
4540                                make_tree (add_type, add)));
4541
4542   return expand_expr (result, target, VOIDmode, 0);
4543 }
4544 \f
4545 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4546    and returning TARGET.
4547
4548    If TARGET is 0, a pseudo-register or constant is returned.  */
4549
4550 rtx
4551 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
4552 {
4553   rtx tem = 0;
4554
4555   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4556     tem = simplify_binary_operation (AND, mode, op0, op1);
4557   if (tem == 0)
4558     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4559
4560   if (target == 0)
4561     target = tem;
4562   else if (tem != target)
4563     emit_move_insn (target, tem);
4564   return target;
4565 }
4566 \f
4567 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
4568    and storing in TARGET.  Normally return TARGET.
4569    Return 0 if that cannot be done.
4570
4571    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
4572    it is VOIDmode, they cannot both be CONST_INT.
4573
4574    UNSIGNEDP is for the case where we have to widen the operands
4575    to perform the operation.  It says to use zero-extension.
4576
4577    NORMALIZEP is 1 if we should convert the result to be either zero
4578    or one.  Normalize is -1 if we should convert the result to be
4579    either zero or -1.  If NORMALIZEP is zero, the result will be left
4580    "raw" out of the scc insn.  */
4581
4582 rtx
4583 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
4584                  enum machine_mode mode, int unsignedp, int normalizep)
4585 {
4586   rtx subtarget;
4587   enum insn_code icode;
4588   enum machine_mode compare_mode;
4589   enum machine_mode target_mode = GET_MODE (target);
4590   rtx tem;
4591   rtx last = get_last_insn ();
4592   rtx pattern, comparison;
4593
4594   if (unsignedp)
4595     code = unsigned_condition (code);
4596
4597   /* If one operand is constant, make it the second one.  Only do this
4598      if the other operand is not constant as well.  */
4599
4600   if (swap_commutative_operands_p (op0, op1))
4601     {
4602       tem = op0;
4603       op0 = op1;
4604       op1 = tem;
4605       code = swap_condition (code);
4606     }
4607
4608   if (mode == VOIDmode)
4609     mode = GET_MODE (op0);
4610
4611   /* For some comparisons with 1 and -1, we can convert this to
4612      comparisons with zero.  This will often produce more opportunities for
4613      store-flag insns.  */
4614
4615   switch (code)
4616     {
4617     case LT:
4618       if (op1 == const1_rtx)
4619         op1 = const0_rtx, code = LE;
4620       break;
4621     case LE:
4622       if (op1 == constm1_rtx)
4623         op1 = const0_rtx, code = LT;
4624       break;
4625     case GE:
4626       if (op1 == const1_rtx)
4627         op1 = const0_rtx, code = GT;
4628       break;
4629     case GT:
4630       if (op1 == constm1_rtx)
4631         op1 = const0_rtx, code = GE;
4632       break;
4633     case GEU:
4634       if (op1 == const1_rtx)
4635         op1 = const0_rtx, code = NE;
4636       break;
4637     case LTU:
4638       if (op1 == const1_rtx)
4639         op1 = const0_rtx, code = EQ;
4640       break;
4641     default:
4642       break;
4643     }
4644
4645   /* If we are comparing a double-word integer with zero or -1, we can
4646      convert the comparison into one involving a single word.  */
4647   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
4648       && GET_MODE_CLASS (mode) == MODE_INT
4649       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
4650     {
4651       if ((code == EQ || code == NE)
4652           && (op1 == const0_rtx || op1 == constm1_rtx))
4653         {
4654           rtx op00, op01, op0both;
4655
4656           /* Do a logical OR or AND of the two words and compare the result.  */
4657           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
4658           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
4659           op0both = expand_binop (word_mode,
4660                                   op1 == const0_rtx ? ior_optab : and_optab,
4661                                   op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
4662
4663           if (op0both != 0)
4664             return emit_store_flag (target, code, op0both, op1, word_mode,
4665                                     unsignedp, normalizep);
4666         }
4667       else if ((code == LT || code == GE) && op1 == const0_rtx)
4668         {
4669           rtx op0h;
4670
4671           /* If testing the sign bit, can just test on high word.  */
4672           op0h = simplify_gen_subreg (word_mode, op0, mode,
4673                                       subreg_highpart_offset (word_mode, mode));
4674           return emit_store_flag (target, code, op0h, op1, word_mode,
4675                                   unsignedp, normalizep);
4676         }
4677     }
4678
4679   /* From now on, we won't change CODE, so set ICODE now.  */
4680   icode = setcc_gen_code[(int) code];
4681
4682   /* If this is A < 0 or A >= 0, we can do this by taking the ones
4683      complement of A (for GE) and shifting the sign bit to the low bit.  */
4684   if (op1 == const0_rtx && (code == LT || code == GE)
4685       && GET_MODE_CLASS (mode) == MODE_INT
4686       && (normalizep || STORE_FLAG_VALUE == 1
4687           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4688               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4689                   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
4690     {
4691       subtarget = target;
4692
4693       /* If the result is to be wider than OP0, it is best to convert it
4694          first.  If it is to be narrower, it is *incorrect* to convert it
4695          first.  */
4696       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
4697         {
4698           op0 = convert_modes (target_mode, mode, op0, 0);
4699           mode = target_mode;
4700         }
4701
4702       if (target_mode != mode)
4703         subtarget = 0;
4704
4705       if (code == GE)
4706         op0 = expand_unop (mode, one_cmpl_optab, op0,
4707                            ((STORE_FLAG_VALUE == 1 || normalizep)
4708                             ? 0 : subtarget), 0);
4709
4710       if (STORE_FLAG_VALUE == 1 || normalizep)
4711         /* If we are supposed to produce a 0/1 value, we want to do
4712            a logical shift from the sign bit to the low-order bit; for
4713            a -1/0 value, we do an arithmetic shift.  */
4714         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
4715                             size_int (GET_MODE_BITSIZE (mode) - 1),
4716                             subtarget, normalizep != -1);
4717
4718       if (mode != target_mode)
4719         op0 = convert_modes (target_mode, mode, op0, 0);
4720
4721       return op0;
4722     }
4723
4724   if (icode != CODE_FOR_nothing)
4725     {
4726       insn_operand_predicate_fn pred;
4727
4728       /* We think we may be able to do this with a scc insn.  Emit the
4729          comparison and then the scc insn.  */
4730
4731       do_pending_stack_adjust ();
4732       last = get_last_insn ();
4733
4734       comparison
4735         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
4736       if (CONSTANT_P (comparison))
4737         {
4738           if (GET_CODE (comparison) == CONST_INT)
4739             {
4740               if (comparison == const0_rtx)
4741                 return const0_rtx;
4742             }
4743 #ifdef FLOAT_STORE_FLAG_VALUE
4744           else if (GET_CODE (comparison) == CONST_DOUBLE)
4745             {
4746               if (comparison == CONST0_RTX (GET_MODE (comparison)))
4747                 return const0_rtx;
4748             }
4749 #endif
4750           else
4751             abort ();
4752           if (normalizep == 1)
4753             return const1_rtx;
4754           if (normalizep == -1)
4755             return constm1_rtx;
4756           return const_true_rtx;
4757         }
4758
4759       /* The code of COMPARISON may not match CODE if compare_from_rtx
4760          decided to swap its operands and reverse the original code.
4761
4762          We know that compare_from_rtx returns either a CONST_INT or
4763          a new comparison code, so it is safe to just extract the
4764          code from COMPARISON.  */
4765       code = GET_CODE (comparison);
4766
4767       /* Get a reference to the target in the proper mode for this insn.  */
4768       compare_mode = insn_data[(int) icode].operand[0].mode;
4769       subtarget = target;
4770       pred = insn_data[(int) icode].operand[0].predicate;
4771       if (preserve_subexpressions_p ()
4772           || ! (*pred) (subtarget, compare_mode))
4773         subtarget = gen_reg_rtx (compare_mode);
4774
4775       pattern = GEN_FCN (icode) (subtarget);
4776       if (pattern)
4777         {
4778           emit_insn (pattern);
4779
4780           /* If we are converting to a wider mode, first convert to
4781              TARGET_MODE, then normalize.  This produces better combining
4782              opportunities on machines that have a SIGN_EXTRACT when we are
4783              testing a single bit.  This mostly benefits the 68k.
4784
4785              If STORE_FLAG_VALUE does not have the sign bit set when
4786              interpreted in COMPARE_MODE, we can do this conversion as
4787              unsigned, which is usually more efficient.  */
4788           if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4789             {
4790               convert_move (target, subtarget,
4791                             (GET_MODE_BITSIZE (compare_mode)
4792                              <= HOST_BITS_PER_WIDE_INT)
4793                             && 0 == (STORE_FLAG_VALUE
4794                                      & ((HOST_WIDE_INT) 1
4795                                         << (GET_MODE_BITSIZE (compare_mode) -1))));
4796               op0 = target;
4797               compare_mode = target_mode;
4798             }
4799           else
4800             op0 = subtarget;
4801
4802           /* If we want to keep subexpressions around, don't reuse our
4803              last target.  */
4804
4805           if (preserve_subexpressions_p ())
4806             subtarget = 0;
4807
4808           /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
4809              we don't have to do anything.  */
4810           if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4811             ;
4812           /* STORE_FLAG_VALUE might be the most negative number, so write
4813              the comparison this way to avoid a compiler-time warning.  */
4814           else if (- normalizep == STORE_FLAG_VALUE)
4815             op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
4816
4817           /* We don't want to use STORE_FLAG_VALUE < 0 below since this
4818              makes it hard to use a value of just the sign bit due to
4819              ANSI integer constant typing rules.  */
4820           else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
4821                    && (STORE_FLAG_VALUE
4822                        & ((HOST_WIDE_INT) 1
4823                           << (GET_MODE_BITSIZE (compare_mode) - 1))))
4824             op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
4825                                 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
4826                                 subtarget, normalizep == 1);
4827           else if (STORE_FLAG_VALUE & 1)
4828             {
4829               op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
4830               if (normalizep == -1)
4831                 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
4832             }
4833           else
4834             abort ();
4835
4836           /* If we were converting to a smaller mode, do the
4837              conversion now.  */
4838           if (target_mode != compare_mode)
4839             {
4840               convert_move (target, op0, 0);
4841               return target;
4842             }
4843           else
4844             return op0;
4845         }
4846     }
4847
4848   delete_insns_since (last);
4849
4850   /* If expensive optimizations, use different pseudo registers for each
4851      insn, instead of reusing the same pseudo.  This leads to better CSE,
4852      but slows down the compiler, since there are more pseudos */
4853   subtarget = (!flag_expensive_optimizations
4854                && (target_mode == mode)) ? target : NULL_RTX;
4855
4856   /* If we reached here, we can't do this with a scc insn.  However, there
4857      are some comparisons that can be done directly.  For example, if
4858      this is an equality comparison of integers, we can try to exclusive-or
4859      (or subtract) the two operands and use a recursive call to try the
4860      comparison with zero.  Don't do any of these cases if branches are
4861      very cheap.  */
4862
4863   if (BRANCH_COST > 0
4864       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
4865       && op1 != const0_rtx)
4866     {
4867       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
4868                           OPTAB_WIDEN);
4869
4870       if (tem == 0)
4871         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
4872                             OPTAB_WIDEN);
4873       if (tem != 0)
4874         tem = emit_store_flag (target, code, tem, const0_rtx,
4875                                mode, unsignedp, normalizep);
4876       if (tem == 0)
4877         delete_insns_since (last);
4878       return tem;
4879     }
4880
4881   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
4882      the constant zero.  Reject all other comparisons at this point.  Only
4883      do LE and GT if branches are expensive since they are expensive on
4884      2-operand machines.  */
4885
4886   if (BRANCH_COST == 0
4887       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
4888       || (code != EQ && code != NE
4889           && (BRANCH_COST <= 1 || (code != LE && code != GT))))
4890     return 0;
4891
4892   /* See what we need to return.  We can only return a 1, -1, or the
4893      sign bit.  */
4894
4895   if (normalizep == 0)
4896     {
4897       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
4898         normalizep = STORE_FLAG_VALUE;
4899
4900       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4901                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4902                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
4903         ;
4904       else
4905         return 0;
4906     }
4907
4908   /* Try to put the result of the comparison in the sign bit.  Assume we can't
4909      do the necessary operation below.  */
4910
4911   tem = 0;
4912
4913   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
4914      the sign bit set.  */
4915
4916   if (code == LE)
4917     {
4918       /* This is destructive, so SUBTARGET can't be OP0.  */
4919       if (rtx_equal_p (subtarget, op0))
4920         subtarget = 0;
4921
4922       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
4923                           OPTAB_WIDEN);
4924       if (tem)
4925         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
4926                             OPTAB_WIDEN);
4927     }
4928
4929   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
4930      number of bits in the mode of OP0, minus one.  */
4931
4932   if (code == GT)
4933     {
4934       if (rtx_equal_p (subtarget, op0))
4935         subtarget = 0;
4936
4937       tem = expand_shift (RSHIFT_EXPR, mode, op0,
4938                           size_int (GET_MODE_BITSIZE (mode) - 1),
4939                           subtarget, 0);
4940       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
4941                           OPTAB_WIDEN);
4942     }
4943
4944   if (code == EQ || code == NE)
4945     {
4946       /* For EQ or NE, one way to do the comparison is to apply an operation
4947          that converts the operand into a positive number if it is nonzero
4948          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
4949          for NE we negate.  This puts the result in the sign bit.  Then we
4950          normalize with a shift, if needed.
4951
4952          Two operations that can do the above actions are ABS and FFS, so try
4953          them.  If that doesn't work, and MODE is smaller than a full word,
4954          we can use zero-extension to the wider mode (an unsigned conversion)
4955          as the operation.  */
4956
4957       /* Note that ABS doesn't yield a positive number for INT_MIN, but
4958          that is compensated by the subsequent overflow when subtracting
4959          one / negating.  */
4960
4961       if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
4962         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
4963       else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
4964         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
4965       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
4966         {
4967           tem = convert_modes (word_mode, mode, op0, 1);
4968           mode = word_mode;
4969         }
4970
4971       if (tem != 0)
4972         {
4973           if (code == EQ)
4974             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
4975                                 0, OPTAB_WIDEN);
4976           else
4977             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
4978         }
4979
4980       /* If we couldn't do it that way, for NE we can "or" the two's complement
4981          of the value with itself.  For EQ, we take the one's complement of
4982          that "or", which is an extra insn, so we only handle EQ if branches
4983          are expensive.  */
4984
4985       if (tem == 0 && (code == NE || BRANCH_COST > 1))
4986         {
4987           if (rtx_equal_p (subtarget, op0))
4988             subtarget = 0;
4989
4990           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
4991           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
4992                               OPTAB_WIDEN);
4993
4994           if (tem && code == EQ)
4995             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
4996         }
4997     }
4998
4999   if (tem && normalizep)
5000     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5001                         size_int (GET_MODE_BITSIZE (mode) - 1),
5002                         subtarget, normalizep == 1);
5003
5004   if (tem)
5005     {
5006       if (GET_MODE (tem) != target_mode)
5007         {
5008           convert_move (target, tem, 0);
5009           tem = target;
5010         }
5011       else if (!subtarget)
5012         {
5013           emit_move_insn (target, tem);
5014           tem = target;
5015         }
5016     }
5017   else
5018     delete_insns_since (last);
5019
5020   return tem;
5021 }
5022
5023 /* Like emit_store_flag, but always succeeds.  */
5024
5025 rtx
5026 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5027                        enum machine_mode mode, int unsignedp, int normalizep)
5028 {
5029   rtx tem, label;
5030
5031   /* First see if emit_store_flag can do the job.  */
5032   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5033   if (tem != 0)
5034     return tem;
5035
5036   if (normalizep == 0)
5037     normalizep = 1;
5038
5039   /* If this failed, we have to do this with set/compare/jump/set code.  */
5040
5041   if (!REG_P (target)
5042       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5043     target = gen_reg_rtx (GET_MODE (target));
5044
5045   emit_move_insn (target, const1_rtx);
5046   label = gen_label_rtx ();
5047   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5048                            NULL_RTX, label);
5049
5050   emit_move_insn (target, const0_rtx);
5051   emit_label (label);
5052
5053   return target;
5054 }
5055 \f
5056 /* Perform possibly multi-word comparison and conditional jump to LABEL
5057    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
5058
5059    The algorithm is based on the code in expr.c:do_jump.
5060
5061    Note that this does not perform a general comparison.  Only variants
5062    generated within expmed.c are correctly handled, others abort (but could
5063    be handled if needed).  */
5064
5065 static void
5066 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5067                  rtx label)
5068 {
5069   /* If this mode is an integer too wide to compare properly,
5070      compare word by word.  Rely on cse to optimize constant cases.  */
5071
5072   if (GET_MODE_CLASS (mode) == MODE_INT
5073       && ! can_compare_p (op, mode, ccp_jump))
5074     {
5075       rtx label2 = gen_label_rtx ();
5076
5077       switch (op)
5078         {
5079         case LTU:
5080           do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
5081           break;
5082
5083         case LEU:
5084           do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
5085           break;
5086
5087         case LT:
5088           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
5089           break;
5090
5091         case GT:
5092           do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
5093           break;
5094
5095         case GE:
5096           do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
5097           break;
5098
5099           /* do_jump_by_parts_equality_rtx compares with zero.  Luckily
5100              that's the only equality operations we do */
5101         case EQ:
5102           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
5103             abort ();
5104           do_jump_by_parts_equality_rtx (arg1, label2, label);
5105           break;
5106
5107         case NE:
5108           if (arg2 != const0_rtx || mode != GET_MODE(arg1))
5109             abort ();
5110           do_jump_by_parts_equality_rtx (arg1, label, label2);
5111           break;
5112
5113         default:
5114           abort ();
5115         }
5116
5117       emit_label (label2);
5118     }
5119   else
5120     emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
5121 }