/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "toplev.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "flags.h"
#include "insn-config.h"
#include "expr.h"
#include "optabs.h"
#include "real.h"
#include "recog.h"
#include "langhooks.h"
static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT, rtx);
static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT, rtx);
static rtx extract_fixed_bit_field (enum machine_mode, rtx,
				    unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT, rtx, int);
static rtx mask_rtx (enum machine_mode, int, int, int);
static rtx lshift_value (enum machine_mode, rtx, int, int);
static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT, int);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
/* Nonzero means divides or modulus operations are relatively cheap for
   powers of two, so don't use branches; emit the operation instead.
   Usually, this will mean that the MD file will emit non-branch
   sequences.  */

static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
static bool smod_pow2_cheap[NUM_MACHINE_MODES];
#ifndef SLOW_UNALIGNED_ACCESS
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
#endif

/* For compilers that support multiple targets with different word sizes,
   MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
   is the H8/300(H) compiler.  */

#ifndef MAX_BITS_PER_WORD
#define MAX_BITS_PER_WORD BITS_PER_WORD
#endif
/* Reduce conditional compilation elsewhere.  */
#ifndef HAVE_insv
#define HAVE_insv	0
#define CODE_FOR_insv	CODE_FOR_nothing
#define gen_insv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extv
#define HAVE_extv	0
#define CODE_FOR_extv	CODE_FOR_nothing
#define gen_extv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extzv
#define HAVE_extzv	0
#define CODE_FOR_extzv	CODE_FOR_nothing
#define gen_extzv(a,b,c,d) NULL_RTX
#endif
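
/* With the fallback definitions above, code later in this file can test
   HAVE_insv and call gen_insv unconditionally; on a target without the
   pattern, HAVE_insv is the constant 0 and the dead arm is compiled
   away.  A sketch of the shape this enables (see store_bit_field below
   for the real use; dst, size, pos and val are hypothetical):

	if (HAVE_insv)
	  pat = gen_insv (dst, GEN_INT (size), GEN_INT (pos), val);
	else
	  ... fall back to shifts and masks ...
 */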
/* Cost of various pieces of RTL.  Note that some of these are indexed by
   shift count and some by mode.  */
static int zero_cost;
static int add_cost[NUM_MACHINE_MODES];
static int neg_cost[NUM_MACHINE_MODES];
static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int mul_cost[NUM_MACHINE_MODES];
static int div_cost[NUM_MACHINE_MODES];
static int mul_widen_cost[NUM_MACHINE_MODES];
static int mul_highpart_cost[NUM_MACHINE_MODES];
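
/* A sketch of how these tables are consulted once init_expmed has
   filled them in; the real decisions of this kind belong to the
   multiply-synthesis machinery elsewhere in this file, and `mode' and
   `m' here are hypothetical:

	if (shift_cost[mode][m] <= mul_cost[mode])
	  ... prefer "x << m" over "x * (1 << m)" ...
 */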
void
init_expmed (void)
{
  struct
  {
    struct rtx_def reg;		rtunion reg_fld[2];
    struct rtx_def plus;	rtunion plus_fld1;
    struct rtx_def neg;
    struct rtx_def udiv;	rtunion udiv_fld1;
    struct rtx_def mult;	rtunion mult_fld1;
    struct rtx_def div;		rtunion div_fld1;
    struct rtx_def mod;		rtunion mod_fld1;
    struct rtx_def zext;
    struct rtx_def wide_mult;	rtunion wide_mult_fld1;
    struct rtx_def wide_lshr;	rtunion wide_lshr_fld1;
    struct rtx_def wide_trunc;
    struct rtx_def shift;	rtunion shift_fld1;
    struct rtx_def shift_mult;	rtunion shift_mult_fld1;
    struct rtx_def shift_add;	rtunion shift_add_fld1;
    struct rtx_def shift_sub;	rtunion shift_sub_fld1;
  } all;

  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
  int m, n;
  enum machine_mode mode, wider_mode;

  zero_cost = rtx_cost (const0_rtx, 0);

  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
      cint[m] = GEN_INT (m);
    }

  memset (&all, 0, sizeof all);

  PUT_CODE (&all.reg, REG);
  REGNO (&all.reg) = 10000;

  PUT_CODE (&all.plus, PLUS);
  XEXP (&all.plus, 0) = &all.reg;
  XEXP (&all.plus, 1) = &all.reg;

  PUT_CODE (&all.neg, NEG);
  XEXP (&all.neg, 0) = &all.reg;

  PUT_CODE (&all.udiv, UDIV);
  XEXP (&all.udiv, 0) = &all.reg;
  XEXP (&all.udiv, 1) = &all.reg;

  PUT_CODE (&all.mult, MULT);
  XEXP (&all.mult, 0) = &all.reg;
  XEXP (&all.mult, 1) = &all.reg;

  PUT_CODE (&all.div, DIV);
  XEXP (&all.div, 0) = &all.reg;
  XEXP (&all.div, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);

  PUT_CODE (&all.mod, MOD);
  XEXP (&all.mod, 0) = &all.reg;
  XEXP (&all.mod, 1) = XEXP (&all.div, 1);

  PUT_CODE (&all.zext, ZERO_EXTEND);
  XEXP (&all.zext, 0) = &all.reg;

  PUT_CODE (&all.wide_mult, MULT);
  XEXP (&all.wide_mult, 0) = &all.zext;
  XEXP (&all.wide_mult, 1) = &all.zext;

  PUT_CODE (&all.wide_lshr, LSHIFTRT);
  XEXP (&all.wide_lshr, 0) = &all.wide_mult;

  PUT_CODE (&all.wide_trunc, TRUNCATE);
  XEXP (&all.wide_trunc, 0) = &all.wide_lshr;

  PUT_CODE (&all.shift, ASHIFT);
  XEXP (&all.shift, 0) = &all.reg;

  PUT_CODE (&all.shift_mult, MULT);
  XEXP (&all.shift_mult, 0) = &all.reg;

  PUT_CODE (&all.shift_add, PLUS);
  XEXP (&all.shift_add, 0) = &all.shift_mult;
  XEXP (&all.shift_add, 1) = &all.reg;

  PUT_CODE (&all.shift_sub, MINUS);
  XEXP (&all.shift_sub, 0) = &all.shift_mult;
  XEXP (&all.shift_sub, 1) = &all.reg;

  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
       mode != VOIDmode;
       mode = GET_MODE_WIDER_MODE (mode))
    {
      PUT_MODE (&all.reg, mode);
      PUT_MODE (&all.plus, mode);
      PUT_MODE (&all.neg, mode);
      PUT_MODE (&all.udiv, mode);
      PUT_MODE (&all.mult, mode);
      PUT_MODE (&all.div, mode);
      PUT_MODE (&all.mod, mode);
      PUT_MODE (&all.wide_trunc, mode);
      PUT_MODE (&all.shift, mode);
      PUT_MODE (&all.shift_mult, mode);
      PUT_MODE (&all.shift_add, mode);
      PUT_MODE (&all.shift_sub, mode);

      add_cost[mode] = rtx_cost (&all.plus, SET);
      neg_cost[mode] = rtx_cost (&all.neg, SET);
      div_cost[mode] = rtx_cost (&all.udiv, SET);
      mul_cost[mode] = rtx_cost (&all.mult, SET);

      sdiv_pow2_cheap[mode] = (rtx_cost (&all.div, SET) <= 2 * add_cost[mode]);
      smod_pow2_cheap[mode] = (rtx_cost (&all.mod, SET) <= 4 * add_cost[mode]);

      wider_mode = GET_MODE_WIDER_MODE (mode);
      if (wider_mode != VOIDmode)
	{
	  PUT_MODE (&all.zext, wider_mode);
	  PUT_MODE (&all.wide_mult, wider_mode);
	  PUT_MODE (&all.wide_lshr, wider_mode);
	  XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));

	  mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
	  mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
	}

      shift_cost[mode][0] = 0;
      shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];

      n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
      for (m = 1; m < n; m++)
	{
	  XEXP (&all.shift, 1) = cint[m];
	  XEXP (&all.shift_mult, 1) = pow2[m];

	  shift_cost[mode][m] = rtx_cost (&all.shift, SET);
	  shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
	  shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
	}
    }
}
/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
negate_rtx (enum machine_mode mode, rtx x)
{
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  if (result == 0)
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);

  return result;
}
/* Report on the availability of insv/extv/extzv and the desired mode
   of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
   is false; else the mode of the specified operand.  If OPNO is -1,
   all the caller cares about is whether the insn is available.  */

enum machine_mode
mode_for_extraction (enum extraction_pattern pattern, int opno)
{
  const struct insn_data *data;

  switch (pattern)
    {
    case EP_insv:
      if (HAVE_insv)
	{
	  data = &insn_data[CODE_FOR_insv];
	  break;
	}
      return MAX_MACHINE_MODE;

    case EP_extv:
      if (HAVE_extv)
	{
	  data = &insn_data[CODE_FOR_extv];
	  break;
	}
      return MAX_MACHINE_MODE;

    case EP_extzv:
      if (HAVE_extzv)
	{
	  data = &insn_data[CODE_FOR_extzv];
	  break;
	}
      return MAX_MACHINE_MODE;

    default:
      abort ();
    }

  if (opno == -1)
    return VOIDmode;

  /* Everyone who uses this function used to follow it with
     if (result == VOIDmode) result = word_mode; */
  if (data->operand[opno].mode == VOIDmode)
    return word_mode;
  return data->operand[opno].mode;
}
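
/* Typical use: callers either probe availability with OPNO == -1 or ask
   for the mode of one operand of the optional pattern.  For example,
   store_bit_field below does

	enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);

   and then compares field sizes against GET_MODE_BITSIZE (op_mode).  */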
/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.
   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
   ALIGN is the alignment that STR_RTX is known to have.
   TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */

/* ??? Note that there are two different ideas here for how
   to determine the size to count bits within, for a register.
   One is BITS_PER_WORD, and the other is the size of operand 3
   of the insv pattern.

   If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD;
   else, we use the mode of operand 3.  */

rtx
store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
		 rtx value)
{
  unsigned int unit
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset = bitnum / unit;
  unsigned HOST_WIDE_INT bitpos = bitnum % unit;
  rtx op0 = str_rtx;
  int byte_offset;

  enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);

  while (GET_CODE (op0) == SUBREG)
    {
      /* The following line once was done only if WORDS_BIG_ENDIAN,
	 but I think that is a mistake.  WORDS_BIG_ENDIAN is
	 meaningful at a much higher level; when structures are copied
	 between memory and regs, the higher-numbered regs
	 always get higher addresses.  */
      offset += (SUBREG_BYTE (op0) / UNITS_PER_WORD);
      /* We used to adjust BITPOS here, but now we do the whole adjustment
	 right after the loop.  */
      op0 = SUBREG_REG (op0);
    }

  /* Use vec_set patterns for inserting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && (vec_set_optab->handlers[GET_MODE (op0)].insn_code
	  != CODE_FOR_nothing)
      && fieldmode == GET_MODE_INNER (GET_MODE (op0))
      && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
      && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      enum machine_mode outermode = GET_MODE (op0);
      enum machine_mode innermode = GET_MODE_INNER (outermode);
      int icode = (int) vec_set_optab->handlers[outermode].insn_code;
      int pos = bitnum / GET_MODE_BITSIZE (innermode);
      rtx rtxpos = GEN_INT (pos);
      rtx src = value;
      rtx dest = op0;
      rtx pat, seq;
      enum machine_mode mode0 = insn_data[icode].operand[0].mode;
      enum machine_mode mode1 = insn_data[icode].operand[1].mode;
      enum machine_mode mode2 = insn_data[icode].operand[2].mode;

      start_sequence ();

      if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
	src = copy_to_mode_reg (mode1, src);

      if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
	rtxpos = copy_to_mode_reg (mode1, rtxpos);

      /* We could handle this, but we should always be called with a pseudo
	 for our targets and all insns should take them as outputs.  */
      if (! (*insn_data[icode].operand[0].predicate) (dest, mode0)
	  || ! (*insn_data[icode].operand[1].predicate) (src, mode1)
	  || ! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
	abort ();
      pat = GEN_FCN (icode) (dest, src, rtxpos);
      seq = get_insns ();
      end_sequence ();
      if (pat)
	{
	  emit_insn (seq);
	  emit_insn (pat);
	  return dest;
	}
    }
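
  /* Worked example of the position arithmetic above: storing an SImode
     element into a V4SImode register at bitnum 64 gives
     pos = 64 / GET_MODE_BITSIZE (SImode) = 2, i.e. the third element.
     (Illustrative numbers, not taken from a particular target.)  */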
  if (flag_force_mem)
    {
      int old_generating_concat_p = generating_concat_p;
      generating_concat_p = 0;
      value = force_not_mem (value);
      generating_concat_p = old_generating_concat_p;
    }

  /* If the target is a register, overwriting the entire object, or storing
     a full-word or multi-word field can be done with just a SUBREG.

     If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */

  byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
		+ (offset * UNITS_PER_WORD);

  if (bitpos == 0
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && (!MEM_P (op0)
	  ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
	      || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
	     && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
	  : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
	     || (offset * BITS_PER_UNIT % bitsize == 0
		 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
    {
      if (GET_MODE (op0) != fieldmode)
	{
	  if (GET_CODE (op0) == SUBREG)
	    {
	      if (GET_MODE (SUBREG_REG (op0)) == fieldmode
		  || GET_MODE_CLASS (fieldmode) == MODE_INT
		  || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
		op0 = SUBREG_REG (op0);
	      else
		/* Else we've got some float mode source being extracted into
		   a different float mode destination -- this combination of
		   subregs results in Severe Tire Damage.  */
		abort ();
	    }
	  if (REG_P (op0))
	    op0 = gen_rtx_SUBREG (fieldmode, op0, byte_offset);
	  else
	    op0 = adjust_address (op0, fieldmode, offset);
	}
      emit_move_insn (op0, value);
      return value;
    }
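
  /* Worked example of the fast path above, on a 32-bit little-endian
     target: storing an SImode value at bitnum 32 of a DImode pseudo
     gives bitpos == 0 and byte_offset == 4, so the store becomes a
     plain move into (subreg:SI (reg:DI ...) 4).  (Illustrative
     numbers.)  */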
  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
	if (MEM_P (op0))
	  op0 = adjust_address (op0, imode, 0);
	else if (imode != BLKmode)
	  op0 = gen_lowpart (imode, op0);
	else
	  abort ();
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  if (MEM_P (op0))
    {
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && !MEM_P (op0)
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* Storing an lsb-aligned field in a register
     can be done with a movestrict instruction.  */

  if (!MEM_P (op0)
      && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && (movstrict_optab->handlers[fieldmode].insn_code
	  != CODE_FOR_nothing))
    {
      int icode = movstrict_optab->handlers[fieldmode].insn_code;

      /* Get appropriate low part of the value being stored.  */
      if (GET_CODE (value) == CONST_INT || REG_P (value))
	value = gen_lowpart (fieldmode, value);
      else if (!(GET_CODE (value) == SYMBOL_REF
		 || GET_CODE (value) == LABEL_REF
		 || GET_CODE (value) == CONST))
	value = convert_to_mode (fieldmode, value, 0);

      if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
	value = copy_to_mode_reg (fieldmode, value);

      if (GET_CODE (op0) == SUBREG)
	{
	  if (GET_MODE (SUBREG_REG (op0)) == fieldmode
	      || GET_MODE_CLASS (fieldmode) == MODE_INT
	      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
	    op0 = SUBREG_REG (op0);
	  else
	    /* Else we've got some float mode source being extracted into
	       a different float mode destination -- this combination of
	       subregs results in Severe Tire Damage.  */
	    abort ();
	}

      emit_insn (GEN_FCN (icode)
		 (gen_rtx_SUBREG (fieldmode, op0,
				  (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
				  + (offset * UNITS_PER_WORD)),
		  value));

      return value;
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.
	 However, only do that if the value is not BLKmode.  */

      unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;

      /* This is the mode we must force value to, so that there will be enough
	 subwords to extract.  Note that fieldmode will often (always?) be
	 VOIDmode, because that is what store_field uses to indicate that this
	 is a bit field, but passing VOIDmode to operand_subword_force will
	 result in an abort.  */
      fieldmode = GET_MODE (value);
      if (fieldmode == VOIDmode)
	fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);

      for (i = 0; i < nwords; i++)
	{
	  /* If I is 0, use the low-order word in both field and target;
	     if I is 1, use the next to lowest word; and so on.  */
	  unsigned int wordnum = (backwards ? nwords - i - 1 : i);
	  unsigned int bit_offset = (backwards
				     ? MAX ((int) bitsize - ((int) i + 1)
					    * BITS_PER_WORD,
					    0)
				     : (int) i * BITS_PER_WORD);

	  store_bit_field (op0, MIN (BITS_PER_WORD,
				     bitsize - i * BITS_PER_WORD),
			   bitnum + bit_offset, word_mode,
			   operand_subword_force (value, wordnum, fieldmode));
	}
      return value;
    }
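
  /* Worked example of the multiword loop above, on a 32-bit
     little-endian target: a 48-bit field gives nwords == 2; iteration
     i == 0 stores the full low word at bit_offset 0, and i == 1 stores
     the remaining 16 bits at bit_offset 32, so the word that may be
     less than full is written last.  (Illustrative numbers.)  */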
  /* From here on we can assume that the field to be stored in is
     a full-word (whatever type that is), since it is shorter than a word.  */

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */

  if (!MEM_P (op0))
    {
      if (offset != 0
	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
	{
	  if (!REG_P (op0))
	    {
	      /* Since this is a destination (lvalue), we can't copy it to a
		 pseudo.  We can trivially remove a SUBREG that does not
		 change the size of the operand.  Such a SUBREG may have been
		 added above.  Otherwise, abort.  */
	      if (GET_CODE (op0) == SUBREG
		  && (GET_MODE_SIZE (GET_MODE (op0))
		      == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))))
		op0 = SUBREG_REG (op0);
	      else
		abort ();
	    }
	  op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
				op0, (offset * UNITS_PER_WORD));
	}
      offset = 0;
    }

  /* If VALUE is a floating-point mode, access it as an integer of the
     corresponding size.  This can occur on a machine with 64 bit registers
     that uses SFmode for float.  This can also occur for unaligned float
     structure fields.  */
  if (GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
    value = gen_lowpart ((GET_MODE (value) == VOIDmode
			  ? word_mode : int_mode_for_mode (GET_MODE (value))),
			 value);

  /* Now OFFSET is nonzero only if OP0 is memory
     and is therefore always measured in bytes.  */

  if (HAVE_insv
      && GET_MODE (value) != BLKmode
      && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
      /* Ensure insv's size is wide enough for this field.  */
      && (GET_MODE_BITSIZE (op_mode) >= bitsize)
      && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
	    && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
    {
      int xbitpos = bitpos;
      rtx value1;
      rtx xop0 = op0;
      rtx last = get_last_insn ();
      rtx pat;
      enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
      int save_volatile_ok = volatile_ok;

      volatile_ok = 1;

      /* If this machine's insv can only insert into a register, copy OP0
	 into a register and save it back later.  */
      /* This used to check flag_force_mem, but that was a serious
	 de-optimization now that flag_force_mem is enabled by -O2.  */
      if (MEM_P (op0)
	  && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
		(op0, VOIDmode)))
	{
	  rtx tempreg;
	  enum machine_mode bestmode;

	  /* Get the mode to use for inserting into this field.  If OP0 is
	     BLKmode, get the smallest mode consistent with the alignment.  If
	     OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
	     mode.  Otherwise, use the smallest mode containing the field.  */

	  if (GET_MODE (op0) == BLKmode
	      || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
	    bestmode
	      = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
			       MEM_VOLATILE_P (op0));
	  else
	    bestmode = GET_MODE (op0);

	  if (bestmode == VOIDmode
	      || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
		  && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
	    goto insv_loses;

	  /* Adjust address to point to the containing unit of that mode.
	     Compute offset as multiple of this unit, counting in bytes.  */
	  unit = GET_MODE_BITSIZE (bestmode);
	  offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
	  bitpos = bitnum % unit;
	  op0 = adjust_address (op0, bestmode, offset);

	  /* Fetch that unit, store the bitfield in it, then store
	     the unit.  */
	  tempreg = copy_to_reg (op0);
	  store_bit_field (tempreg, bitsize, bitpos, fieldmode, value);
	  emit_move_insn (op0, tempreg);
	  return value;
	}
      volatile_ok = save_volatile_ok;

      /* Add OFFSET into OP0's address.  */
      if (MEM_P (xop0))
	xop0 = adjust_address (xop0, byte_mode, offset);

      /* If xop0 is a register, we need it in MAXMODE
	 to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
	/* We can't just change the mode, because this might clobber op0,
	   and we will need the original value of op0 if insv fails.  */
	xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
	xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);

      /* On big-endian machines, we count bits from the most significant.
	 If the bit field insn does not, we must invert.  */

      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
	xbitpos = unit - bitsize - xbitpos;

      /* We have been counting XBITPOS within UNIT.
	 Count instead within the size of the register.  */
      if (BITS_BIG_ENDIAN && !MEM_P (xop0))
	xbitpos += GET_MODE_BITSIZE (maxmode) - unit;

      unit = GET_MODE_BITSIZE (maxmode);

      /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
      value1 = value;
      if (GET_MODE (value) != maxmode)
	{
	  if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
	    {
	      /* Optimization: Don't bother really extending VALUE
		 if it has all the bits we will actually use.  However,
		 if we must narrow it, be sure we do it correctly.  */

	      if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
		{
		  rtx tmp;

		  tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
		  if (! tmp)
		    tmp = simplify_gen_subreg (maxmode,
					       force_reg (GET_MODE (value),
							  value1),
					       GET_MODE (value), 0);
		  value1 = tmp;
		}
	      else
		value1 = gen_lowpart (maxmode, value1);
	    }
	  else if (GET_CODE (value) == CONST_INT)
	    value1 = gen_int_mode (INTVAL (value), maxmode);
	  else if (!CONSTANT_P (value))
	    /* Parse phase is supposed to make VALUE's data type
	       match that of the component reference, which is a type
	       at least as wide as the field; so VALUE should have
	       a mode that corresponds to that type.  */
	    abort ();
	}

      /* If this machine's insv insists on a register,
	 get VALUE1 into a register.  */
      if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
	     (value1, maxmode)))
	value1 = force_reg (maxmode, value1);

      pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
      if (pat)
	emit_insn (pat);
      else
	{
	  delete_insns_since (last);
	  store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
	}
    }
  else
    insv_loses:
    /* Insv is not available; store using shifts and boolean ops.  */
    store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
  return value;
}
/* Use shifts and boolean operations to store VALUE
   into a bit field of width BITSIZE
   in a memory location specified by OP0 except offset by OFFSET bytes.
     (OFFSET must be 0 if OP0 is a register.)
   The field starts at position BITPOS within the byte.
     (If OP0 is a register, it may be a full word or a narrower mode,
      but BITPOS still counts within a full word,
      which is significant on bigendian machines.)  */

static void
store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
		       unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitpos, rtx value)
{
  enum machine_mode mode;
  unsigned int total_bits = BITS_PER_WORD;
  rtx subtarget, temp;
  int all_zero = 0;
  int all_one = 0;

  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    {
      if (offset != 0)
	abort ();
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
	{
	  store_split_bit_field (op0, bitsize, bitpos, value);
	  return;
	}
    }
  else
    {
      /* Get the proper mode to use for this field.  We want a mode that
	 includes the entire field.  If such a mode would be larger than
	 a word, we won't be doing the extraction the normal way.
	 We don't want a mode bigger than the destination.  */

      mode = GET_MODE (op0);
      if (GET_MODE_BITSIZE (mode) == 0
	  || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
	mode = word_mode;
      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
			    MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
	{
	  /* The only way this should occur is if the field spans word
	     boundaries.  */
	  store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
				 value);
	  return;
	}

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
	 be in the range 0 to total_bits-1, and put any excess bytes in
	 OFFSET.  */
      if (bitpos >= total_bits)
	{
	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
		     * BITS_PER_UNIT);
	}

      /* Get ref to an aligned byte, halfword, or word containing the field.
	 Adjust BITPOS to be position within a word,
	 and OFFSET to be the offset of that word.
	 Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = adjust_address (op0, mode, offset);
    }

  mode = GET_MODE (op0);

  /* Now MODE is either some integral mode for a MEM as OP0,
     or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
     The bit field is contained entirely within OP0.
     BITPOS is the starting bit number within OP0.
     (OP0's mode may actually be narrower than MODE.)  */

  if (BYTES_BIG_ENDIAN)
    /* BITPOS is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitpos = total_bits - bitsize - bitpos;

  /* Now BITPOS is always the distance between our lsb
     and that of OP0.  */
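
  /* Worked example of the endian conversion above: with total_bits == 32,
     bitsize == 8 and a big-endian BITPOS of 5 (distance between msbs),
     the little-endian position becomes 32 - 8 - 5 == 19, so the field
     occupies bits 19..26 counting from the lsb.  (Illustrative
     numbers.)  */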
  /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (GET_CODE (value) == CONST_INT)
    {
      HOST_WIDE_INT v = INTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
	v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;

      if (v == 0)
	all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
		&& v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
	       || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
	all_one = 1;

      value = lshift_value (mode, value, bitpos, bitsize);
    }
  else
    {
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
		      && bitpos + bitsize != GET_MODE_BITSIZE (mode));

      if (GET_MODE (value) != mode)
	{
	  if ((REG_P (value) || GET_CODE (value) == SUBREG)
	      && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
	    value = gen_lowpart (mode, value);
	  else
	    value = convert_to_mode (mode, value, 1);
	}

      if (must_and)
	value = expand_binop (mode, and_optab, value,
			      mask_rtx (mode, 0, bitsize, 0),
			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitpos > 0)
	value = expand_shift (LSHIFT_EXPR, mode, value,
			      build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
    }

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */

  subtarget = (REG_P (op0) || ! flag_force_mem) ? op0 : 0;

  if (! all_one)
    {
      temp = expand_binop (mode, and_optab, op0,
			   mask_rtx (mode, bitpos, bitsize, 1),
			   subtarget, 1, OPTAB_LIB_WIDEN);
      subtarget = temp;
    }
  else
    temp = op0;

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    temp = expand_binop (mode, ior_optab, temp, value,
			 subtarget, 1, OPTAB_LIB_WIDEN);
  if (op0 != temp)
    emit_move_insn (op0, temp);
}
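
/* In essence, the function above is the classic read-modify-write
   idiom.  A self-contained sketch of the same computation for a 32-bit
   word, using hypothetical names and little-endian bit numbering
   (C99 uint32_t; not part of this file's logic):  */
#if 0
static uint32_t
store_field_32 (uint32_t word, uint32_t value,
		unsigned int bitpos, unsigned int bitsize)
{
  uint32_t mask = ((bitsize < 32 ? ((uint32_t) 1 << bitsize) : 0) - 1)
		  << bitpos;
  /* Clear the field (the AND step above), then OR the shifted value in;
     all_zero and all_one merely let the code above skip a no-op step.  */
  return (word & ~mask) | ((value << bitpos) & mask);
}
#endif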
/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   (within the word).
   VALUE is the value to store.

   This does not yet handle fields wider than BITS_PER_WORD.  */

static void
store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitpos, rtx value)
{
  unsigned int unit;
  unsigned int bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
	value = word;
      else
	value = gen_lowpart_common (word_mode,
				    force_reg (GET_MODE (value) != VOIDmode
					       ? GET_MODE (value)
					       : word_mode, value));
    }

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part, word;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 store_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      if (BYTES_BIG_ENDIAN)
	{
	  int total_bits;

	  /* We must do an endian conversion exactly the same way as it is
	     done in extract_bit_field, so that the two calls to
	     extract_fixed_bit_field will have comparable arguments.  */
	  if (!MEM_P (value) || GET_MODE (value) == BLKmode)
	    total_bits = BITS_PER_WORD;
	  else
	    total_bits = GET_MODE_BITSIZE (GET_MODE (value));

	  /* Fetch successively less significant portions.  */
	  if (GET_CODE (value) == CONST_INT)
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> (bitsize - bitsdone - thissize))
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	  else
	    /* The args are chosen so that the last part includes the
	       lsb.  Give extract_bit_field the value it needs (with
	       endianness compensation) to fetch the piece we want.  */
	    part = extract_fixed_bit_field (word_mode, value, 0, thissize,
					    total_bits - bitsize + bitsdone,
					    NULL_RTX, 1);
	}
      else
	{
	  /* Fetch successively more significant portions.  */
	  if (GET_CODE (value) == CONST_INT)
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> bitsdone)
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	  else
	    part = extract_fixed_bit_field (word_mode, value, 0, thissize,
					    bitsdone, NULL_RTX, 1);
	}

      /* If OP0 is a register, then handle OFFSET here.

	 When handling multiword bitfields, extract_bit_field may pass
	 down a word_mode SUBREG of a larger REG for a bitfield that actually
	 crosses a word boundary.  Thus, for a SUBREG, we must find
	 the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
	{
	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
	  word = operand_subword_force (SUBREG_REG (op0), word_offset,
					GET_MODE (SUBREG_REG (op0)));
	  offset = 0;
	}
      else if (REG_P (op0))
	{
	  word = operand_subword_force (op0, offset, GET_MODE (op0));
	  offset = 0;
	}
      else
	word = op0;

      /* OFFSET is in UNITs, and UNIT is in bits.
	 store_fixed_bit_field wants offset in bytes.  */
      store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
			     thispos, part);
      bitsdone += thissize;
    }
}
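
/* Worked example of the splitting loop above: storing an 8-bit field at
   bitpos 28 with unit == 32 takes two iterations of thissize == 4 each:
   bits 28..31 of the first word, then bits 0..3 of the second.
   (Illustrative numbers.)  */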
/* Generate code to extract a byte-field from STR_RTX
   containing BITSIZE bits, starting at BITNUM,
   and put it in TARGET if possible (if TARGET is nonzero).
   Regardless of TARGET, we return the rtx for where the value is placed.

   STR_RTX is the structure containing the byte (a REG or MEM).
   UNSIGNEDP is nonzero if this is an unsigned bit field.
   MODE is the natural mode of the field value once extracted.
   TMODE is the mode the caller would like the value to have;
   but the value may be returned with type MODE instead.

   TOTAL_SIZE is the size in bytes of the containing structure,
   or -1 if varying.

   If a TARGET is specified and we can store in it at no extra cost,
   we do so, and return TARGET.
   Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
   if they are equally easy.  */

rtx
extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		   unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
		   enum machine_mode mode, enum machine_mode tmode)
{
  unsigned int unit
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset = bitnum / unit;
  unsigned HOST_WIDE_INT bitpos = bitnum % unit;
  rtx op0 = str_rtx;
  rtx spec_target = target;
  rtx spec_target_subreg = 0;
  enum machine_mode int_mode;
  enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
  enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
  enum machine_mode mode1;
  int byte_offset;

  if (tmode == VOIDmode)
    tmode = mode;

  while (GET_CODE (op0) == SUBREG)
    {
      bitpos += SUBREG_BYTE (op0) * BITS_PER_UNIT;
      if (bitpos > unit)
	{
	  offset += (bitpos / unit);
	  bitpos %= unit;
	}
      op0 = SUBREG_REG (op0);
    }

  if (REG_P (op0)
      && mode == GET_MODE (op0)
      && bitnum == 0
      && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
    {
      /* We're trying to extract a full register from itself.  */
      return op0;
    }

  /* Use vec_extract patterns for extracting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
	  != CODE_FOR_nothing)
      && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
	  == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      enum machine_mode outermode = GET_MODE (op0);
      enum machine_mode innermode = GET_MODE_INNER (outermode);
      int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
      unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
      rtx rtxpos = GEN_INT (pos);
      rtx src = op0;
      rtx dest = NULL, pat, seq;
      enum machine_mode mode0 = insn_data[icode].operand[0].mode;
      enum machine_mode mode1 = insn_data[icode].operand[1].mode;
      enum machine_mode mode2 = insn_data[icode].operand[2].mode;

      if (innermode == tmode || innermode == mode)
	dest = target;

      if (!dest)
	dest = gen_reg_rtx (innermode);

      start_sequence ();

      if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
	dest = copy_to_mode_reg (mode0, dest);

      if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
	src = copy_to_mode_reg (mode1, src);

      if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
	rtxpos = copy_to_mode_reg (mode1, rtxpos);

      /* We could handle this, but we should always be called with a pseudo
	 for our targets and all insns should take them as outputs.  */
      if (! (*insn_data[icode].operand[0].predicate) (dest, mode0)
	  || ! (*insn_data[icode].operand[1].predicate) (src, mode1)
	  || ! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
	abort ();

      pat = GEN_FCN (icode) (dest, src, rtxpos);
      seq = get_insns ();
      end_sequence ();
      if (pat)
	{
	  emit_insn (seq);
	  emit_insn (pat);
	  return dest;
	}
    }
  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
	if (MEM_P (op0))
	  op0 = adjust_address (op0, imode, 0);
	else if (imode != BLKmode)
	  op0 = gen_lowpart (imode, op0);
	else
	  abort ();
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  if (MEM_P (op0))
    {
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }

  /* Extraction of a full-word or multi-word value from a structure
     in a register or aligned memory can be done with just a SUBREG.
     A subword value in the least significant part of a register
     can also be extracted with a SUBREG.  For this, we need the
     byte offset of the value in op0.  */

  byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && !MEM_P (op0)
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* ??? We currently assume TARGET is at least as big as BITSIZE.
     If that's wrong, the solution is to test for it and set TARGET to 0
     if needed.  */

  /* Only scalar integer modes can be converted via subregs.  There is an
     additional problem for FP modes here in that they can have a precision
     which is different from the size.  mode_for_size uses precision, but
     we want a mode based on the size, so we must avoid calling it for FP
     modes.  */
  mode1 = (SCALAR_INT_MODE_P (tmode)
	   ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
	   : mode);

  if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
	&& bitpos % BITS_PER_WORD == 0)
       || (mode1 != BLKmode
	   /* ??? The big endian test here is wrong.  This is correct
	      if the value is in a register, and if mode_for_size is not
	      the same mode as op0.  This causes us to get unnecessarily
	      inefficient code from the Thumb port when -mbig-endian.  */
	   && (BYTES_BIG_ENDIAN
	       ? bitpos + bitsize == BITS_PER_WORD
	       : bitpos == 0)))
      && ((!MEM_P (op0)
	   && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
				     GET_MODE_BITSIZE (GET_MODE (op0)))
	   && GET_MODE_SIZE (mode1) != 0
	   && byte_offset % GET_MODE_SIZE (mode1) == 0)
	  || (MEM_P (op0)
	      && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
		  || (offset * BITS_PER_UNIT % bitsize == 0
		      && MEM_ALIGN (op0) % bitsize == 0)))))
    {
      if (mode1 != GET_MODE (op0))
	{
	  if (GET_CODE (op0) == SUBREG)
	    {
	      if (GET_MODE (SUBREG_REG (op0)) == mode1
		  || GET_MODE_CLASS (mode1) == MODE_INT
		  || GET_MODE_CLASS (mode1) == MODE_PARTIAL_INT)
		op0 = SUBREG_REG (op0);
	      else
		/* Else we've got some float mode source being extracted into
		   a different float mode destination -- this combination of
		   subregs results in Severe Tire Damage.  */
		goto no_subreg_mode_swap;
	    }
	  if (REG_P (op0))
	    op0 = gen_rtx_SUBREG (mode1, op0, byte_offset);
	  else
	    op0 = adjust_address (op0, mode1, offset);
	}
      if (mode1 != mode)
	return convert_to_mode (tmode, op0, unsignedp);
      return op0;
    }
 no_subreg_mode_swap:
  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.  */

      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;

      if (target == 0 || !REG_P (target))
	target = gen_reg_rtx (mode);

      /* Indicate for flow that the entire target reg is being set.  */
      emit_insn (gen_rtx_CLOBBER (VOIDmode, target));

      for (i = 0; i < nwords; i++)
	{
	  /* If I is 0, use the low-order word in both field and target;
	     if I is 1, use the next to lowest word; and so on.  */
	  /* Word number in TARGET to use.  */
	  unsigned int wordnum
	    = (WORDS_BIG_ENDIAN
	       ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
	       : i);
	  /* Offset from start of field in OP0.  */
	  unsigned int bit_offset = (WORDS_BIG_ENDIAN
				     ? MAX (0, ((int) bitsize - ((int) i + 1)
						* (int) BITS_PER_WORD))
				     : (int) i * BITS_PER_WORD);
	  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
	  rtx result_part
	    = extract_bit_field (op0, MIN (BITS_PER_WORD,
					   bitsize - i * BITS_PER_WORD),
				 bitnum + bit_offset, 1, target_part, mode,
				 word_mode);

	  if (target_part == 0)
	    abort ();

	  if (result_part != target_part)
	    emit_move_insn (target_part, result_part);
	}

      if (unsignedp)
	{
	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
	     need to be zero'd out.  */
	  if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
	    {
	      unsigned int i, total_words;

	      total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
	      for (i = nwords; i < total_words; i++)
		emit_move_insn
		  (operand_subword (target,
				    WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
				    1, VOIDmode),
		   const0_rtx);
	    }
	  return target;
	}

      /* Signed bit field: sign-extend with two arithmetic shifts.  */
      target = expand_shift (LSHIFT_EXPR, mode, target,
			     build_int_cst (NULL_TREE,
					    GET_MODE_BITSIZE (mode) - bitsize),
			     NULL_RTX, 0);
      return expand_shift (RSHIFT_EXPR, mode, target,
			   build_int_cst (NULL_TREE,
					  GET_MODE_BITSIZE (mode) - bitsize),
			   NULL_RTX, 0);
    }
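
  /* The two-shift sequence above is the standard sign-extension trick:
     for a W-bit word holding a B-bit field in its low bits,
     (x << (W - B)) >> (W - B) with an arithmetic right shift replicates
     the field's sign bit through the upper W - B bits.  E.g. with
     W == 32, B == 8 and x == 0x80, shifting left by 24 gives 0x80000000
     and shifting arithmetically right by 24 gives 0xFFFFFF80 == -128.
     (Illustrative numbers.)  */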
  /* From here on we know the desired field is smaller than a word.  */

  /* Check if there is a correspondingly-sized integer field, so we can
     safely extract it as one size of integer, if necessary; then
     truncate or extend to the size that is wanted; then use SUBREGs or
     convert_to_mode to get one of the modes we really wanted.  */

  int_mode = int_mode_for_mode (tmode);
  if (int_mode == BLKmode)
    int_mode = int_mode_for_mode (mode);
  if (int_mode == BLKmode)
    abort ();	/* Should probably push op0 out to memory and then
		   do a load.  */

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */

  if (!MEM_P (op0))
    {
      if (offset != 0
	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
	{
	  if (!REG_P (op0))
	    op0 = copy_to_reg (op0);
	  op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
				op0, (offset * UNITS_PER_WORD));
	}
      offset = 0;
    }

  /* Now OFFSET is nonzero only for memory operands.  */

  if (unsignedp)
    {
      if (HAVE_extzv
	  && (GET_MODE_BITSIZE (extzv_mode) >= bitsize)
	  && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
		&& (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
	{
	  unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
	  rtx bitsize_rtx, bitpos_rtx;
	  rtx last = get_last_insn ();
	  rtx xop0 = op0;
	  rtx xtarget = target;
	  rtx xspec_target = spec_target;
	  rtx xspec_target_subreg = spec_target_subreg;
	  rtx pat;
	  enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);

	  if (MEM_P (xop0))
	    {
	      int save_volatile_ok = volatile_ok;
	      volatile_ok = 1;

	      /* Is the memory operand acceptable?  */
	      if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
		     (xop0, GET_MODE (xop0))))
		{
		  /* No, load into a reg and extract from there.  */
		  enum machine_mode bestmode;

		  /* Get the mode to use for inserting into this field.  If
		     OP0 is BLKmode, get the smallest mode consistent with the
		     alignment.  If OP0 is a non-BLKmode object that is no
		     wider than MAXMODE, use its mode.  Otherwise, use the
		     smallest mode containing the field.  */

		  if (GET_MODE (xop0) == BLKmode
		      || (GET_MODE_SIZE (GET_MODE (op0))
			  > GET_MODE_SIZE (maxmode)))
		    bestmode = get_best_mode (bitsize, bitnum,
					      MEM_ALIGN (xop0), maxmode,
					      MEM_VOLATILE_P (xop0));
		  else
		    bestmode = GET_MODE (xop0);

		  if (bestmode == VOIDmode
		      || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
			  && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
		    goto extzv_loses;

		  /* Compute offset as multiple of this unit,
		     counting in bytes.  */
		  unit = GET_MODE_BITSIZE (bestmode);
		  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
		  xbitpos = bitnum % unit;
		  xop0 = adjust_address (xop0, bestmode, xoffset);

		  /* Fetch it to a register in that size.  */
		  xop0 = force_reg (bestmode, xop0);

		  /* XBITPOS counts within UNIT, which is what is expected.  */
		}
	      else
		/* Get ref to first byte containing part of the field.  */
		xop0 = adjust_address (xop0, byte_mode, xoffset);

	      volatile_ok = save_volatile_ok;
	    }

	  /* If op0 is a register, we need it in MAXMODE (which is usually
	     SImode) to make it acceptable to the format of extzv.  */
	  if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
	    goto extzv_loses;
	  if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
	    xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);

	  /* On big-endian machines, we count bits from the most significant.
	     If the bit field insn does not, we must invert.  */
	  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
	    xbitpos = unit - bitsize - xbitpos;

	  /* Now convert from counting within UNIT to counting in MAXMODE.  */
	  if (BITS_BIG_ENDIAN && !MEM_P (xop0))
	    xbitpos += GET_MODE_BITSIZE (maxmode) - unit;

	  unit = GET_MODE_BITSIZE (maxmode);

	  if (xtarget == 0
	      || (flag_force_mem && MEM_P (xtarget)))
	    xtarget = xspec_target = gen_reg_rtx (tmode);

	  if (GET_MODE (xtarget) != maxmode)
	    {
	      if (REG_P (xtarget))
		{
		  int wider = (GET_MODE_SIZE (maxmode)
			       > GET_MODE_SIZE (GET_MODE (xtarget)));
		  xtarget = gen_lowpart (maxmode, xtarget);
		  if (wider)
		    xspec_target_subreg = xtarget;
		}
	      else
		xtarget = gen_reg_rtx (maxmode);
	    }

	  /* If this machine's extzv insists on a register target,
	     make sure we have one.  */
	  if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
		 (xtarget, maxmode)))
	    xtarget = gen_reg_rtx (maxmode);

	  bitsize_rtx = GEN_INT (bitsize);
	  bitpos_rtx = GEN_INT (xbitpos);

	  pat = gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
	  if (pat)
	    {
	      emit_insn (pat);
	      target = xtarget;
	      spec_target = xspec_target;
	      spec_target_subreg = xspec_target_subreg;
	    }
	  else
	    {
	      delete_insns_since (last);
	      target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
						bitpos, target, 1);
	    }
	}
      else
	extzv_loses:
	target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
					  bitpos, target, 1);
    }
  else
    {
      if (HAVE_extv
	  && (GET_MODE_BITSIZE (extv_mode) >= bitsize)
	  && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
		&& (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
	{
	  int xbitpos = bitpos, xoffset = offset;
	  rtx bitsize_rtx, bitpos_rtx;
	  rtx last = get_last_insn ();
	  rtx xop0 = op0, xtarget = target;
	  rtx xspec_target = spec_target;
	  rtx xspec_target_subreg = spec_target_subreg;
	  rtx pat;
	  enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);

	  if (MEM_P (xop0))
	    {
	      /* Is the memory operand acceptable?  */
	      if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
		     (xop0, GET_MODE (xop0))))
		{
		  /* No, load into a reg and extract from there.  */
		  enum machine_mode bestmode;

		  /* Get the mode to use for inserting into this field.  If
		     OP0 is BLKmode, get the smallest mode consistent with the
		     alignment.  If OP0 is a non-BLKmode object that is no
		     wider than MAXMODE, use its mode.  Otherwise, use the
		     smallest mode containing the field.  */

		  if (GET_MODE (xop0) == BLKmode
		      || (GET_MODE_SIZE (GET_MODE (op0))
			  > GET_MODE_SIZE (maxmode)))
		    bestmode = get_best_mode (bitsize, bitnum,
					      MEM_ALIGN (xop0), maxmode,
					      MEM_VOLATILE_P (xop0));
		  else
		    bestmode = GET_MODE (xop0);

		  if (bestmode == VOIDmode
		      || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
			  && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
		    goto extv_loses;

		  /* Compute offset as multiple of this unit,
		     counting in bytes.  */
		  unit = GET_MODE_BITSIZE (bestmode);
		  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
		  xbitpos = bitnum % unit;
		  xop0 = adjust_address (xop0, bestmode, xoffset);

		  /* Fetch it to a register in that size.  */
		  xop0 = force_reg (bestmode, xop0);

		  /* XBITPOS counts within UNIT, which is what is expected.  */
		}
	      else
		/* Get ref to first byte containing part of the field.  */
		xop0 = adjust_address (xop0, byte_mode, xoffset);
	    }

	  /* If op0 is a register, we need it in MAXMODE (which is usually
	     SImode) to make it acceptable to the format of extv.  */
	  if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
	    goto extv_loses;
	  if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
	    xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);

	  /* On big-endian machines, we count bits from the most significant.
	     If the bit field insn does not, we must invert.  */
	  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
	    xbitpos = unit - bitsize - xbitpos;

	  /* XBITPOS counts within a size of UNIT.
	     Adjust to count within a size of MAXMODE.  */
	  if (BITS_BIG_ENDIAN && !MEM_P (xop0))
	    xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);

	  unit = GET_MODE_BITSIZE (maxmode);

	  if (xtarget == 0
	      || (flag_force_mem && MEM_P (xtarget)))
	    xtarget = xspec_target = gen_reg_rtx (tmode);

	  if (GET_MODE (xtarget) != maxmode)
	    {
	      if (REG_P (xtarget))
		{
		  int wider = (GET_MODE_SIZE (maxmode)
			       > GET_MODE_SIZE (GET_MODE (xtarget)));
		  xtarget = gen_lowpart (maxmode, xtarget);
		  if (wider)
		    xspec_target_subreg = xtarget;
		}
	      else
		xtarget = gen_reg_rtx (maxmode);
	    }

	  /* If this machine's extv insists on a register target,
	     make sure we have one.  */
	  if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
		 (xtarget, maxmode)))
	    xtarget = gen_reg_rtx (maxmode);

	  bitsize_rtx = GEN_INT (bitsize);
	  bitpos_rtx = GEN_INT (xbitpos);

	  pat = gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
	  if (pat)
	    {
	      emit_insn (pat);
	      target = xtarget;
	      spec_target = xspec_target;
	      spec_target_subreg = xspec_target_subreg;
	    }
	  else
	    {
	      delete_insns_since (last);
	      target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
						bitpos, target, 0);
	    }
	}
      else
	extv_loses:
	target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
					  bitpos, target, 0);
    }
  if (target == spec_target)
    return target;
  if (target == spec_target_subreg)
    return spec_target;
  if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
    {
      /* If the target mode is floating-point, first convert to the
	 integer mode of that size and then access it as a floating-point
	 value via a SUBREG.  */
      if (GET_MODE_CLASS (tmode) != MODE_INT
	  && GET_MODE_CLASS (tmode) != MODE_PARTIAL_INT)
	{
	  target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
						   MODE_INT, 0),
				    target, unsignedp);
	  return gen_lowpart (tmode, target);
	}
      else
	return convert_to_mode (tmode, target, unsignedp);
    }
  return target;
}
/* Extract a bit field using shifts and boolean operations.
   Returns an rtx to represent the value.
   OP0 addresses a register (word) or memory (byte).
   BITPOS says which bit within the word or byte the bit field starts in.
   OFFSET says how many bytes farther the bit field starts;
     it is 0 if OP0 is a register.
   BITSIZE says how many bits long the bit field is.
     (If OP0 is a register, it may be narrower than a full word,
      but BITPOS still counts within a full word,
      which is significant on bigendian machines.)

   UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
   If TARGET is nonzero, attempts to store the value there
   and return TARGET, but this is not guaranteed.
   If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */

static rtx
extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
			 unsigned HOST_WIDE_INT offset,
			 unsigned HOST_WIDE_INT bitsize,
			 unsigned HOST_WIDE_INT bitpos, rtx target,
			 int unsignedp)
{
  unsigned int total_bits = BITS_PER_WORD;
  enum machine_mode mode;

  if (GET_CODE (op0) == SUBREG || REG_P (op0))
    {
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
	return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
    }
  else
    {
      /* Get the proper mode to use for this field.  We want a mode that
	 includes the entire field.  If such a mode would be larger than
	 a word, we won't be doing the extraction the normal way.  */

      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
			    MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
	/* The only way this should occur is if the field spans word
	   boundaries.  */
	return extract_split_bit_field (op0, bitsize,
					bitpos + offset * BITS_PER_UNIT,
					unsignedp);

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
	 be in the range 0 to total_bits-1, and put any excess bytes in
	 OFFSET.  */
      if (bitpos >= total_bits)
	{
	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
		     * BITS_PER_UNIT);
	}

      /* Get ref to an aligned byte, halfword, or word containing the field.
	 Adjust BITPOS to be position within a word,
	 and OFFSET to be the offset of that word.
	 Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = adjust_address (op0, mode, offset);
    }

  mode = GET_MODE (op0);

  if (BYTES_BIG_ENDIAN)
    /* BITPOS is the distance between our msb and that of OP0.
       Convert it to the distance from the lsb.  */
    bitpos = total_bits - bitsize - bitpos;

  /* Now BITPOS is always the distance between the field's lsb and that of OP0.
     We have reduced the big-endian case to the little-endian case.  */

  if (unsignedp)
    {
      if (bitpos)
	{
	  /* If the field does not already start at the lsb,
	     shift it so it does.  */
	  tree amount = build_int_cst (NULL_TREE, bitpos);
	  /* Maybe propagate the target for the shift.  */
	  /* But not if we will return it--could confuse integrate.c.  */
	  rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
	  if (tmode != mode) subtarget = 0;
	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
	}
      /* Convert the value to the desired mode.  */
      if (mode != tmode)
	op0 = convert_to_mode (tmode, op0, 1);

      /* Unless the msb of the field used to be the msb when we shifted,
	 mask out the upper bits.  */

      if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
	return expand_binop (GET_MODE (op0), and_optab, op0,
			     mask_rtx (GET_MODE (op0), 0, bitsize, 0),
			     target, 1, OPTAB_LIB_WIDEN);
      return op0;
    }
1761 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1762 then arithmetic-shift its lsb to the lsb of the word. */
1763 op0 = force_reg (mode, op0);
1764 if (mode != tmode)
1765 target = 0;
1767 /* Find the narrowest integer mode that contains the field. */
1769 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1770 mode = GET_MODE_WIDER_MODE (mode))
1771 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1773 op0 = convert_to_mode (mode, op0, 0);
1774 break;
1777 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1779 tree amount
1780 = build_int_cst (NULL_TREE,
1781 GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1782 /* Maybe propagate the target for the shift. */
1783 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1784 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1787 return expand_shift (RSHIFT_EXPR, mode, op0,
1788 build_int_cst (NULL_TREE,
1789 GET_MODE_BITSIZE (mode) - bitsize),
1790 target, 0);
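/* A minimal standalone sketch (not GCC code) of the two extraction
   identities used above, at a fixed 32-bit width with invented names;
   assumes bitpos + bitsize <= 32.  The right shift of a negative value
   is implementation-defined in ISO C but arithmetic on GCC's targets.  */
#include <stdint.h>

/* Unsigned field: shift the field down to the lsb, then mask.  */
static uint32_t
extract_unsigned_sketch (uint32_t word, unsigned bitpos, unsigned bitsize)
{
  uint32_t mask = bitsize < 32 ? ((uint32_t) 1 << bitsize) - 1 : ~(uint32_t) 0;
  return (word >> bitpos) & mask;
}

/* Signed field: shift the field's msb up to the word's msb, then
   arithmetic-shift its lsb back down to the word's lsb.
   E.g. extract_signed_sketch (0x00000F00, 8, 4) == -1.  */
static int32_t
extract_signed_sketch (uint32_t word, unsigned bitpos, unsigned bitsize)
{
  return (int32_t) (word << (32 - bitpos - bitsize)) >> (32 - bitsize);
}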
1793 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1794 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1795 complement of that if COMPLEMENT. The mask is truncated if
1796 necessary to the width of mode MODE. The mask is zero-extended if
1797 BITSIZE+BITPOS is too small for MODE. */
1799 static rtx
1800 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1802 HOST_WIDE_INT masklow, maskhigh;
1804 if (bitsize == 0)
1805 masklow = 0;
1806 else if (bitpos < HOST_BITS_PER_WIDE_INT)
1807 masklow = (HOST_WIDE_INT) -1 << bitpos;
1808 else
1809 masklow = 0;
1811 if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1812 masklow &= ((unsigned HOST_WIDE_INT) -1
1813 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1815 if (bitpos <= HOST_BITS_PER_WIDE_INT)
1816 maskhigh = -1;
1817 else
1818 maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1820 if (bitsize == 0)
1821 maskhigh = 0;
1822 else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1823 maskhigh &= ((unsigned HOST_WIDE_INT) -1
1824 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1825 else
1826 maskhigh = 0;
1828 if (complement)
1830 maskhigh = ~maskhigh;
1831 masklow = ~masklow;
1834 return immed_double_const (masklow, maskhigh, mode);
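/* A standalone sketch (not GCC code) of the same mask on a single
   64-bit word; mask_rtx above produces it split across two
   HOST_WIDE_INT halves instead.  E.g. mask_sketch (4, 8, 0) == 0xFF0.  */
#include <stdint.h>

static uint64_t
mask_sketch (int bitpos, int bitsize, int complement)   /* bitpos < 64 */
{
  uint64_t ones = bitsize >= 64 ? ~(uint64_t) 0
                                : ((uint64_t) 1 << bitsize) - 1;
  uint64_t mask = ones << bitpos;       /* excess bits drop off the top */
  return complement ? ~mask : mask;
}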
1837 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1838 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1840 static rtx
1841 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1843 unsigned HOST_WIDE_INT v = INTVAL (value);
1844 HOST_WIDE_INT low, high;
1846 if (bitsize < HOST_BITS_PER_WIDE_INT)
1847 v &= ~((HOST_WIDE_INT) -1 << bitsize);
1849 if (bitpos < HOST_BITS_PER_WIDE_INT)
1851 low = v << bitpos;
1852 high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1854 else
1856 low = 0;
1857 high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1860 return immed_double_const (low, high, mode);
1863 /* Extract a bit field that is split across two words
1864 and return an RTX for the result.
1866 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1867 BITSIZE is the field width; BITPOS is the position of its first bit within the word.
1868 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
1870 static rtx
1871 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1872 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1874 unsigned int unit;
1875 unsigned int bitsdone = 0;
1876 rtx result = NULL_RTX;
1877 int first = 1;
1879 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1880 much at a time. */
1881 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1882 unit = BITS_PER_WORD;
1883 else
1884 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1886 while (bitsdone < bitsize)
1888 unsigned HOST_WIDE_INT thissize;
1889 rtx part, word;
1890 unsigned HOST_WIDE_INT thispos;
1891 unsigned HOST_WIDE_INT offset;
1893 offset = (bitpos + bitsdone) / unit;
1894 thispos = (bitpos + bitsdone) % unit;
1896 /* THISSIZE must not overrun a word boundary. Otherwise,
1897 extract_fixed_bit_field will call us again, and we will mutually
1898 recurse forever. */
1899 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1900 thissize = MIN (thissize, unit - thispos);
1902 /* If OP0 is a register, then handle OFFSET here.
1904 When handling multiword bitfields, extract_bit_field may pass
1905 down a word_mode SUBREG of a larger REG for a bitfield that actually
1906 crosses a word boundary. Thus, for a SUBREG, we must find
1907 the current word starting from the base register. */
1908 if (GET_CODE (op0) == SUBREG)
1910 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1911 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1912 GET_MODE (SUBREG_REG (op0)));
1913 offset = 0;
1915 else if (REG_P (op0))
1917 word = operand_subword_force (op0, offset, GET_MODE (op0));
1918 offset = 0;
1920 else
1921 word = op0;
1923 /* Extract the parts in bit-counting order,
1924 whose meaning is determined by BYTES_PER_UNIT.
1925 OFFSET is in UNITs, and UNIT is in bits.
1926 extract_fixed_bit_field wants offset in bytes. */
1927 part = extract_fixed_bit_field (word_mode, word,
1928 offset * unit / BITS_PER_UNIT,
1929 thissize, thispos, 0, 1);
1930 bitsdone += thissize;
1932 /* Shift this part into place for the result. */
1933 if (BYTES_BIG_ENDIAN)
1935 if (bitsize != bitsdone)
1936 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1937 build_int_cst (NULL_TREE, bitsize - bitsdone),
1938 0, 1);
1940 else
1942 if (bitsdone != thissize)
1943 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1944 build_int_cst (NULL_TREE,
1945 bitsdone - thissize), 0, 1);
1948 if (first)
1949 result = part;
1950 else
1951 /* Combine the parts with bitwise or. This works
1952 because we extracted each part as an unsigned bit field. */
1953 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1954 OPTAB_LIB_WIDEN);
1956 first = 0;
1959 /* Unsigned bit field: we are done. */
1960 if (unsignedp)
1961 return result;
1962 /* Signed bit field: sign-extend with two arithmetic shifts. */
1963 result = expand_shift (LSHIFT_EXPR, word_mode, result,
1964 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1965 NULL_RTX, 0);
1966 return expand_shift (RSHIFT_EXPR, word_mode, result,
1967 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1968 NULL_RTX, 0);
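/* A standalone sketch (not GCC code) of the combining step above for a
   field spanning two 32-bit words, little-endian bit numbering, invented
   names; assumes bitpos + bitsize > 32 and bitsize <= 32.  */
#include <stdint.h>

static int32_t
extract_split_sketch (uint32_t w0, uint32_t w1,
                      unsigned bitpos, unsigned bitsize, int unsignedp)
{
  unsigned lo_bits = 32 - bitpos;          /* part that lives in w0 */
  unsigned hi_bits = bitsize - lo_bits;    /* part that lives in w1 */
  uint32_t lo = w0 >> bitpos;              /* both parts fetched unsigned */
  uint32_t hi = w1 & (((uint32_t) 1 << hi_bits) - 1);
  uint32_t result = lo | (hi << lo_bits);  /* bitwise-or them into place */
  if (unsignedp)
    return (int32_t) result;
  /* Sign-extend with two shifts, as at the end of the function above.  */
  return (int32_t) (result << (32 - bitsize)) >> (32 - bitsize);
}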
1971 /* Add INC into TARGET. */
1973 void
1974 expand_inc (rtx target, rtx inc)
1976 rtx value = expand_binop (GET_MODE (target), add_optab,
1977 target, inc,
1978 target, 0, OPTAB_LIB_WIDEN);
1979 if (value != target)
1980 emit_move_insn (target, value);
1983 /* Subtract DEC from TARGET. */
1985 void
1986 expand_dec (rtx target, rtx dec)
1988 rtx value = expand_binop (GET_MODE (target), sub_optab,
1989 target, dec,
1990 target, 0, OPTAB_LIB_WIDEN);
1991 if (value != target)
1992 emit_move_insn (target, value);
1995 /* Output a shift instruction for expression code CODE,
1996 with SHIFTED being the rtx for the value to shift,
1997 and AMOUNT the tree for the amount to shift by.
1998 Store the result in the rtx TARGET, if that is convenient.
1999 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2000 Return the rtx for where the value is. */
2002 rtx
2003 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2004 tree amount, rtx target, int unsignedp)
2006 rtx op1, temp = 0;
2007 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2008 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2009 int try;
2011 /* We previously detected shift counts computed by NEGATE_EXPR
2012 and shifted in the other direction, but that does not work
2013 on all machines. */
2015 op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
2017 if (SHIFT_COUNT_TRUNCATED)
2019 if (GET_CODE (op1) == CONST_INT
2020 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2021 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2022 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2023 % GET_MODE_BITSIZE (mode));
2024 else if (GET_CODE (op1) == SUBREG
2025 && subreg_lowpart_p (op1))
2026 op1 = SUBREG_REG (op1);
2029 if (op1 == const0_rtx)
2030 return shifted;
2032 /* Check whether it's cheaper to implement a left shift by a constant
2033 bit count as a sequence of additions. */
2034 if (code == LSHIFT_EXPR
2035 && GET_CODE (op1) == CONST_INT
2036 && INTVAL (op1) > 0
2037 && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2038 && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode])
2040 int i;
2041 for (i = 0; i < INTVAL (op1); i++)
2043 temp = force_reg (mode, shifted);
2044 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2045 unsignedp, OPTAB_LIB_WIDEN);
2047 return shifted;
2050 for (try = 0; temp == 0 && try < 3; try++)
2052 enum optab_methods methods;
2054 if (try == 0)
2055 methods = OPTAB_DIRECT;
2056 else if (try == 1)
2057 methods = OPTAB_WIDEN;
2058 else
2059 methods = OPTAB_LIB_WIDEN;
2061 if (rotate)
2063 /* Widening does not work for rotation. */
2064 if (methods == OPTAB_WIDEN)
2065 continue;
2066 else if (methods == OPTAB_LIB_WIDEN)
2068 /* If we have been unable to open-code this by a rotation,
2069 do it as the IOR of two shifts. I.e., to rotate A
2070 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2071 where C is the bitsize of A.
2073 It is theoretically possible that the target machine might
2074 not be able to perform either shift and hence we would
2075 be making two libcalls rather than just the one for the
2076 shift (similarly if IOR could not be done). We will allow
2077 this extremely unlikely lossage to avoid complicating the
2078 code below. */
2080 rtx subtarget = target == shifted ? 0 : target;
2081 rtx temp1;
2082 tree type = TREE_TYPE (amount);
2083 tree new_amount = make_tree (type, op1);
2084 tree other_amount
2085 = fold (build2 (MINUS_EXPR, type, convert
2086 (type, build_int_cst
2087 (NULL_TREE, GET_MODE_BITSIZE (mode))),
2088 amount));
2090 shifted = force_reg (mode, shifted);
2092 temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2093 mode, shifted, new_amount, subtarget, 1);
2094 temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2095 mode, shifted, other_amount, 0, 1);
2096 return expand_binop (mode, ior_optab, temp, temp1, target,
2097 unsignedp, methods);
2100 temp = expand_binop (mode,
2101 left ? rotl_optab : rotr_optab,
2102 shifted, op1, target, unsignedp, methods);
2104 /* If we don't have the rotate, but we are rotating by a constant
2105 that is in range, try a rotate in the opposite direction. */
2107 if (temp == 0 && GET_CODE (op1) == CONST_INT
2108 && INTVAL (op1) > 0
2109 && (unsigned int) INTVAL (op1) < GET_MODE_BITSIZE (mode))
2110 temp = expand_binop (mode,
2111 left ? rotr_optab : rotl_optab,
2112 shifted,
2113 GEN_INT (GET_MODE_BITSIZE (mode)
2114 - INTVAL (op1)),
2115 target, unsignedp, methods);
2117 else if (unsignedp)
2118 temp = expand_binop (mode,
2119 left ? ashl_optab : lshr_optab,
2120 shifted, op1, target, unsignedp, methods);
2122 /* Do arithmetic shifts.
2123 Also, if we are going to widen the operand, we can just as well
2124 use an arithmetic right-shift instead of a logical one. */
2125 if (temp == 0 && ! rotate
2126 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2128 enum optab_methods methods1 = methods;
2130 /* If trying to widen a log shift to an arithmetic shift,
2131 don't accept an arithmetic shift of the same size. */
2132 if (unsignedp)
2133 methods1 = OPTAB_MUST_WIDEN;
2135 /* Arithmetic shift */
2137 temp = expand_binop (mode,
2138 left ? ashl_optab : ashr_optab,
2139 shifted, op1, target, unsignedp, methods1);
2142 /* We used to try extzv here for logical right shifts, but that was
2143 only useful for one machine, the VAX, and caused poor code
2144 generation there for lshrdi3, so the code was deleted and a
2145 define_expand for lshrsi3 was added to vax.md. */
2148 if (temp == 0)
2149 abort ();
2150 return temp;
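/* A standalone sketch (not GCC code) of the rotation fallback used
   above: rotate A left by N as (A << N) | ((unsigned) A >> (C - N)),
   with C = 32 here and 0 < N < C assumed.  */
#include <stdint.h>

static uint32_t
rotl_sketch (uint32_t a, unsigned n)     /* 0 < n < 32 */
{
  return (a << n) | (a >> (32 - n));
}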
2153 enum alg_code { alg_zero, alg_m, alg_shift,
2154 alg_add_t_m2, alg_sub_t_m2,
2155 alg_add_factor, alg_sub_factor,
2156 alg_add_t2_m, alg_sub_t2_m,
2157 alg_add, alg_subtract, alg_factor, alg_shiftop };
2159 /* This structure records a sequence of operations.
2160 `ops' is the number of operations recorded.
2161 `cost' is their total cost.
2162 The operations are stored in `op' and the corresponding
2163 logarithms of the integer coefficients in `log'.
2165 These are the operations:
2166 alg_zero total := 0;
2167 alg_m total := multiplicand;
2168 alg_shift total := total * coeff
2169 alg_add_t_m2 total := total + multiplicand * coeff;
2170 alg_sub_t_m2 total := total - multiplicand * coeff;
2171 alg_add_factor total := total * coeff + total;
2172 alg_sub_factor total := total * coeff - total;
2173 alg_add_t2_m total := total * coeff + multiplicand;
2174 alg_sub_t2_m total := total * coeff - multiplicand;
2176 The first operand must be either alg_zero or alg_m. */
2178 struct algorithm
2180 short cost;
2181 short ops;
2182 /* The sizes of the OP and LOG fields are not directly related to the
2183 word size, but the worst case arises when the multiplicand has few
2184 consecutive ones or zeros, i.e., looks like 10101010101...;
2185 in that case we generate shift-by-2, add, shift-by-2, add, ...,
2186 roughly wordsize operations in total. */
2187 enum alg_code op[MAX_BITS_PER_WORD];
2188 char log[MAX_BITS_PER_WORD];
2191 /* Indicates the type of fixup needed after a constant multiplication.
2192 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2193 the result should be negated, and ADD_VARIANT means that the
2194 multiplicand should be added to the result. */
2195 enum mult_variant {basic_variant, negate_variant, add_variant};
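/* A standalone sketch (not GCC code) replaying a recorded sequence in
   the spirit of the table above; the enum and names are invented here,
   and alg_zero is implicit in the zero initializer.  Example: 10*x is
   alg_m, alg_add_t2_m (log 2), alg_shift (log 1), i.e. ((x<<2)+x)<<1.  */
#include <assert.h>

enum alg_code_sketch { SK_M, SK_SHIFT, SK_ADD_T_M2, SK_SUB_T_M2,
                       SK_ADD_T2_M, SK_SUB_T2_M,
                       SK_ADD_FACTOR, SK_SUB_FACTOR };

static unsigned int
run_alg_sketch (unsigned int x, const enum alg_code_sketch *op,
                const char *log, int nops)
{
  unsigned int total = 0;
  int i;
  for (i = 0; i < nops; i++)
    switch (op[i])
      {
      case SK_M:          total = x; break;
      case SK_SHIFT:      total <<= log[i]; break;
      case SK_ADD_T_M2:   total += x << log[i]; break;
      case SK_SUB_T_M2:   total -= x << log[i]; break;
      case SK_ADD_T2_M:   total = (total << log[i]) + x; break;
      case SK_SUB_T2_M:   total = (total << log[i]) - x; break;
      case SK_ADD_FACTOR: total = (total << log[i]) + total; break;
      case SK_SUB_FACTOR: total = (total << log[i]) - total; break;
      }
  return total;
}

static void
run_alg_sketch_check (void)
{
  static const enum alg_code_sketch op[] = { SK_M, SK_ADD_T2_M, SK_SHIFT };
  static const char log[] = { 0, 2, 1 };
  assert (run_alg_sketch (123, op, log, 3) == 1230);   /* 10 * 123 */
}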
2197 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2198 int, enum machine_mode mode);
2199 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2200 struct algorithm *, enum mult_variant *, int);
2201 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2202 const struct algorithm *, enum mult_variant);
2203 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2204 int, unsigned HOST_WIDE_INT *,
2205 int *, int *);
2206 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2207 static rtx extract_high_half (enum machine_mode, rtx);
2208 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2209 int, int);
2210 /* Compute and return the best algorithm for multiplying by T.
2211 The algorithm must cost less than COST_LIMIT.
2212 If retval.cost >= COST_LIMIT, no algorithm was found and all
2213 other fields of the returned struct are undefined.
2214 MODE is the machine mode of the multiplication. */
2216 static void
2217 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2218 int cost_limit, enum machine_mode mode)
2220 int m;
2221 struct algorithm *alg_in, *best_alg;
2222 int cost;
2223 unsigned HOST_WIDE_INT q;
2224 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2226 /* Indicate that no algorithm is yet found. If no algorithm
2227 is found, this value will be returned and indicate failure. */
2228 alg_out->cost = cost_limit;
2230 if (cost_limit <= 0)
2231 return;
2233 /* Restrict the bits of "t" to the multiplication's mode. */
2234 t &= GET_MODE_MASK (mode);
2236 /* t == 1 can be done at zero cost. */
2237 if (t == 1)
2239 alg_out->ops = 1;
2240 alg_out->cost = 0;
2241 alg_out->op[0] = alg_m;
2242 return;
2245 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2246 fail now. */
2247 if (t == 0)
2249 if (zero_cost >= cost_limit)
2250 return;
2251 else
2253 alg_out->ops = 1;
2254 alg_out->cost = zero_cost;
2255 alg_out->op[0] = alg_zero;
2256 return;
2260 /* We'll need a couple of extra algorithm structures now. */
2262 alg_in = alloca (sizeof (struct algorithm));
2263 best_alg = alloca (sizeof (struct algorithm));
2265 /* If we have a group of zero bits at the low-order part of T, try
2266 multiplying by the remaining bits and then doing a shift. */
2268 if ((t & 1) == 0)
2270 m = floor_log2 (t & -t); /* m = number of low zero bits */
2271 if (m < maxm)
2273 q = t >> m;
2274 /* The function expand_shift will choose between a shift and
2275 a sequence of additions, so the observed cost is given as
2276 MIN (m * add_cost[mode], shift_cost[mode][m]). */
2277 cost = m * add_cost[mode];
2278 if (shift_cost[mode][m] < cost)
2279 cost = shift_cost[mode][m];
2280 synth_mult (alg_in, q, cost_limit - cost, mode);
2282 cost += alg_in->cost;
2283 if (cost < cost_limit)
2285 struct algorithm *x;
2286 x = alg_in, alg_in = best_alg, best_alg = x;
2287 best_alg->log[best_alg->ops] = m;
2288 best_alg->op[best_alg->ops] = alg_shift;
2289 cost_limit = cost;
2294 /* If we have an odd number, add or subtract one. */
2295 if ((t & 1) != 0)
2297 unsigned HOST_WIDE_INT w;
2299 for (w = 1; (w & t) != 0; w <<= 1)
2301 /* If T was -1, then W will be zero after the loop. This is another
2302 case where T ends with ...111. Handling it by multiplying by
2303 (T + 1) and subtracting 1 produces slightly better code and
2304 selects an algorithm much faster than treating it like the
2305 ...0111 case below. */
2306 if (w == 0
2307 || (w > 2
2308 /* Reject the case where t is 3.
2309 Thus we prefer addition in that case. */
2310 && t != 3))
2312 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2314 cost = add_cost[mode];
2315 synth_mult (alg_in, t + 1, cost_limit - cost, mode);
2317 cost += alg_in->cost;
2318 if (cost < cost_limit)
2320 struct algorithm *x;
2321 x = alg_in, alg_in = best_alg, best_alg = x;
2322 best_alg->log[best_alg->ops] = 0;
2323 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2324 cost_limit = cost;
2327 else
2329 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2331 cost = add_cost[mode];
2332 synth_mult (alg_in, t - 1, cost_limit - cost, mode);
2334 cost += alg_in->cost;
2335 if (cost < cost_limit)
2337 struct algorithm *x;
2338 x = alg_in, alg_in = best_alg, best_alg = x;
2339 best_alg->log[best_alg->ops] = 0;
2340 best_alg->op[best_alg->ops] = alg_add_t_m2;
2341 cost_limit = cost;
2346 /* Look for factors of t of the form
2347 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2348 If we find such a factor, we can multiply by t using an algorithm that
2349 multiplies by q, shifts the result left by m, and adds or subtracts the unshifted value.
2351 We search for large factors first and loop down, even if large factors
2352 are less probable than small; if we find a large factor we will find a
2353 good sequence quickly, and therefore be able to prune (by decreasing
2354 COST_LIMIT) the search. */
2356 for (m = floor_log2 (t - 1); m >= 2; m--)
2358 unsigned HOST_WIDE_INT d;
2360 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2361 if (t % d == 0 && t > d && m < maxm)
2363 cost = add_cost[mode] + shift_cost[mode][m];
2364 if (shiftadd_cost[mode][m] < cost)
2365 cost = shiftadd_cost[mode][m];
2366 synth_mult (alg_in, t / d, cost_limit - cost, mode);
2368 cost += alg_in->cost;
2369 if (cost < cost_limit)
2371 struct algorithm *x;
2372 x = alg_in, alg_in = best_alg, best_alg = x;
2373 best_alg->log[best_alg->ops] = m;
2374 best_alg->op[best_alg->ops] = alg_add_factor;
2375 cost_limit = cost;
2377 /* Other factors will have been taken care of in the recursion. */
2378 break;
2381 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2382 if (t % d == 0 && t > d && m < maxm)
2384 cost = add_cost[mode] + shift_cost[mode][m];
2385 if (shiftsub_cost[mode][m] < cost)
2386 cost = shiftsub_cost[mode][m];
2387 synth_mult (alg_in, t / d, cost_limit - cost, mode);
2389 cost += alg_in->cost;
2390 if (cost < cost_limit)
2392 struct algorithm *x;
2393 x = alg_in, alg_in = best_alg, best_alg = x;
2394 best_alg->log[best_alg->ops] = m;
2395 best_alg->op[best_alg->ops] = alg_sub_factor;
2396 cost_limit = cost;
2398 break;
2402 /* Try shift-and-add (load effective address) instructions,
2403 i.e. do a*3, a*5, a*9. */
2404 if ((t & 1) != 0)
2406 q = t - 1;
2407 q = q & -q;
2408 m = exact_log2 (q);
2409 if (m >= 0 && m < maxm)
2411 cost = shiftadd_cost[mode][m];
2412 synth_mult (alg_in, (t - 1) >> m, cost_limit - cost, mode);
2414 cost += alg_in->cost;
2415 if (cost < cost_limit)
2417 struct algorithm *x;
2418 x = alg_in, alg_in = best_alg, best_alg = x;
2419 best_alg->log[best_alg->ops] = m;
2420 best_alg->op[best_alg->ops] = alg_add_t2_m;
2421 cost_limit = cost;
2425 q = t + 1;
2426 q = q & -q;
2427 m = exact_log2 (q);
2428 if (m >= 0 && m < maxm)
2430 cost = shiftsub_cost[mode][m];
2431 synth_mult (alg_in, (t + 1) >> m, cost_limit - cost, mode);
2433 cost += alg_in->cost;
2434 if (cost < cost_limit)
2436 struct algorithm *x;
2437 x = alg_in, alg_in = best_alg, best_alg = x;
2438 best_alg->log[best_alg->ops] = m;
2439 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2440 cost_limit = cost;
2445 /* If cost_limit has not decreased since we stored it in alg_out->cost,
2446 we have not found any algorithm. */
2447 if (cost_limit == alg_out->cost)
2448 return;
2450 /* If the sequence is too long for `struct algorithm'
2451 to record, make this search fail. */
2452 if (best_alg->ops == MAX_BITS_PER_WORD)
2453 return;
2455 /* Copy the algorithm from temporary space to the space at alg_out.
2456 We avoid using structure assignment because the majority of
2457 best_alg is normally undefined, and this is a critical function. */
2458 alg_out->ops = best_alg->ops + 1;
2459 alg_out->cost = cost_limit;
2460 memcpy (alg_out->op, best_alg->op,
2461 alg_out->ops * sizeof *alg_out->op);
2462 memcpy (alg_out->log, best_alg->log,
2463 alg_out->ops * sizeof *alg_out->log);
2466 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2467 Try three variations:
2469 - a shift/add sequence based on VAL itself
2470 - a shift/add sequence based on -VAL, followed by a negation
2471 - a shift/add sequence based on VAL - 1, followed by an addition.
2473 Return true if the cheapest of these costs less than MULT_COST,
2474 describing the algorithm in *ALG and final fixup in *VARIANT. */
2476 static bool
2477 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2478 struct algorithm *alg, enum mult_variant *variant,
2479 int mult_cost)
2481 struct algorithm alg2;
2483 *variant = basic_variant;
2484 synth_mult (alg, val, mult_cost, mode);
2486 /* This works only if the negated value actually fits in an
2487 `unsigned int'. */
2488 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2490 synth_mult (&alg2, -val, MIN (alg->cost, mult_cost) - neg_cost[mode],
2491 mode);
2492 alg2.cost += neg_cost[mode];
2493 if (alg2.cost < alg->cost)
2494 *alg = alg2, *variant = negate_variant;
2497 /* This proves very useful for division-by-constant. */
2498 synth_mult (&alg2, val - 1, MIN (alg->cost, mult_cost) - add_cost[mode],
2499 mode);
2500 alg2.cost += add_cost[mode];
2501 if (alg2.cost < alg->cost)
2502 *alg = alg2, *variant = add_variant;
2504 return alg->cost < mult_cost;
2507 /* A subroutine of expand_mult, used for constant multiplications.
2508 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2509 convenient. Use the shift/add sequence described by ALG and apply
2510 the final fixup specified by VARIANT. */
2512 static rtx
2513 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2514 rtx target, const struct algorithm *alg,
2515 enum mult_variant variant)
2517 HOST_WIDE_INT val_so_far;
2518 rtx insn, accum, tem;
2519 int opno;
2520 enum machine_mode nmode;
2522 /* Avoid referencing memory over and over.
2523 For speed, but also for correctness when mem is volatile. */
2524 if (MEM_P (op0))
2525 op0 = force_reg (mode, op0);
2527 /* ACCUM starts out either as OP0 or as a zero, depending on
2528 the first operation. */
2530 if (alg->op[0] == alg_zero)
2532 accum = copy_to_mode_reg (mode, const0_rtx);
2533 val_so_far = 0;
2535 else if (alg->op[0] == alg_m)
2537 accum = copy_to_mode_reg (mode, op0);
2538 val_so_far = 1;
2540 else
2541 abort ();
2543 for (opno = 1; opno < alg->ops; opno++)
2545 int log = alg->log[opno];
2546 rtx shift_subtarget = optimize ? 0 : accum;
2547 rtx add_target
2548 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2549 && !optimize)
2550 ? target : 0;
2551 rtx accum_target = optimize ? 0 : accum;
2553 switch (alg->op[opno])
2555 case alg_shift:
2556 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2557 build_int_cst (NULL_TREE, log),
2558 NULL_RTX, 0);
2559 val_so_far <<= log;
2560 break;
2562 case alg_add_t_m2:
2563 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2564 build_int_cst (NULL_TREE, log),
2565 NULL_RTX, 0);
2566 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2567 add_target ? add_target : accum_target);
2568 val_so_far += (HOST_WIDE_INT) 1 << log;
2569 break;
2571 case alg_sub_t_m2:
2572 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2573 build_int_cst (NULL_TREE, log),
2574 NULL_RTX, 0);
2575 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2576 add_target ? add_target : accum_target);
2577 val_so_far -= (HOST_WIDE_INT) 1 << log;
2578 break;
2580 case alg_add_t2_m:
2581 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2582 build_int_cst (NULL_TREE, log),
2583 shift_subtarget,
2585 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2586 add_target ? add_target : accum_target);
2587 val_so_far = (val_so_far << log) + 1;
2588 break;
2590 case alg_sub_t2_m:
2591 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2592 build_int_cst (NULL_TREE, log),
2593 shift_subtarget, 0);
2594 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2595 add_target ? add_target : accum_target);
2596 val_so_far = (val_so_far << log) - 1;
2597 break;
2599 case alg_add_factor:
2600 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2601 build_int_cst (NULL_TREE, log),
2602 NULL_RTX, 0);
2603 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2604 add_target ? add_target : accum_target);
2605 val_so_far += val_so_far << log;
2606 break;
2608 case alg_sub_factor:
2609 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2610 build_int_cst (NULL_TREE, log),
2611 NULL_RTX, 0);
2612 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2613 (add_target
2614 ? add_target : (optimize ? 0 : tem)));
2615 val_so_far = (val_so_far << log) - val_so_far;
2616 break;
2618 default:
2619 abort ();
2622 /* Write a REG_EQUAL note on the last insn so that we can cse
2623 multiplication sequences. Note that if ACCUM is a SUBREG,
2624 we've set the inner register and must properly indicate
2625 that. */
2627 tem = op0, nmode = mode;
2628 if (GET_CODE (accum) == SUBREG)
2630 nmode = GET_MODE (SUBREG_REG (accum));
2631 tem = gen_lowpart (nmode, op0);
2634 insn = get_last_insn ();
2635 set_unique_reg_note (insn, REG_EQUAL,
2636 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
2639 if (variant == negate_variant)
2641 val_so_far = -val_so_far;
2642 accum = expand_unop (mode, neg_optab, accum, target, 0);
2644 else if (variant == add_variant)
2646 val_so_far = val_so_far + 1;
2647 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2650 /* Compare only the bits of val and val_so_far that are significant
2651 in the result mode, to avoid sign-/zero-extension confusion. */
2652 val &= GET_MODE_MASK (mode);
2653 val_so_far &= GET_MODE_MASK (mode);
2654 if (val != val_so_far)
2655 abort ();
2657 return accum;
2660 /* Perform a multiplication and return an rtx for the result.
2661 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2662 TARGET is a suggestion for where to store the result (an rtx).
2664 We check specially for a constant integer as OP1.
2665 If you want this check for OP0 as well, then before calling
2666 you should swap the two operands if OP0 would be constant. */
2668 rtx
2669 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
2670 int unsignedp)
2672 rtx const_op1 = op1;
2673 enum mult_variant variant;
2674 struct algorithm algorithm;
2676 /* synth_mult does an `unsigned int' multiply. As long as the mode is
2677 less than or equal in size to `unsigned int' this doesn't matter.
2678 If the mode is larger than `unsigned int', then synth_mult works only
2679 if the constant value exactly fits in an `unsigned int' without any
2680 truncation. This means that multiplying by negative values does
2681 not work; results are off by 2^32 on a 32-bit machine. */
2683 /* If we are multiplying in DImode, it may still be a win
2684 to try to work with shifts and adds. */
2685 if (GET_CODE (op1) == CONST_DOUBLE
2686 && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2687 && HOST_BITS_PER_INT >= BITS_PER_WORD
2688 && CONST_DOUBLE_HIGH (op1) == 0)
2689 const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2690 else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2691 && GET_CODE (op1) == CONST_INT
2692 && INTVAL (op1) < 0)
2693 const_op1 = 0;
2695 /* We used to test optimize here, on the grounds that it's better to
2696 produce a smaller program when -O is not used.
2697 But this causes such a terrible slowdown sometimes
2698 that it seems better to use synth_mult always. */
2700 if (const_op1 && GET_CODE (const_op1) == CONST_INT
2701 && (unsignedp || !flag_trapv))
2703 int mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
2705 if (choose_mult_variant (mode, INTVAL (const_op1), &algorithm, &variant,
2706 mult_cost))
2707 return expand_mult_const (mode, op0, INTVAL (const_op1), target,
2708 &algorithm, variant);
2711 if (GET_CODE (op0) == CONST_DOUBLE)
2713 rtx temp = op0;
2714 op0 = op1;
2715 op1 = temp;
2718 /* Expand x*2.0 as x+x. */
2719 if (GET_CODE (op1) == CONST_DOUBLE
2720 && GET_MODE_CLASS (mode) == MODE_FLOAT)
2722 REAL_VALUE_TYPE d;
2723 REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
2725 if (REAL_VALUES_EQUAL (d, dconst2))
2727 op0 = force_reg (GET_MODE (op0), op0);
2728 return expand_binop (mode, add_optab, op0, op0,
2729 target, unsignedp, OPTAB_LIB_WIDEN);
2733 /* This used to use umul_optab if unsigned, but for non-widening multiply
2734 there is no difference between signed and unsigned. */
2735 op0 = expand_binop (mode,
2736 ! unsignedp
2737 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
2738 ? smulv_optab : smul_optab,
2739 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2740 if (op0 == 0)
2741 abort ();
2742 return op0;
2745 /* Return the smallest n such that 2**n >= X. */
2747 int
2748 ceil_log2 (unsigned HOST_WIDE_INT x)
2750 return floor_log2 (x - 1) + 1;
2753 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2754 replace division by D, and put the least significant N bits of the result
2755 in *MULTIPLIER_PTR and return the most significant bit.
2757 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT); the
2758 needed precision is in PRECISION (should be <= N).
2760 PRECISION should be as small as possible so this function can choose
2761 the multiplier more freely.
2763 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
2764 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2766 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2767 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
2769 static
2770 unsigned HOST_WIDE_INT
2771 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
2772 unsigned HOST_WIDE_INT *multiplier_ptr,
2773 int *post_shift_ptr, int *lgup_ptr)
2775 HOST_WIDE_INT mhigh_hi, mlow_hi;
2776 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
2777 int lgup, post_shift;
2778 int pow, pow2;
2779 unsigned HOST_WIDE_INT nl, dummy1;
2780 HOST_WIDE_INT nh, dummy2;
2782 /* lgup = ceil(log2(divisor)); */
2783 lgup = ceil_log2 (d);
2785 if (lgup > n)
2786 abort ();
2788 pow = n + lgup;
2789 pow2 = n + lgup - precision;
2791 if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2793 /* We could handle this with some effort, but this case is much better
2794 handled directly with a scc insn, so rely on caller using that. */
2795 abort ();
2798 /* mlow = 2^(N + lgup)/d */
2799 if (pow >= HOST_BITS_PER_WIDE_INT)
2801 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2802 nl = 0;
2804 else
2806 nh = 0;
2807 nl = (unsigned HOST_WIDE_INT) 1 << pow;
2809 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2810 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2812 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
2813 if (pow2 >= HOST_BITS_PER_WIDE_INT)
2814 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2815 else
2816 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2817 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2818 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2820 if (mhigh_hi && nh - d >= d)
2821 abort ();
2822 if (mhigh_hi > 1 || mlow_hi > 1)
2823 abort ();
2824 /* Assert that mlow < mhigh. */
2825 if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2826 abort ();
2828 /* If precision == N, then mlow, mhigh exceed 2^N
2829 (but they do not exceed 2^(N+1)). */
2831 /* Reduce to lowest terms. */
2832 for (post_shift = lgup; post_shift > 0; post_shift--)
2834 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2835 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2836 if (ml_lo >= mh_lo)
2837 break;
2839 mlow_hi = 0;
2840 mlow_lo = ml_lo;
2841 mhigh_hi = 0;
2842 mhigh_lo = mh_lo;
2845 *post_shift_ptr = post_shift;
2846 *lgup_ptr = lgup;
2847 if (n < HOST_BITS_PER_WIDE_INT)
2849 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2850 *multiplier_ptr = mhigh_lo & mask;
2851 return mhigh_lo >= mask;
2853 else
2855 *multiplier_ptr = mhigh_lo;
2856 return mhigh_hi;
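/* A standalone sketch (not GCC code) of the same derivation for N = 16,
   small enough that plain 64-bit host arithmetic replaces the
   double-word division above.  For d = 5 and precision == N it yields
   the classic m = 0xCCCD with post_shift = 2, and the multiplier fits
   in N bits (mh == 0); divisors whose multiplier overflows into bit N,
   such as 7, instead need the add-and-halve sequence that
   expand_divmod emits further below.  */
#include <stdint.h>
#include <assert.h>

static void
choose_multiplier_sketch_check (void)
{
  const uint32_t d = 5, N = 16;
  unsigned lgup = 0, post_shift;
  uint64_t mlow, mhigh;
  uint32_t x;

  while (((uint32_t) 1 << lgup) < d)
    lgup++;                                    /* ceil(log2(d)) == 3 */
  mlow  = ((uint64_t) 1 << (N + lgup)) / d;
  mhigh = (((uint64_t) 1 << (N + lgup)) + ((uint64_t) 1 << lgup)) / d;

  /* Reduce to lowest terms, as in the loop above.  */
  for (post_shift = lgup; post_shift > 0; post_shift--)
    {
      if ((mlow >> 1) >= (mhigh >> 1))
        break;
      mlow >>= 1, mhigh >>= 1;
    }

  assert (mhigh == 0xCCCD && post_shift == 2);
  for (x = 0; x < ((uint32_t) 1 << N); x++)
    assert (x / d == ((x * mhigh) >> N) >> post_shift);
}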
2860 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
2861 congruent to 1 (mod 2**N). */
2863 static unsigned HOST_WIDE_INT
2864 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
2866 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
2868 /* The algorithm notes that the choice y = x satisfies
2869 x*y == 1 mod 2^3, since x is assumed odd.
2870 Each iteration doubles the number of bits of significance in y. */
2872 unsigned HOST_WIDE_INT mask;
2873 unsigned HOST_WIDE_INT y = x;
2874 int nbit = 3;
2876 mask = (n == HOST_BITS_PER_WIDE_INT
2877 ? ~(unsigned HOST_WIDE_INT) 0
2878 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2880 while (nbit < n)
2882 y = y * (2 - x*y) & mask; /* Modulo 2^N */
2883 nbit *= 2;
2885 return y;
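/* A standalone sketch (not GCC code) of the same Newton iteration at a
   fixed 32-bit width: y = x is already correct to 3 bits for odd x, and
   each step doubles the number of correct low-order bits, so five steps
   are ample for 32.  E.g. invert_mod2_32_sketch (3) == 0xAAAAAAAB.  */
#include <stdint.h>

static uint32_t
invert_mod2_32_sketch (uint32_t x)       /* x must be odd */
{
  uint32_t y = x;
  int i;
  for (i = 0; i < 5; i++)
    y = y * (2 - x * y);                 /* all arithmetic is mod 2^32 */
  return y;
}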
2888 /* Emit code to adjust ADJ_OPERAND after a multiplication of the wrong
2889 signedness flavor of OP0 and OP1. ADJ_OPERAND is already the high half
2890 of the product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed
2891 product to become unsigned; if UNSIGNEDP is zero, adjust the unsigned
2892 product to become signed.
2894 The result is put in TARGET if that is convenient.
2896 MODE is the mode of operation. */
2898 rtx
2899 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
2900 rtx op1, rtx target, int unsignedp)
2902 rtx tem;
2903 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
2905 tem = expand_shift (RSHIFT_EXPR, mode, op0,
2906 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
2907 NULL_RTX, 0);
2908 tem = expand_and (mode, tem, op1, NULL_RTX);
2909 adj_operand
2910 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2911 adj_operand);
2913 tem = expand_shift (RSHIFT_EXPR, mode, op1,
2914 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
2915 NULL_RTX, 0);
2916 tem = expand_and (mode, tem, op0, NULL_RTX);
2917 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2918 target);
2920 return target;
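/* A standalone sketch (not GCC code) of the identity behind the
   adjustment above, at 32 bits:
     uhigh (x, y) == shigh (x, y) + (x < 0 ? y : 0) + (y < 0 ? x : 0)
   with everything taken mod 2^32.  The >> 32 of a negative 64-bit value
   is implementation-defined in ISO C but arithmetic on GCC's targets.  */
#include <stdint.h>
#include <assert.h>

static void
highpart_adjust_sketch_check (int32_t x, int32_t y)
{
  uint32_t uhigh = (uint32_t)
    (((uint64_t) (uint32_t) x * (uint32_t) y) >> 32);
  uint32_t shigh = (uint32_t) (((int64_t) x * y) >> 32);
  uint32_t adj = shigh + (x < 0 ? (uint32_t) y : 0)
                       + (y < 0 ? (uint32_t) x : 0);
  assert (uhigh == adj);
}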
2923 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
2925 static rtx
2926 extract_high_half (enum machine_mode mode, rtx op)
2928 enum machine_mode wider_mode;
2930 if (mode == word_mode)
2931 return gen_highpart (mode, op);
2933 wider_mode = GET_MODE_WIDER_MODE (mode);
2934 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
2935 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
2936 return convert_modes (mode, wider_mode, op, 0);
2939 /* Like expand_mult_highpart, but only consider using a multiplication
2940 optab. OP1 is an rtx for the constant operand. */
2942 static rtx
2943 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
2944 rtx target, int unsignedp, int max_cost)
2946 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
2947 enum machine_mode wider_mode;
2948 optab moptab;
2949 rtx tem;
2950 int size;
2952 wider_mode = GET_MODE_WIDER_MODE (mode);
2953 size = GET_MODE_BITSIZE (mode);
2955 /* First, try using a multiplication insn that generates only the needed
2956 high part of the product, in the signedness flavor of UNSIGNEDP. */
2957 if (mul_highpart_cost[mode] < max_cost)
2959 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
2960 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
2961 unsignedp, OPTAB_DIRECT);
2962 if (tem)
2963 return tem;
2966 /* Second, same as above, but use the signedness flavor opposite to
2967 UNSIGNEDP; we must then adjust the result after the multiplication. */
2968 if (size - 1 < BITS_PER_WORD
2969 && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
2970 + 4 * add_cost[mode] < max_cost))
2972 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
2973 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
2974 unsignedp, OPTAB_DIRECT);
2975 if (tem)
2976 /* We used the wrong signedness. Adjust the result. */
2977 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
2978 tem, unsignedp);
2981 /* Try widening multiplication. */
2982 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
2983 if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
2984 && mul_widen_cost[wider_mode] < max_cost)
2986 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
2987 unsignedp, OPTAB_WIDEN);
2988 if (tem)
2989 return extract_high_half (mode, tem);
2992 /* Try widening the mode and performing a non-widening multiplication. */
2993 moptab = smul_optab;
2994 if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
2995 && size - 1 < BITS_PER_WORD
2996 && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
2998 tem = expand_binop (wider_mode, moptab, op0, op1, 0,
2999 unsignedp, OPTAB_WIDEN);
3000 if (tem)
3001 return extract_high_half (mode, tem);
3004 /* Try widening multiplication of opposite signedness, and adjust. */
3005 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3006 if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3007 && size - 1 < BITS_PER_WORD
3008 && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
3009 + 4 * add_cost[mode] < max_cost))
3011 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3012 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3013 if (tem != 0)
3015 tem = extract_high_half (mode, tem);
3016 /* We used the wrong signedness. Adjust the result. */
3017 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3018 target, unsignedp);
3022 return 0;
3025 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
3026 in TARGET if that is convenient, and return where the result is. If the
3027 operation cannot be performed, 0 is returned.
3029 MODE is the mode of operation and result.
3031 UNSIGNEDP nonzero means unsigned multiply.
3033 MAX_COST is the total allowed cost for the expanded RTL. */
3035 rtx
3036 expand_mult_highpart (enum machine_mode mode, rtx op0,
3037 unsigned HOST_WIDE_INT cnst1, rtx target,
3038 int unsignedp, int max_cost)
3040 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3041 int extra_cost;
3042 bool sign_adjust = false;
3043 enum mult_variant variant;
3044 struct algorithm alg;
3045 rtx op1, tem;
3047 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3048 if (GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3049 abort ();
3051 op1 = gen_int_mode (cnst1, wider_mode);
3052 cnst1 &= GET_MODE_MASK (mode);
3054 /* We can't optimize modes wider than BITS_PER_WORD.
3055 ??? We might be able to perform double-word arithmetic if
3056 mode == word_mode, however all the cost calculations in
3057 synth_mult etc. assume single-word operations. */
3058 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3059 return expand_mult_highpart_optab (mode, op0, op1, target,
3060 unsignedp, max_cost);
3062 extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3064 /* Check whether we are trying to multiply by a negative constant. */
3065 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3067 sign_adjust = true;
3068 extra_cost += add_cost[mode];
3071 /* See whether shift/add multiplication is cheap enough. */
3072 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3073 max_cost - extra_cost))
3075 /* See whether the specialized multiplication optabs are
3076 cheaper than the shift/add version. */
3077 tem = expand_mult_highpart_optab (mode, op0, op1, target,
3078 unsignedp, alg.cost + extra_cost);
3079 if (tem)
3080 return tem;
3082 tem = convert_to_mode (wider_mode, op0, unsignedp);
3083 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3084 tem = extract_high_half (mode, tem);
3086 /* Adjust result for signedness. */
3087 if (sign_adjust)
3088 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3090 return tem;
3092 return expand_mult_highpart_optab (mode, op0, op1, target,
3093 unsignedp, max_cost);
3097 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3099 static rtx
3100 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3102 unsigned HOST_WIDE_INT mask;
3103 rtx result, temp, shift, label;
3104 int logd;
3106 logd = floor_log2 (d);
3107 result = gen_reg_rtx (mode);
3109 /* Avoid conditional branches when they're expensive. */
3110 if (BRANCH_COST >= 2
3111 && !optimize_size)
3113 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3114 mode, 0, -1);
3115 if (signmask)
3117 signmask = force_reg (mode, signmask);
3118 mask = ((HOST_WIDE_INT) 1 << logd) - 1;
3119 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3121 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3122 which instruction sequence to use. If logical right shifts
3123 are expensive, use 2 XORs, 2 SUBs and an AND; otherwise
3124 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3126 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3127 if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing
3128 || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3130 temp = expand_binop (mode, xor_optab, op0, signmask,
3131 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3132 temp = expand_binop (mode, sub_optab, temp, signmask,
3133 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3134 temp = expand_binop (mode, and_optab, temp, GEN_INT (mask),
3135 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3136 temp = expand_binop (mode, xor_optab, temp, signmask,
3137 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3138 temp = expand_binop (mode, sub_optab, temp, signmask,
3139 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3141 else
3143 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3144 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3145 signmask = force_reg (mode, signmask);
3147 temp = expand_binop (mode, add_optab, op0, signmask,
3148 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3149 temp = expand_binop (mode, and_optab, temp, GEN_INT (mask),
3150 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3151 temp = expand_binop (mode, sub_optab, temp, signmask,
3152 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3154 return temp;
3158 /* Mask contains the mode's signbit and the significant bits of the
3159 modulus. By including the signbit in the operation, many targets
3160 can avoid an explicit compare operation in the following comparison
3161 against zero. */
3163 mask = (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1)
3164 | (((HOST_WIDE_INT) 1 << logd) - 1);
3166 temp = expand_binop (mode, and_optab, op0, GEN_INT (mask), result,
3167 1, OPTAB_LIB_WIDEN);
3168 if (temp != result)
3169 emit_move_insn (result, temp);
3171 label = gen_label_rtx ();
3172 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3174 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3175 0, OPTAB_LIB_WIDEN);
3176 mask = (HOST_WIDE_INT) -1 << logd;
3177 temp = expand_binop (mode, ior_optab, temp, GEN_INT (mask), result,
3178 1, OPTAB_LIB_WIDEN);
3179 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3180 0, OPTAB_LIB_WIDEN);
3181 if (temp != result)
3182 emit_move_insn (result, temp);
3183 emit_label (label);
3184 return result;
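/* A standalone sketch (not GCC code) of the second, branch-free variant
   above at 32 bits: the sign mask comes from an arithmetic right shift
   (implementation-defined in ISO C, arithmetic on GCC's targets), its
   low logd bits are added on, masked, and subtracted back off.
   E.g. smod_pow2_sketch (-5, 2) == -1, matching C99's -5 % 4.  */
#include <stdint.h>

static int32_t
smod_pow2_sketch (int32_t x, int logd)          /* d = 1 << logd, logd >= 1 */
{
  uint32_t signmask = (uint32_t) (x >> 31);     /* 0 or 0xFFFFFFFF */
  uint32_t low = signmask >> (32 - logd);       /* 0 or d - 1 */
  uint32_t mask = ((uint32_t) 1 << logd) - 1;
  return (int32_t) ((((uint32_t) x + low) & mask) - low);
}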
3187 /* Expand signed division of OP0 by a power of two D in mode MODE.
3188 This routine is only called for positive values of D. */
3190 static rtx
3191 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3193 rtx temp, label;
3194 tree shift;
3195 int logd;
3197 logd = floor_log2 (d);
3198 shift = build_int_cst (NULL_TREE, logd);
3200 if (d == 2 && BRANCH_COST >= 1)
3202 temp = gen_reg_rtx (mode);
3203 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3204 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3205 0, OPTAB_LIB_WIDEN);
3206 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3209 #ifdef HAVE_conditional_move
3210 if (BRANCH_COST >= 2)
3212 rtx temp2;
3214 start_sequence ();
3215 temp2 = copy_to_mode_reg (mode, op0);
3216 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3217 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3218 temp = force_reg (mode, temp);
3220 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3221 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3222 mode, temp, temp2, mode, 0);
3223 if (temp2)
3225 rtx seq = get_insns ();
3226 end_sequence ();
3227 emit_insn (seq);
3228 return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3230 end_sequence ();
3232 #endif
3234 if (BRANCH_COST >= 2)
3236 int ushift = GET_MODE_BITSIZE (mode) - logd;
3238 temp = gen_reg_rtx (mode);
3239 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3240 if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
3241 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3242 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3243 else
3244 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3245 build_int_cst (NULL_TREE, ushift),
3246 NULL_RTX, 1);
3247 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3248 0, OPTAB_LIB_WIDEN);
3249 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3252 label = gen_label_rtx ();
3253 temp = copy_to_mode_reg (mode, op0);
3254 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3255 expand_inc (temp, GEN_INT (d - 1));
3256 emit_label (label);
3257 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
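/* A standalone sketch (not GCC code) of the store-flag variant above at
   32 bits: bias negative operands by d - 1, then shift right
   arithmetically (implementation-defined in ISO C, arithmetic on GCC's
   targets).  E.g. sdiv_pow2_sketch (-7, 2) == -1, matching C99's -7 / 4.  */
#include <stdint.h>

static int32_t
sdiv_pow2_sketch (int32_t x, int logd)          /* d = 1 << logd, logd >= 1 */
{
  uint32_t bias = (uint32_t) (x >> 31) >> (32 - logd);  /* x < 0 ? d-1 : 0 */
  return (int32_t) ((uint32_t) x + bias) >> logd;
}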
3260 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3261 if that is convenient, and returning where the result is.
3262 You may request either the quotient or the remainder as the result;
3263 specify REM_FLAG nonzero to get the remainder.
3265 CODE is the expression code for which kind of division this is;
3266 it controls how rounding is done. MODE is the machine mode to use.
3267 UNSIGNEDP nonzero means do unsigned division. */
3269 /* ??? For CEIL_MOD_EXPR, we could compute an incorrect remainder with ANDI
3270 and then correct it by OR-ing in the missing high bits
3271 if the result of the ANDI is nonzero.
3272 For ROUND_MOD_EXPR, we could use ANDI and then sign-extend the result.
3273 This could optimize to a bfexts instruction.
3274 But C doesn't use these operations, so their optimizations are
3275 left for later. */
3276 /* ??? For modulo, we don't actually need the high part of the first product;
3277 the low part will do nicely. And for small divisors, the second multiply
3278 can also be a low-part only multiply or even be completely left out.
3279 E.g. to calculate the remainder of a division by 3 with a 32 bit
3280 multiply, multiply with 0x55555556 and extract the upper two bits;
3281 the result is exact for inputs up to 0x1fffffff.
3282 The input range can be reduced by using cross-sum rules.
3283 For odd divisors >= 3, the following table gives right shift counts
3284 so that if a number is shifted by an integer multiple of the given
3285 amount, the remainder stays the same:
3286 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3287 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3288 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3289 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3290 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3292 Cross-sum rules for even numbers can be derived by leaving as many bits
3293 to the right alone as the divisor has zeros to the right.
3294 E.g. if x is an unsigned 32 bit number:
3295 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28 */
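/* A standalone sketch (not GCC code) of the remainder-by-3 remark above:
   3 * 0x55555556 == 2^32 + 2, so the top two bits of the low 32-bit
   product x * 0x55555556 are exactly x mod 3 for x <= 0x1fffffff.
   E.g. mod3_sketch (7) == 1.  */
#include <stdint.h>

static unsigned
mod3_sketch (uint32_t x)                        /* x <= 0x1fffffff */
{
  return (x * 0x55555556u) >> 30;
}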
3298 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
3300 rtx
3301 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3302 rtx op0, rtx op1, rtx target, int unsignedp)
3304 enum machine_mode compute_mode;
3305 rtx tquotient;
3306 rtx quotient = 0, remainder = 0;
3307 rtx last;
3308 int size;
3309 rtx insn, set;
3310 optab optab1, optab2;
3311 int op1_is_constant, op1_is_pow2 = 0;
3312 int max_cost, extra_cost;
3313 static HOST_WIDE_INT last_div_const = 0;
3314 static HOST_WIDE_INT ext_op1;
3316 op1_is_constant = GET_CODE (op1) == CONST_INT;
3317 if (op1_is_constant)
3319 ext_op1 = INTVAL (op1);
3320 if (unsignedp)
3321 ext_op1 &= GET_MODE_MASK (mode);
3322 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3323 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3327 /* This is the structure of expand_divmod:
3329 First comes code to fix up the operands so we can perform the operations
3330 correctly and efficiently.
3332 Second comes a switch statement with code specific for each rounding mode.
3333 For some special operands this code emits all RTL for the desired
3334 operation, for other cases, it generates only a quotient and stores it in
3335 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3336 to indicate that it has not done anything.
3338 Last comes code that finishes the operation. If QUOTIENT is set and
3339 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3340 QUOTIENT is not set, it is computed using trunc rounding.
3342 We try to generate special code for division and remainder when OP1 is a
3343 constant. If |OP1| = 2**n we can use shifts and some other fast
3344 operations. For other values of OP1, we compute a carefully selected
3345 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3346 by m.
3348 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3349 half of the product. Different strategies for generating the product are
3350 implemented in expand_mult_highpart.
3352 If what we actually want is the remainder, we generate that by another
3353 by-constant multiplication and a subtraction. */
3355 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3356 code below will malfunction if we are, so check here and handle
3357 the special case if so. */
3358 if (op1 == const1_rtx)
3359 return rem_flag ? const0_rtx : op0;
3361 /* When dividing by -1, we could get an overflow.
3362 negv_optab can handle overflows. */
3363 if (! unsignedp && op1 == constm1_rtx)
3365 if (rem_flag)
3366 return const0_rtx;
3367 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3368 ? negv_optab : neg_optab, op0, target, 0);
3371 if (target
3372 /* Don't use the function value register as a target
3373 since we have to read it as well as write it,
3374 and function-inlining gets confused by this. */
3375 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3376 /* Don't clobber an operand while doing a multi-step calculation. */
3377 || ((rem_flag || op1_is_constant)
3378 && (reg_mentioned_p (target, op0)
3379 || (MEM_P (op0) && MEM_P (target))))
3380 || reg_mentioned_p (target, op1)
3381 || (MEM_P (op1) && MEM_P (target))))
3382 target = 0;
3384 /* Get the mode in which to perform this computation. Normally it will
3385 be MODE, but sometimes we can't do the desired operation in MODE.
3386 If so, pick a wider mode in which we can do the operation. Convert
3387 to that mode at the start to avoid repeated conversions.
3389 First see what operations we need. These depend on the expression
3390 we are evaluating. (We assume that divxx3 insns exist under the
3391 same conditions that modxx3 insns and that these insns don't normally
3392 fail. If these assumptions are not correct, we may generate less
3393 efficient code in some cases.)
3395 Then see if we find a mode in which we can open-code that operation
3396 (either a division, modulus, or shift). Finally, check for the smallest
3397 mode for which we can do the operation with a library call. */
3399 /* We might want to refine this now that we have division-by-constant
3400 optimization. Since expand_mult_highpart tries so many variants, it is
3401 not straightforward to generalize this. Maybe we should make an array
3402 of possible modes in init_expmed? Save this for GCC 2.7. */
3404 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3405 ? (unsignedp ? lshr_optab : ashr_optab)
3406 : (unsignedp ? udiv_optab : sdiv_optab));
3407 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3408 ? optab1
3409 : (unsignedp ? udivmod_optab : sdivmod_optab));
3411 for (compute_mode = mode; compute_mode != VOIDmode;
3412 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3413 if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
3414 || optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
3415 break;
3417 if (compute_mode == VOIDmode)
3418 for (compute_mode = mode; compute_mode != VOIDmode;
3419 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3420 if (optab1->handlers[compute_mode].libfunc
3421 || optab2->handlers[compute_mode].libfunc)
3422 break;
3424 /* If we still couldn't find a mode, use MODE, but we'll probably abort
3425 in expand_binop. */
3426 if (compute_mode == VOIDmode)
3427 compute_mode = mode;
3429 if (target && GET_MODE (target) == compute_mode)
3430 tquotient = target;
3431 else
3432 tquotient = gen_reg_rtx (compute_mode);
3434 size = GET_MODE_BITSIZE (compute_mode);
3435 #if 0
3436 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3437 (mode), and thereby get better code when OP1 is a constant. Do that
3438 later. It will require going over all usages of SIZE below. */
3439 size = GET_MODE_BITSIZE (mode);
3440 #endif
3442 /* Only deduct something for a REM if the last divide done was
3443 for a different constant. Then set the constant of the last
3444 divide. */
3445 max_cost = div_cost[compute_mode]
3446 - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3447 && INTVAL (op1) == last_div_const)
3448 ? mul_cost[compute_mode] + add_cost[compute_mode]
3449 : 0);
3451 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3453 /* Now convert to the best mode to use. */
3454 if (compute_mode != mode)
3456 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3457 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3459 /* convert_modes may have placed op1 into a register, so we
3460 must recompute the following. */
3461 op1_is_constant = GET_CODE (op1) == CONST_INT;
3462 op1_is_pow2 = (op1_is_constant
3463 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3464 || (! unsignedp
3465 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3468 /* If one of the operands is a volatile MEM, copy it into a register. */
3470 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3471 op0 = force_reg (compute_mode, op0);
3472 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3473 op1 = force_reg (compute_mode, op1);
3475 /* If we need the remainder or if OP1 is constant, we need to
3476 put OP0 in a register in case it has any queued subexpressions. */
3477 if (rem_flag || op1_is_constant)
3478 op0 = force_reg (compute_mode, op0);
3480 last = get_last_insn ();
3482 /* Promote floor rounding to trunc rounding for unsigned operations. */
3483 if (unsignedp)
3485 if (code == FLOOR_DIV_EXPR)
3486 code = TRUNC_DIV_EXPR;
3487 if (code == FLOOR_MOD_EXPR)
3488 code = TRUNC_MOD_EXPR;
3489 if (code == EXACT_DIV_EXPR && op1_is_pow2)
3490 code = TRUNC_DIV_EXPR;
3493 if (op1 != const0_rtx)
3494 switch (code)
3496 case TRUNC_MOD_EXPR:
3497 case TRUNC_DIV_EXPR:
3498 if (op1_is_constant)
3500 if (unsignedp)
3502 unsigned HOST_WIDE_INT mh, ml;
3503 int pre_shift, post_shift;
3504 int dummy;
3505 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3506 & GET_MODE_MASK (compute_mode));
3508 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3510 pre_shift = floor_log2 (d);
3511 if (rem_flag)
3513 remainder
3514 = expand_binop (compute_mode, and_optab, op0,
3515 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3516 remainder, 1,
3517 OPTAB_LIB_WIDEN);
3518 if (remainder)
3519 return gen_lowpart (mode, remainder);
3521 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3522 build_int_cst (NULL_TREE,
3523 pre_shift),
3524 tquotient, 1);
3526 else if (size <= HOST_BITS_PER_WIDE_INT)
3528 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3530 /* Most significant bit of divisor is set; emit an scc
3531 insn. */
3532 quotient = emit_store_flag (tquotient, GEU, op0, op1,
3533 compute_mode, 1, 1);
3534 if (quotient == 0)
3535 goto fail1;
3537 else
3539 /* Find a suitable multiplier and right shift count
3540 instead of multiplying with D. */
3542 mh = choose_multiplier (d, size, size,
3543 &ml, &post_shift, &dummy);
3545 /* If the suggested multiplier is more than SIZE bits,
3546 we can do better for even divisors, using an
3547 initial right shift. */
3548 if (mh != 0 && (d & 1) == 0)
3550 pre_shift = floor_log2 (d & -d);
3551 mh = choose_multiplier (d >> pre_shift, size,
3552 size - pre_shift,
3553 &ml, &post_shift, &dummy);
3554 if (mh)
3555 abort ();
3557 else
3558 pre_shift = 0;
3560 if (mh != 0)
3562 rtx t1, t2, t3, t4;
3564 if (post_shift - 1 >= BITS_PER_WORD)
3565 goto fail1;
3567 extra_cost
3568 = (shift_cost[compute_mode][post_shift - 1]
3569 + shift_cost[compute_mode][1]
3570 + 2 * add_cost[compute_mode]);
3571 t1 = expand_mult_highpart (compute_mode, op0, ml,
3572 NULL_RTX, 1,
3573 max_cost - extra_cost);
3574 if (t1 == 0)
3575 goto fail1;
3576 t2 = force_operand (gen_rtx_MINUS (compute_mode,
3577 op0, t1),
3578 NULL_RTX);
3579 t3 = expand_shift
3580 (RSHIFT_EXPR, compute_mode, t2,
3581 build_int_cst (NULL_TREE, 1),
3582 NULL_RTX, 1);
3583 t4 = force_operand (gen_rtx_PLUS (compute_mode,
3584 t1, t3),
3585 NULL_RTX);
3586 quotient = expand_shift
3587 (RSHIFT_EXPR, compute_mode, t4,
3588 build_int_cst (NULL_TREE, post_shift - 1),
3589 tquotient, 1);
3591 else
3593 rtx t1, t2;
3595 if (pre_shift >= BITS_PER_WORD
3596 || post_shift >= BITS_PER_WORD)
3597 goto fail1;
3599 t1 = expand_shift
3600 (RSHIFT_EXPR, compute_mode, op0,
3601 build_int_cst (NULL_TREE, pre_shift),
3602 NULL_RTX, 1);
3603 extra_cost
3604 = (shift_cost[compute_mode][pre_shift]
3605 + shift_cost[compute_mode][post_shift]);
3606 t2 = expand_mult_highpart (compute_mode, t1, ml,
3607 NULL_RTX, 1,
3608 max_cost - extra_cost);
3609 if (t2 == 0)
3610 goto fail1;
3611 quotient = expand_shift
3612 (RSHIFT_EXPR, compute_mode, t2,
3613 build_int_cst (NULL_TREE, post_shift),
3614 tquotient, 1);
3618 else /* Mode too wide to use the tricky code */
3619 break;
3621 insn = get_last_insn ();
3622 if (insn != last
3623 && (set = single_set (insn)) != 0
3624 && SET_DEST (set) == quotient)
3625 set_unique_reg_note (insn,
3626 REG_EQUAL,
3627 gen_rtx_UDIV (compute_mode, op0, op1));
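
/* Aside (editor's illustration, not part of expmed.c): what the
   unsigned constant-divisor paths above compute, for 32-bit
   operands.  Function names are invented; the multiplier 0x24924925
   and shifts for d == 7 follow the standard Granlund/Montgomery
   construction that choose_multiplier performs at compile time.  */
#if 0 /* illustration only */
static unsigned int
udiv_pow2 (unsigned int n, int k)      /* n / (1u << k) */
{
  return n >> k;
}

static unsigned int
umod_pow2 (unsigned int n, int k)      /* n % (1u << k) */
{
  return n & ((1u << k) - 1);
}

/* The mh != 0 path for d == 7: multiply-highpart plus the
   add-and-halve fixup, then a final right shift.  */
static unsigned int
udiv7 (unsigned int n)
{
  unsigned int t1 = (unsigned int)
    (((unsigned long long) n * 0x24924925u) >> 32);
  unsigned int t3 = (n - t1) >> 1;
  unsigned int t4 = t1 + t3;
  return t4 >> 2;                      /* post_shift - 1 */
}
#endif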
3629 else /* TRUNC_DIV, signed */
3631 unsigned HOST_WIDE_INT ml;
3632 int lgup, post_shift;
3633 HOST_WIDE_INT d = INTVAL (op1);
3634 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3636 /* n rem d = n rem -d */
3637 if (rem_flag && d < 0)
3639 d = abs_d;
3640 op1 = gen_int_mode (abs_d, compute_mode);
3643 if (d == 1)
3644 quotient = op0;
3645 else if (d == -1)
3646 quotient = expand_unop (compute_mode, neg_optab, op0,
3647 tquotient, 0);
3648 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3650 /* This case is not handled correctly below. */
3651 quotient = emit_store_flag (tquotient, EQ, op0, op1,
3652 compute_mode, 1, 1);
3653 if (quotient == 0)
3654 goto fail1;
3656 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3657 && (rem_flag ? smod_pow2_cheap[compute_mode]
3658 : sdiv_pow2_cheap[compute_mode])
3659 /* We assume that the cheap metric is true if the
3660 optab has an expander for this mode. */
3661 && (((rem_flag ? smod_optab : sdiv_optab)
3662 ->handlers[compute_mode].insn_code
3663 != CODE_FOR_nothing)
3664 || (sdivmod_optab->handlers[compute_mode]
3665 .insn_code != CODE_FOR_nothing)))
3667 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3669 if (rem_flag)
3671 remainder = expand_smod_pow2 (compute_mode, op0, d);
3672 if (remainder)
3673 return gen_lowpart (mode, remainder);
3675 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
3677 /* We have computed OP0 / abs(OP1). If OP1 is negative,
3678 negate the quotient. */
3679 if (d < 0)
3681 insn = get_last_insn ();
3682 if (insn != last
3683 && (set = single_set (insn)) != 0
3684 && SET_DEST (set) == quotient
3685 && abs_d < ((unsigned HOST_WIDE_INT) 1
3686 << (HOST_BITS_PER_WIDE_INT - 1)))
3687 set_unique_reg_note (insn,
3688 REG_EQUAL,
3689 gen_rtx_DIV (compute_mode,
3690 op0,
3691 GEN_INT
3692 (trunc_int_for_mode
3693 (abs_d,
3694 compute_mode))));
3696 quotient = expand_unop (compute_mode, neg_optab,
3697 quotient, quotient, 0);
3700 else if (size <= HOST_BITS_PER_WIDE_INT)
3702 choose_multiplier (abs_d, size, size - 1,
3703 &ml, &post_shift, &lgup);
3704 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3706 rtx t1, t2, t3;
3708 if (post_shift >= BITS_PER_WORD
3709 || size - 1 >= BITS_PER_WORD)
3710 goto fail1;
3712 extra_cost = (shift_cost[compute_mode][post_shift]
3713 + shift_cost[compute_mode][size - 1]
3714 + add_cost[compute_mode]);
3715 t1 = expand_mult_highpart (compute_mode, op0, ml,
3716 NULL_RTX, 0,
3717 max_cost - extra_cost);
3718 if (t1 == 0)
3719 goto fail1;
3720 t2 = expand_shift
3721 (RSHIFT_EXPR, compute_mode, t1,
3722 build_int_cst (NULL_TREE, post_shift),
3723 NULL_RTX, 0);
3724 t3 = expand_shift
3725 (RSHIFT_EXPR, compute_mode, op0,
3726 build_int_cst (NULL_TREE, size - 1),
3727 NULL_RTX, 0);
3728 if (d < 0)
3729 quotient
3730 = force_operand (gen_rtx_MINUS (compute_mode,
3731 t3, t2),
3732 tquotient);
3733 else
3734 quotient
3735 = force_operand (gen_rtx_MINUS (compute_mode,
3736 t2, t3),
3737 tquotient);
3739 else
3741 rtx t1, t2, t3, t4;
3743 if (post_shift >= BITS_PER_WORD
3744 || size - 1 >= BITS_PER_WORD)
3745 goto fail1;
3747 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3748 extra_cost = (shift_cost[compute_mode][post_shift]
3749 + shift_cost[compute_mode][size - 1]
3750 + 2 * add_cost[compute_mode]);
3751 t1 = expand_mult_highpart (compute_mode, op0, ml,
3752 NULL_RTX, 0,
3753 max_cost - extra_cost);
3754 if (t1 == 0)
3755 goto fail1;
3756 t2 = force_operand (gen_rtx_PLUS (compute_mode,
3757 t1, op0),
3758 NULL_RTX);
3759 t3 = expand_shift
3760 (RSHIFT_EXPR, compute_mode, t2,
3761 build_int_cst (NULL_TREE, post_shift),
3762 NULL_RTX, 0);
3763 t4 = expand_shift
3764 (RSHIFT_EXPR, compute_mode, op0,
3765 build_int_cst (NULL_TREE, size - 1),
3766 NULL_RTX, 0);
3767 if (d < 0)
3768 quotient
3769 = force_operand (gen_rtx_MINUS (compute_mode,
3770 t4, t3),
3771 tquotient);
3772 else
3773 quotient
3774 = force_operand (gen_rtx_MINUS (compute_mode,
3775 t3, t4),
3776 tquotient);
3779 else /* Mode too wide to use the tricky code */
3780 break;
3782 insn = get_last_insn ();
3783 if (insn != last
3784 && (set = single_set (insn)) != 0
3785 && SET_DEST (set) == quotient)
3786 set_unique_reg_note (insn,
3787 REG_EQUAL,
3788 gen_rtx_DIV (compute_mode, op0, op1));
3790 break;
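
/* Aside (editor's illustration, not part of expmed.c): the signed
   power-of-two case in C, assuming 32-bit int and an arithmetic
   right shift of negative values (implementation-defined in C, but
   guaranteed by the RTL emitted above).  This is the branch-free
   shape expand_sdiv_pow2 typically produces.  */
#if 0 /* illustration only */
static int
sdiv_pow2 (int n, int k)               /* n / (1 << k), truncating */
{
  int bias = (n >> 31) & ((1 << k) - 1);   /* d - 1 when n < 0 */
  return (n + bias) >> k;
}
#endif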
3792 fail1:
3793 delete_insns_since (last);
3794 break;
3796 case FLOOR_DIV_EXPR:
3797 case FLOOR_MOD_EXPR:
3798 /* We will come here only for signed operations. */
3799 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3801 unsigned HOST_WIDE_INT mh, ml;
3802 int pre_shift, lgup, post_shift;
3803 HOST_WIDE_INT d = INTVAL (op1);
3805 if (d > 0)
3807 /* We could just as easily deal with negative constants here,
3808 but it does not seem worth the trouble for GCC 2.6. */
3809 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3811 pre_shift = floor_log2 (d);
3812 if (rem_flag)
3814 remainder = expand_binop (compute_mode, and_optab, op0,
3815 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3816 remainder, 0, OPTAB_LIB_WIDEN);
3817 if (remainder)
3818 return gen_lowpart (mode, remainder);
3820 quotient = expand_shift
3821 (RSHIFT_EXPR, compute_mode, op0,
3822 build_int_cst (NULL_TREE, pre_shift),
3823 tquotient, 0);
3825 else
3827 rtx t1, t2, t3, t4;
3829 mh = choose_multiplier (d, size, size - 1,
3830 &ml, &post_shift, &lgup);
3831 if (mh)
3832 abort ();
3834 if (post_shift < BITS_PER_WORD
3835 && size - 1 < BITS_PER_WORD)
3837 t1 = expand_shift
3838 (RSHIFT_EXPR, compute_mode, op0,
3839 build_int_cst (NULL_TREE, size - 1),
3840 NULL_RTX, 0);
3841 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3842 NULL_RTX, 0, OPTAB_WIDEN);
3843 extra_cost = (shift_cost[compute_mode][post_shift]
3844 + shift_cost[compute_mode][size - 1]
3845 + 2 * add_cost[compute_mode]);
3846 t3 = expand_mult_highpart (compute_mode, t2, ml,
3847 NULL_RTX, 1,
3848 max_cost - extra_cost);
3849 if (t3 != 0)
3851 t4 = expand_shift
3852 (RSHIFT_EXPR, compute_mode, t3,
3853 build_int_cst (NULL_TREE, post_shift),
3854 NULL_RTX, 1);
3855 quotient = expand_binop (compute_mode, xor_optab,
3856 t4, t1, tquotient, 0,
3857 OPTAB_WIDEN);
3862 else
3864 rtx nsign, t1, t2, t3, t4;
3865 t1 = force_operand (gen_rtx_PLUS (compute_mode,
3866 op0, constm1_rtx), NULL_RTX);
3867 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3868 0, OPTAB_WIDEN);
3869 nsign = expand_shift
3870 (RSHIFT_EXPR, compute_mode, t2,
3871 build_int_cst (NULL_TREE, size - 1),
3872 NULL_RTX, 0);
3873 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
3874 NULL_RTX);
3875 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3876 NULL_RTX, 0);
3877 if (t4)
3879 rtx t5;
3880 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3881 NULL_RTX, 0);
3882 quotient = force_operand (gen_rtx_PLUS (compute_mode,
3883 t4, t5),
3884 tquotient);
3889 if (quotient != 0)
3890 break;
3891 delete_insns_since (last);
3893 /* Try using an instruction that produces both the quotient and
3894 remainder, using truncation. We can easily compensate the quotient
3895 or remainder to get floor rounding, once we have the remainder.
3896 Notice that we also compute the final remainder value here,
3897 and return the result right away. */
3898 if (target == 0 || GET_MODE (target) != compute_mode)
3899 target = gen_reg_rtx (compute_mode);
3901 if (rem_flag)
3903 remainder
3904 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
3905 quotient = gen_reg_rtx (compute_mode);
3907 else
3909 quotient
3910 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
3911 remainder = gen_reg_rtx (compute_mode);
3914 if (expand_twoval_binop (sdivmod_optab, op0, op1,
3915 quotient, remainder, 0))
3917 /* This could be computed with a branch-less sequence.
3918 Save that for later. */
3919 rtx tem;
3920 rtx label = gen_label_rtx ();
3921 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
3922 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3923 NULL_RTX, 0, OPTAB_WIDEN);
3924 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
3925 expand_dec (quotient, const1_rtx);
3926 expand_inc (remainder, op1);
3927 emit_label (label);
3928 return gen_lowpart (mode, rem_flag ? remainder : quotient);
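
/* Aside (editor's illustration, not part of expmed.c): the floor
   compensation just emitted, restated in C.  Truncating division is
   corrected whenever the remainder is nonzero and the operands
   differ in sign, exactly the two tests jumped over above.  */
#if 0 /* illustration only */
static void
floor_divmod (int n, int d, int *q, int *r)
{
  *q = n / d;                          /* truncating divide */
  *r = n % d;
  if (*r != 0 && (n ^ d) < 0)          /* signs differ */
    {
      *q -= 1;
      *r += d;
    }
}
#endif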
3931 /* No luck with division elimination or divmod. Have to do it
3932 by conditionally adjusting op0 *and* the result. */
3934 rtx label1, label2, label3, label4, label5;
3935 rtx adjusted_op0;
3936 rtx tem;
3938 quotient = gen_reg_rtx (compute_mode);
3939 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3940 label1 = gen_label_rtx ();
3941 label2 = gen_label_rtx ();
3942 label3 = gen_label_rtx ();
3943 label4 = gen_label_rtx ();
3944 label5 = gen_label_rtx ();
3945 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3946 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
3947 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3948 quotient, 0, OPTAB_LIB_WIDEN);
3949 if (tem != quotient)
3950 emit_move_insn (quotient, tem);
3951 emit_jump_insn (gen_jump (label5));
3952 emit_barrier ();
3953 emit_label (label1);
3954 expand_inc (adjusted_op0, const1_rtx);
3955 emit_jump_insn (gen_jump (label4));
3956 emit_barrier ();
3957 emit_label (label2);
3958 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
3959 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3960 quotient, 0, OPTAB_LIB_WIDEN);
3961 if (tem != quotient)
3962 emit_move_insn (quotient, tem);
3963 emit_jump_insn (gen_jump (label5));
3964 emit_barrier ();
3965 emit_label (label3);
3966 expand_dec (adjusted_op0, const1_rtx);
3967 emit_label (label4);
3968 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3969 quotient, 0, OPTAB_LIB_WIDEN);
3970 if (tem != quotient)
3971 emit_move_insn (quotient, tem);
3972 expand_dec (quotient, const1_rtx);
3973 emit_label (label5);
3975 break;
3977 case CEIL_DIV_EXPR:
3978 case CEIL_MOD_EXPR:
3979 if (unsignedp)
3981 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
3983 rtx t1, t2, t3;
3984 unsigned HOST_WIDE_INT d = INTVAL (op1);
3985 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3986 build_int_cst (NULL_TREE, floor_log2 (d)),
3987 tquotient, 1);
3988 t2 = expand_binop (compute_mode, and_optab, op0,
3989 GEN_INT (d - 1),
3990 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3991 t3 = gen_reg_rtx (compute_mode);
3992 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3993 compute_mode, 1, 1);
3994 if (t3 == 0)
3996 rtx lab;
3997 lab = gen_label_rtx ();
3998 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3999 expand_inc (t1, const1_rtx);
4000 emit_label (lab);
4001 quotient = t1;
4003 else
4004 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4005 t1, t3),
4006 tquotient);
4007 break;
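
/* Aside (editor's illustration, not part of expmed.c): the unsigned
   power-of-two ceiling case above, for 32-bit operands: shift, then
   add one if any low-order bits were shifted out.  */
#if 0 /* illustration only */
static unsigned int
uceil_pow2 (unsigned int n, int k)     /* ceil (n / (1u << k)) */
{
  unsigned int t1 = n >> k;
  unsigned int t3 = (n & ((1u << k) - 1)) != 0;
  return t1 + t3;
}
#endif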
4010 /* Try using an instruction that produces both the quotient and
4011 remainder, using truncation. We can easily compensate the
4012 quotient or remainder to get ceiling rounding, once we have the
4013 remainder. Notice that we also compute the final remainder
4014 value here, and return the result right away. */
4015 if (target == 0 || GET_MODE (target) != compute_mode)
4016 target = gen_reg_rtx (compute_mode);
4018 if (rem_flag)
4020 remainder = (REG_P (target)
4021 ? target : gen_reg_rtx (compute_mode));
4022 quotient = gen_reg_rtx (compute_mode);
4024 else
4026 quotient = (REG_P (target)
4027 ? target : gen_reg_rtx (compute_mode));
4028 remainder = gen_reg_rtx (compute_mode);
4031 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4032 remainder, 1))
4034 /* This could be computed with a branch-less sequence.
4035 Save that for later. */
4036 rtx label = gen_label_rtx ();
4037 do_cmp_and_jump (remainder, const0_rtx, EQ,
4038 compute_mode, label);
4039 expand_inc (quotient, const1_rtx);
4040 expand_dec (remainder, op1);
4041 emit_label (label);
4042 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4045 /* No luck with division elimination or divmod. Have to do it
4046 by conditionally adjusting op0 *and* the result. */
4048 rtx label1, label2;
4049 rtx adjusted_op0, tem;
4051 quotient = gen_reg_rtx (compute_mode);
4052 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4053 label1 = gen_label_rtx ();
4054 label2 = gen_label_rtx ();
4055 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4056 compute_mode, label1);
4057 emit_move_insn (quotient, const0_rtx);
4058 emit_jump_insn (gen_jump (label2));
4059 emit_barrier ();
4060 emit_label (label1);
4061 expand_dec (adjusted_op0, const1_rtx);
4062 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4063 quotient, 1, OPTAB_LIB_WIDEN);
4064 if (tem != quotient)
4065 emit_move_insn (quotient, tem);
4066 expand_inc (quotient, const1_rtx);
4067 emit_label (label2);
4070 else /* signed */
4072 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4073 && INTVAL (op1) >= 0)
4075 /* This is extremely similar to the code for the unsigned case
4076 above. For 2.7 we should merge these variants, but for
4077 2.6.1 I don't want to touch the code for unsigned since that
4078 gets used in C. The signed case will only be used by other
4079 languages (Ada). */
4081 rtx t1, t2, t3;
4082 unsigned HOST_WIDE_INT d = INTVAL (op1);
4083 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4084 build_int_cst (NULL_TREE, floor_log2 (d)),
4085 tquotient, 0);
4086 t2 = expand_binop (compute_mode, and_optab, op0,
4087 GEN_INT (d - 1),
4088 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4089 t3 = gen_reg_rtx (compute_mode);
4090 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4091 compute_mode, 1, 1);
4092 if (t3 == 0)
4094 rtx lab;
4095 lab = gen_label_rtx ();
4096 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4097 expand_inc (t1, const1_rtx);
4098 emit_label (lab);
4099 quotient = t1;
4101 else
4102 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4103 t1, t3),
4104 tquotient);
4105 break;
4108 /* Try using an instruction that produces both the quotient and
4109 remainder, using truncation. We can easily compensate the
4110 quotient or remainder to get ceiling rounding, once we have the
4111 remainder. Notice that we also compute the final remainder
4112 value here, and return the result right away. */
4113 if (target == 0 || GET_MODE (target) != compute_mode)
4114 target = gen_reg_rtx (compute_mode);
4115 if (rem_flag)
4117 remainder = (REG_P (target)
4118 ? target : gen_reg_rtx (compute_mode));
4119 quotient = gen_reg_rtx (compute_mode);
4121 else
4123 quotient = (REG_P (target)
4124 ? target : gen_reg_rtx (compute_mode));
4125 remainder = gen_reg_rtx (compute_mode);
4128 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4129 remainder, 0))
4131 /* This could be computed with a branch-less sequence.
4132 Save that for later. */
4133 rtx tem;
4134 rtx label = gen_label_rtx ();
4135 do_cmp_and_jump (remainder, const0_rtx, EQ,
4136 compute_mode, label);
4137 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4138 NULL_RTX, 0, OPTAB_WIDEN);
4139 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4140 expand_inc (quotient, const1_rtx);
4141 expand_dec (remainder, op1);
4142 emit_label (label);
4143 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4146 /* No luck with division elimination or divmod. Have to do it
4147 by conditionally adjusting op0 *and* the result. */
4149 rtx label1, label2, label3, label4, label5;
4150 rtx adjusted_op0;
4151 rtx tem;
4153 quotient = gen_reg_rtx (compute_mode);
4154 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4155 label1 = gen_label_rtx ();
4156 label2 = gen_label_rtx ();
4157 label3 = gen_label_rtx ();
4158 label4 = gen_label_rtx ();
4159 label5 = gen_label_rtx ();
4160 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4161 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4162 compute_mode, label1);
4163 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4164 quotient, 0, OPTAB_LIB_WIDEN);
4165 if (tem != quotient)
4166 emit_move_insn (quotient, tem);
4167 emit_jump_insn (gen_jump (label5));
4168 emit_barrier ();
4169 emit_label (label1);
4170 expand_dec (adjusted_op0, const1_rtx);
4171 emit_jump_insn (gen_jump (label4));
4172 emit_barrier ();
4173 emit_label (label2);
4174 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4175 compute_mode, label3);
4176 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4177 quotient, 0, OPTAB_LIB_WIDEN);
4178 if (tem != quotient)
4179 emit_move_insn (quotient, tem);
4180 emit_jump_insn (gen_jump (label5));
4181 emit_barrier ();
4182 emit_label (label3);
4183 expand_inc (adjusted_op0, const1_rtx);
4184 emit_label (label4);
4185 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4186 quotient, 0, OPTAB_LIB_WIDEN);
4187 if (tem != quotient)
4188 emit_move_insn (quotient, tem);
4189 expand_inc (quotient, const1_rtx);
4190 emit_label (label5);
4193 break;
4195 case EXACT_DIV_EXPR:
4196 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4198 HOST_WIDE_INT d = INTVAL (op1);
4199 unsigned HOST_WIDE_INT ml;
4200 int pre_shift;
4201 rtx t1;
4203 pre_shift = floor_log2 (d & -d);
4204 ml = invert_mod2n (d >> pre_shift, size);
4205 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4206 build_int_cst (NULL_TREE, pre_shift),
4207 NULL_RTX, unsignedp);
4208 quotient = expand_mult (compute_mode, t1,
4209 gen_int_mode (ml, compute_mode),
4210 NULL_RTX, 1);
4212 insn = get_last_insn ();
4213 set_unique_reg_note (insn,
4214 REG_EQUAL,
4215 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4216 compute_mode,
4217 op0, op1));
4219 break;
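
/* Aside (editor's illustration, not part of expmed.c): what
   invert_mod2n provides for the code above.  An odd divisor has a
   multiplicative inverse modulo 2^32, computable by Newton
   iteration; exact division then becomes one multiply.  Even
   divisors are first shifted right by pre_shift, as above.  */
#if 0 /* illustration only */
static unsigned int
invert_mod2_32 (unsigned int d)        /* D must be odd */
{
  unsigned int x = d;                  /* d*d == 1 mod 8: 3 bits right */
  x *= 2 - d * x;                      /* 6 bits */
  x *= 2 - d * x;                      /* 12 bits */
  x *= 2 - d * x;                      /* 24 bits */
  x *= 2 - d * x;                      /* 48 bits >= 32 */
  return x;
}

static unsigned int
exact_udiv (unsigned int n, unsigned int d)  /* N known divisible by D */
{
  return n * invert_mod2_32 (d);
}
#endif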
4221 case ROUND_DIV_EXPR:
4222 case ROUND_MOD_EXPR:
4223 if (unsignedp)
4225 rtx tem;
4226 rtx label;
4227 label = gen_label_rtx ();
4228 quotient = gen_reg_rtx (compute_mode);
4229 remainder = gen_reg_rtx (compute_mode);
4230 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4232 rtx tem;
4233 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4234 quotient, 1, OPTAB_LIB_WIDEN);
4235 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4236 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4237 remainder, 1, OPTAB_LIB_WIDEN);
4239 tem = plus_constant (op1, -1);
4240 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4241 build_int_cst (NULL_TREE, 1),
4242 NULL_RTX, 1);
4243 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4244 expand_inc (quotient, const1_rtx);
4245 expand_dec (remainder, op1);
4246 emit_label (label);
4248 else
4250 rtx abs_rem, abs_op1, tem, mask;
4251 rtx label;
4252 label = gen_label_rtx ();
4253 quotient = gen_reg_rtx (compute_mode);
4254 remainder = gen_reg_rtx (compute_mode);
4255 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4257 rtx tem;
4258 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4259 quotient, 0, OPTAB_LIB_WIDEN);
4260 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4261 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4262 remainder, 0, OPTAB_LIB_WIDEN);
4264 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4265 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4266 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4267 build_int_cst (NULL_TREE, 1),
4268 NULL_RTX, 1);
4269 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4270 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4271 NULL_RTX, 0, OPTAB_WIDEN);
4272 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4273 build_int_cst (NULL_TREE, size - 1),
4274 NULL_RTX, 0);
4275 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4276 NULL_RTX, 0, OPTAB_WIDEN);
4277 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4278 NULL_RTX, 0, OPTAB_WIDEN);
4279 expand_inc (quotient, tem);
4280 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4281 NULL_RTX, 0, OPTAB_WIDEN);
4282 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4283 NULL_RTX, 0, OPTAB_WIDEN);
4284 expand_dec (remainder, tem);
4285 emit_label (label);
4287 return gen_lowpart (mode, rem_flag ? remainder : quotient);
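
/* Aside (editor's illustration, not part of expmed.c): the unsigned
   round-to-nearest compensation above.  The quotient is bumped when
   the remainder exceeds (d - 1) / 2, so exact halves round up; the
   remainder wraps modulo 2^32, just as the RTL decrement does.  */
#if 0 /* illustration only */
static void
uround_divmod (unsigned int n, unsigned int d,
               unsigned int *q, unsigned int *r)
{
  *q = n / d;
  *r = n % d;
  if (*r > (d - 1) / 2)
    {
      *q += 1;
      *r -= d;
    }
}
#endif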
4289 default:
4290 abort ();
4293 if (quotient == 0)
4295 if (target && GET_MODE (target) != compute_mode)
4296 target = 0;
4298 if (rem_flag)
4300 /* Try to produce the remainder without producing the quotient.
4301 If we seem to have a divmod pattern that does not require widening,
4302 don't try widening here. We should really have a WIDEN argument
4303 to expand_twoval_binop, since what we'd really like to do here is
4304 1) try a mod insn in compute_mode
4305 2) try a divmod insn in compute_mode
4306 3) try a div insn in compute_mode and multiply-subtract to get
4307 remainder
4308 4) try the same things with widening allowed. */
4309 remainder
4310 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4311 op0, op1, target,
4312 unsignedp,
4313 ((optab2->handlers[compute_mode].insn_code
4314 != CODE_FOR_nothing)
4315 ? OPTAB_DIRECT : OPTAB_WIDEN));
4316 if (remainder == 0)
4318 /* No luck there. Can we do remainder and divide at once
4319 without a library call? */
4320 remainder = gen_reg_rtx (compute_mode);
4321 if (! expand_twoval_binop ((unsignedp
4322 ? udivmod_optab
4323 : sdivmod_optab),
4324 op0, op1,
4325 NULL_RTX, remainder, unsignedp))
4326 remainder = 0;
4329 if (remainder)
4330 return gen_lowpart (mode, remainder);
4333 /* Produce the quotient. Try a quotient insn, but not a library call.
4334 If we have a divmod in this mode, use it in preference to widening
4335 the div (for this test we assume it will not fail). Note that optab2
4336 is set to the one of the two optabs that the call below will use. */
4337 quotient
4338 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4339 op0, op1, rem_flag ? NULL_RTX : target,
4340 unsignedp,
4341 ((optab2->handlers[compute_mode].insn_code
4342 != CODE_FOR_nothing)
4343 ? OPTAB_DIRECT : OPTAB_WIDEN));
4345 if (quotient == 0)
4347 /* No luck there. Try a quotient-and-remainder insn,
4348 keeping the quotient alone. */
4349 quotient = gen_reg_rtx (compute_mode);
4350 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4351 op0, op1,
4352 quotient, NULL_RTX, unsignedp))
4354 quotient = 0;
4355 if (! rem_flag)
4356 /* Still no luck. If we are not computing the remainder,
4357 use a library call for the quotient. */
4358 quotient = sign_expand_binop (compute_mode,
4359 udiv_optab, sdiv_optab,
4360 op0, op1, target,
4361 unsignedp, OPTAB_LIB_WIDEN);
4366 if (rem_flag)
4368 if (target && GET_MODE (target) != compute_mode)
4369 target = 0;
4371 if (quotient == 0)
4373 /* No divide instruction either. Use library for remainder. */
4374 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4375 op0, op1, target,
4376 unsignedp, OPTAB_LIB_WIDEN);
4377 /* No remainder function. Try a quotient-and-remainder
4378 function, keeping the remainder. */
4379 if (!remainder)
4381 remainder = gen_reg_rtx (compute_mode);
4382 if (!expand_twoval_binop_libfunc
4383 (unsignedp ? udivmod_optab : sdivmod_optab,
4384 op0, op1,
4385 NULL_RTX, remainder,
4386 unsignedp ? UMOD : MOD))
4387 remainder = NULL_RTX;
4390 else
4392 /* We divided. Now finish doing X - Y * (X / Y). */
4393 remainder = expand_mult (compute_mode, quotient, op1,
4394 NULL_RTX, unsignedp);
4395 remainder = expand_binop (compute_mode, sub_optab, op0,
4396 remainder, target, unsignedp,
4397 OPTAB_LIB_WIDEN);
4401 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4404 /* Return a tree node with data type TYPE, describing the value of X.
4405 Usually this is a VAR_DECL, if there is no obvious better choice.
4406 X may be an expression; however, we only support those expressions
4407 generated by loop.c. */
4409 tree
4410 make_tree (tree type, rtx x)
4412 tree t;
4414 switch (GET_CODE (x))
4416 case CONST_INT:
4418 HOST_WIDE_INT hi = 0;
4420 if (INTVAL (x) < 0
4421 && !(TYPE_UNSIGNED (type)
4422 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4423 < HOST_BITS_PER_WIDE_INT)))
4424 hi = -1;
4426 t = build_int_cst_wide (type, INTVAL (x), hi);
4428 return t;
4431 case CONST_DOUBLE:
4432 if (GET_MODE (x) == VOIDmode)
4433 t = build_int_cst_wide (type,
4434 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4435 else
4437 REAL_VALUE_TYPE d;
4439 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4440 t = build_real (type, d);
4443 return t;
4445 case CONST_VECTOR:
4447 int i, units;
4448 rtx elt;
4449 tree t = NULL_TREE;
4451 units = CONST_VECTOR_NUNITS (x);
4453 /* Build a tree with vector elements. */
4454 for (i = units - 1; i >= 0; --i)
4456 elt = CONST_VECTOR_ELT (x, i);
4457 t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4460 return build_vector (type, t);
4463 case PLUS:
4464 return fold (build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4465 make_tree (type, XEXP (x, 1))));
4467 case MINUS:
4468 return fold (build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4469 make_tree (type, XEXP (x, 1))));
4471 case NEG:
4472 return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
4474 case MULT:
4475 return fold (build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4476 make_tree (type, XEXP (x, 1))));
4478 case ASHIFT:
4479 return fold (build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4480 make_tree (type, XEXP (x, 1))));
4482 case LSHIFTRT:
4483 t = lang_hooks.types.unsigned_type (type);
4484 return fold (convert (type,
4485 build2 (RSHIFT_EXPR, t,
4486 make_tree (t, XEXP (x, 0)),
4487 make_tree (type, XEXP (x, 1)))));
4489 case ASHIFTRT:
4490 t = lang_hooks.types.signed_type (type);
4491 return fold (convert (type,
4492 build2 (RSHIFT_EXPR, t,
4493 make_tree (t, XEXP (x, 0)),
4494 make_tree (type, XEXP (x, 1)))));
4496 case DIV:
4497 if (TREE_CODE (type) != REAL_TYPE)
4498 t = lang_hooks.types.signed_type (type);
4499 else
4500 t = type;
4502 return fold (convert (type,
4503 build2 (TRUNC_DIV_EXPR, t,
4504 make_tree (t, XEXP (x, 0)),
4505 make_tree (t, XEXP (x, 1)))));
4506 case UDIV:
4507 t = lang_hooks.types.unsigned_type (type);
4508 return fold (convert (type,
4509 build2 (TRUNC_DIV_EXPR, t,
4510 make_tree (t, XEXP (x, 0)),
4511 make_tree (t, XEXP (x, 1)))));
4513 case SIGN_EXTEND:
4514 case ZERO_EXTEND:
4515 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4516 GET_CODE (x) == ZERO_EXTEND);
4517 return fold (convert (type, make_tree (t, XEXP (x, 0))));
4519 default:
4520 t = build_decl (VAR_DECL, NULL_TREE, type);
4522 /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4523 ptr_mode. So convert. */
4524 if (POINTER_TYPE_P (type))
4525 x = convert_memory_address (TYPE_MODE (type), x);
4527 /* Note that we do *not* use SET_DECL_RTL here, because we do not
4528 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
4529 t->decl.rtl = x;
4531 return t;
4535 /* Check whether the multiplication X * MULT + ADD overflows.
4536 X, MULT and ADD must be CONST_*.
4537 MODE is the machine mode for the computation.
4538 X and MULT must have mode MODE (a VOIDmode constant defaults to
4539 MODE). ADD may have a different mode.
4540 UNSIGNEDP is nonzero to do unsigned multiplication. */
4542 bool
4543 const_mult_add_overflow_p (rtx x, rtx mult, rtx add,
4544 enum machine_mode mode, int unsignedp)
4546 tree type, mult_type, add_type, result;
4548 type = lang_hooks.types.type_for_mode (mode, unsignedp);
4550 /* In order to get a proper overflow indication from an unsigned
4551 type, we have to pretend that it's a sizetype. */
4552 mult_type = type;
4553 if (unsignedp)
4555 /* FIXME: It would be nice if we could step directly from this
4556 type to its sizetype equivalent. */
4557 mult_type = build_distinct_type_copy (type);
4558 TYPE_IS_SIZETYPE (mult_type) = 1;
4561 add_type = (GET_MODE (add) == VOIDmode ? mult_type
4562 : lang_hooks.types.type_for_mode (GET_MODE (add), unsignedp));
4564 result = fold (build2 (PLUS_EXPR, mult_type,
4565 fold (build2 (MULT_EXPR, mult_type,
4566 make_tree (mult_type, x),
4567 make_tree (mult_type, mult))),
4568 make_tree (add_type, add)));
4570 return TREE_CONSTANT_OVERFLOW (result);
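
/* Aside (editor's illustration, not part of expmed.c): outside the
   compiler, the same question -- does X * MULT + ADD overflow? --
   can be put to the checked-arithmetic builtins of modern GCC and
   Clang.  Shown only as a point of comparison with the
   fold-and-inspect approach above.  */
#if 0 /* illustration only */
static int
mult_add_overflows (long x, long mult, long add)
{
  long t, r;
  return __builtin_mul_overflow (x, mult, &t)
         || __builtin_add_overflow (t, add, &r);
}
#endif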
4573 /* Return an rtx representing the value of X * MULT + ADD.
4574 TARGET is a suggestion for where to store the result (an rtx).
4575 MODE is the machine mode for the computation.
4576 X and MULT must have mode MODE (a VOIDmode constant defaults to
4577 MODE). ADD may have a different mode.
4578 UNSIGNEDP is nonzero to do unsigned multiplication.
4579 This may emit insns. */
4581 rtx
4582 expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
4583 int unsignedp)
4585 tree type = lang_hooks.types.type_for_mode (mode, unsignedp);
4586 tree add_type = (GET_MODE (add) == VOIDmode
4587 ? type: lang_hooks.types.type_for_mode (GET_MODE (add),
4588 unsignedp));
4589 tree result = fold (build2 (PLUS_EXPR, type,
4590 fold (build2 (MULT_EXPR, type,
4591 make_tree (type, x),
4592 make_tree (type, mult))),
4593 make_tree (add_type, add)));
4595 return expand_expr (result, target, VOIDmode, 0);
4598 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4599 and returning TARGET.
4601 If TARGET is 0, a pseudo-register or constant is returned. */
4603 rtx
4604 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
4606 rtx tem = 0;
4608 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4609 tem = simplify_binary_operation (AND, mode, op0, op1);
4610 if (tem == 0)
4611 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4613 if (target == 0)
4614 target = tem;
4615 else if (tem != target)
4616 emit_move_insn (target, tem);
4617 return target;
4620 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
4621 and storing in TARGET. Normally return TARGET.
4622 Return 0 if that cannot be done.
4624 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
4625 it is VOIDmode, they cannot both be CONST_INT.
4627 UNSIGNEDP is for the case where we have to widen the operands
4628 to perform the operation. It says to use zero-extension.
4630 NORMALIZEP is 1 if we should convert the result to be either zero
4631 or one. NORMALIZEP is -1 if we should convert the result to be
4632 either zero or -1. If NORMALIZEP is zero, the result will be left
4633 "raw" out of the scc insn. */
4635 rtx
4636 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
4637 enum machine_mode mode, int unsignedp, int normalizep)
4639 rtx subtarget;
4640 enum insn_code icode;
4641 enum machine_mode compare_mode;
4642 enum machine_mode target_mode = GET_MODE (target);
4643 rtx tem;
4644 rtx last = get_last_insn ();
4645 rtx pattern, comparison;
4647 if (unsignedp)
4648 code = unsigned_condition (code);
4650 /* If one operand is constant, make it the second one. Only do this
4651 if the other operand is not constant as well. */
4653 if (swap_commutative_operands_p (op0, op1))
4655 tem = op0;
4656 op0 = op1;
4657 op1 = tem;
4658 code = swap_condition (code);
4661 if (mode == VOIDmode)
4662 mode = GET_MODE (op0);
4664 /* For some comparisons with 1 and -1, we can convert this to
4665 comparisons with zero. This will often produce more opportunities for
4666 store-flag insns. */
4668 switch (code)
4670 case LT:
4671 if (op1 == const1_rtx)
4672 op1 = const0_rtx, code = LE;
4673 break;
4674 case LE:
4675 if (op1 == constm1_rtx)
4676 op1 = const0_rtx, code = LT;
4677 break;
4678 case GE:
4679 if (op1 == const1_rtx)
4680 op1 = const0_rtx, code = GT;
4681 break;
4682 case GT:
4683 if (op1 == constm1_rtx)
4684 op1 = const0_rtx, code = GE;
4685 break;
4686 case GEU:
4687 if (op1 == const1_rtx)
4688 op1 = const0_rtx, code = NE;
4689 break;
4690 case LTU:
4691 if (op1 == const1_rtx)
4692 op1 = const0_rtx, code = EQ;
4693 break;
4694 default:
4695 break;
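
/* Aside (editor's illustration, not part of expmed.c): the rewrites
   above, as equivalences on plain C operands; comparisons against
   zero are likelier to match a store-flag pattern.

       a <   1  ==  a <= 0        a <= -1  ==  a <  0     (signed)
       a >=  1  ==  a >  0        a >  -1  ==  a >= 0     (signed)
       u >=  1  ==  u != 0        u <   1  ==  u == 0     (unsigned)  */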
4698 /* If we are comparing a double-word integer with zero or -1, we can
4699 convert the comparison into one involving a single word. */
4700 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
4701 && GET_MODE_CLASS (mode) == MODE_INT
4702 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
4704 if ((code == EQ || code == NE)
4705 && (op1 == const0_rtx || op1 == constm1_rtx))
4707 rtx op00, op01, op0both;
4709 /* Do a logical OR or AND of the two words and compare the result. */
4710 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
4711 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
4712 op0both = expand_binop (word_mode,
4713 op1 == const0_rtx ? ior_optab : and_optab,
4714 op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
4716 if (op0both != 0)
4717 return emit_store_flag (target, code, op0both, op1, word_mode,
4718 unsignedp, normalizep);
4720 else if ((code == LT || code == GE) && op1 == const0_rtx)
4722 rtx op0h;
4724 /* If testing the sign bit, we can just test the high word. */
4725 op0h = simplify_gen_subreg (word_mode, op0, mode,
4726 subreg_highpart_offset (word_mode, mode));
4727 return emit_store_flag (target, code, op0h, op1, word_mode,
4728 unsignedp, normalizep);
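
/* Aside (editor's illustration, not part of expmed.c): the
   double-word reductions above, for a 64-bit value held as two
   32-bit words.  */
#if 0 /* illustration only */
static int
dw_eq_zero (unsigned int lo, unsigned int hi)     /* x == 0  */
{
  return (lo | hi) == 0;
}

static int
dw_eq_minus_1 (unsigned int lo, unsigned int hi)  /* x == -1 */
{
  return (lo & hi) == 0xffffffffu;
}

static int
dw_negative (unsigned int lo, unsigned int hi)    /* x < 0   */
{
  (void) lo;                 /* the sign bit lives in the high word */
  return (int) hi < 0;
}
#endif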
4732 /* From now on, we won't change CODE, so set ICODE now. */
4733 icode = setcc_gen_code[(int) code];
4735 /* If this is A < 0 or A >= 0, we can do this by taking the one's
4736 complement of A (for GE) and shifting the sign bit to the low bit. */
4737 if (op1 == const0_rtx && (code == LT || code == GE)
4738 && GET_MODE_CLASS (mode) == MODE_INT
4739 && (normalizep || STORE_FLAG_VALUE == 1
4740 || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4741 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4742 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
4744 subtarget = target;
4746 /* If the result is to be wider than OP0, it is best to convert it
4747 first. If it is to be narrower, it is *incorrect* to convert it
4748 first. */
4749 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
4751 op0 = convert_modes (target_mode, mode, op0, 0);
4752 mode = target_mode;
4755 if (target_mode != mode)
4756 subtarget = 0;
4758 if (code == GE)
4759 op0 = expand_unop (mode, one_cmpl_optab, op0,
4760 ((STORE_FLAG_VALUE == 1 || normalizep)
4761 ? 0 : subtarget), 0);
4763 if (STORE_FLAG_VALUE == 1 || normalizep)
4764 /* If we are supposed to produce a 0/1 value, we want to do
4765 a logical shift from the sign bit to the low-order bit; for
4766 a -1/0 value, we do an arithmetic shift. */
4767 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
4768 size_int (GET_MODE_BITSIZE (mode) - 1),
4769 subtarget, normalizep != -1);
4771 if (mode != target_mode)
4772 op0 = convert_modes (target_mode, mode, op0, 0);
4774 return op0;
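
/* Aside (editor's illustration, not part of expmed.c): the sign-bit
   extraction above for 32-bit int.  A logical shift yields 0/1; the
   code above uses an arithmetic shift instead when a 0/-1 value is
   wanted (normalizep == -1).  */
#if 0 /* illustration only */
static int
lt_zero (int a)                        /* a < 0, as 0/1 */
{
  return (unsigned int) a >> 31;
}

static int
ge_zero (int a)                        /* a >= 0, as 0/1 */
{
  return (unsigned int) ~a >> 31;      /* complement first, as for GE */
}
#endif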
4777 if (icode != CODE_FOR_nothing)
4779 insn_operand_predicate_fn pred;
4781 /* We think we may be able to do this with a scc insn. Emit the
4782 comparison and then the scc insn. */
4784 do_pending_stack_adjust ();
4785 last = get_last_insn ();
4787 comparison
4788 = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
4789 if (CONSTANT_P (comparison))
4791 if (GET_CODE (comparison) == CONST_INT)
4793 if (comparison == const0_rtx)
4794 return const0_rtx;
4796 #ifdef FLOAT_STORE_FLAG_VALUE
4797 else if (GET_CODE (comparison) == CONST_DOUBLE)
4799 if (comparison == CONST0_RTX (GET_MODE (comparison)))
4800 return const0_rtx;
4802 #endif
4803 else
4804 abort ();
4805 if (normalizep == 1)
4806 return const1_rtx;
4807 if (normalizep == -1)
4808 return constm1_rtx;
4809 return const_true_rtx;
4812 /* The code of COMPARISON may not match CODE if compare_from_rtx
4813 decided to swap its operands and reverse the original code.
4815 We know that compare_from_rtx returns either a CONST_INT or
4816 a new comparison code, so it is safe to just extract the
4817 code from COMPARISON. */
4818 code = GET_CODE (comparison);
4820 /* Get a reference to the target in the proper mode for this insn. */
4821 compare_mode = insn_data[(int) icode].operand[0].mode;
4822 subtarget = target;
4823 pred = insn_data[(int) icode].operand[0].predicate;
4824 if (optimize || ! (*pred) (subtarget, compare_mode))
4825 subtarget = gen_reg_rtx (compare_mode);
4827 pattern = GEN_FCN (icode) (subtarget);
4828 if (pattern)
4830 emit_insn (pattern);
4832 /* If we are converting to a wider mode, first convert to
4833 TARGET_MODE, then normalize. This produces better combining
4834 opportunities on machines that have a SIGN_EXTRACT when we are
4835 testing a single bit. This mostly benefits the 68k.
4837 If STORE_FLAG_VALUE does not have the sign bit set when
4838 interpreted in COMPARE_MODE, we can do this conversion as
4839 unsigned, which is usually more efficient. */
4840 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4842 convert_move (target, subtarget,
4843 (GET_MODE_BITSIZE (compare_mode)
4844 <= HOST_BITS_PER_WIDE_INT)
4845 && 0 == (STORE_FLAG_VALUE
4846 & ((HOST_WIDE_INT) 1
4847 << (GET_MODE_BITSIZE (compare_mode) -1))));
4848 op0 = target;
4849 compare_mode = target_mode;
4851 else
4852 op0 = subtarget;
4854 /* If we want to keep subexpressions around, don't reuse our
4855 last target. */
4857 if (optimize)
4858 subtarget = 0;
4860 /* Now normalize to the proper value in COMPARE_MODE. Sometimes
4861 we don't have to do anything. */
4862 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4864 /* STORE_FLAG_VALUE might be the most negative number, so write
4865 the comparison this way to avoid a compile-time warning. */
4866 else if (- normalizep == STORE_FLAG_VALUE)
4867 op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
4869 /* We don't want to use STORE_FLAG_VALUE < 0 below since this
4870 makes it hard to use a value of just the sign bit due to
4871 ANSI integer constant typing rules. */
4872 else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
4873 && (STORE_FLAG_VALUE
4874 & ((HOST_WIDE_INT) 1
4875 << (GET_MODE_BITSIZE (compare_mode) - 1))))
4876 op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
4877 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
4878 subtarget, normalizep == 1);
4879 else if (STORE_FLAG_VALUE & 1)
4881 op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
4882 if (normalizep == -1)
4883 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
4885 else
4886 abort ();
4888 /* If we were converting to a smaller mode, do the
4889 conversion now. */
4890 if (target_mode != compare_mode)
4892 convert_move (target, op0, 0);
4893 return target;
4895 else
4896 return op0;
4900 delete_insns_since (last);
4902 /* If optimizing, use different pseudo registers for each insn, instead
4903 of reusing the same pseudo. This leads to better CSE, but slows
4904 down the compiler, since there are more pseudos. */
4905 subtarget = (!optimize
4906 && (target_mode == mode)) ? target : NULL_RTX;
4908 /* If we reached here, we can't do this with a scc insn. However, there
4909 are some comparisons that can be done directly. For example, if
4910 this is an equality comparison of integers, we can try to exclusive-or
4911 (or subtract) the two operands and use a recursive call to try the
4912 comparison with zero. Don't do any of these cases if branches are
4913 very cheap. */
4915 if (BRANCH_COST > 0
4916 && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
4917 && op1 != const0_rtx)
4919 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
4920 OPTAB_WIDEN);
4922 if (tem == 0)
4923 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
4924 OPTAB_WIDEN);
4925 if (tem != 0)
4926 tem = emit_store_flag (target, code, tem, const0_rtx,
4927 mode, unsignedp, normalizep);
4928 if (tem == 0)
4929 delete_insns_since (last);
4930 return tem;
4933 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
4934 the constant zero. Reject all other comparisons at this point. Only
4935 do LE and GT if branches are expensive, since these sequences are
4936 themselves expensive on 2-operand machines. */
4938 if (BRANCH_COST == 0
4939 || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
4940 || (code != EQ && code != NE
4941 && (BRANCH_COST <= 1 || (code != LE && code != GT))))
4942 return 0;
4944 /* See what we need to return. We can only return a 1, -1, or the
4945 sign bit. */
4947 if (normalizep == 0)
4949 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
4950 normalizep = STORE_FLAG_VALUE;
4952 else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4953 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4954 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
4956 else
4957 return 0;
4960 /* Try to put the result of the comparison in the sign bit. Assume we can't
4961 do the necessary operation below. */
4963 tem = 0;
4965 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
4966 the sign bit set. */
4968 if (code == LE)
4970 /* This is destructive, so SUBTARGET can't be OP0. */
4971 if (rtx_equal_p (subtarget, op0))
4972 subtarget = 0;
4974 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
4975 OPTAB_WIDEN);
4976 if (tem)
4977 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
4978 OPTAB_WIDEN);
4981 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
4982 number of bits in the mode of OP0, minus one. */
4984 if (code == GT)
4986 if (rtx_equal_p (subtarget, op0))
4987 subtarget = 0;
4989 tem = expand_shift (RSHIFT_EXPR, mode, op0,
4990 size_int (GET_MODE_BITSIZE (mode) - 1),
4991 subtarget, 0);
4992 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
4993 OPTAB_WIDEN);
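
/* Aside (editor's illustration, not part of expmed.c): both sign-bit
   constructions above, for 32-bit int with arithmetic right shift and
   wrapping arithmetic (as the RTL guarantees).  Each inner expression
   is negative exactly when the predicate holds, so the normalizing
   shift applied further down turns it into 0/1 or 0/-1.  */
#if 0 /* illustration only */
static int
le_zero (int a)                        /* a <= 0, as 0/1 */
{
  return (unsigned int) (a | (a - 1)) >> 31;
}

static int
gt_zero (int a)                        /* a > 0, as 0/1 */
{
  return (unsigned int) ((a >> 31) - a) >> 31;
}
#endif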
4996 if (code == EQ || code == NE)
4998 /* For EQ or NE, one way to do the comparison is to apply an operation
4999 that converts the operand into a positive number if it is nonzero
5000 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5001 for NE we negate. This puts the result in the sign bit. Then we
5002 normalize with a shift, if needed.
5004 Two operations that can do the above actions are ABS and FFS, so try
5005 them. If that doesn't work, and MODE is smaller than a full word,
5006 we can use zero-extension to the wider mode (an unsigned conversion)
5007 as the operation. */
5009 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5010 that is compensated by the subsequent overflow when subtracting
5011 one / negating. */
5013 if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5014 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5015 else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5016 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5017 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5019 tem = convert_modes (word_mode, mode, op0, 1);
5020 mode = word_mode;
5023 if (tem != 0)
5025 if (code == EQ)
5026 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5027 0, OPTAB_WIDEN);
5028 else
5029 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5032 /* If we couldn't do it that way, for NE we can "or" the two's complement
5033 of the value with itself. For EQ, we take the one's complement of
5034 that "or", which is an extra insn, so we only handle EQ if branches
5035 are expensive. */
5037 if (tem == 0 && (code == NE || BRANCH_COST > 1))
5039 if (rtx_equal_p (subtarget, op0))
5040 subtarget = 0;
5042 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5043 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5044 OPTAB_WIDEN);
5046 if (tem && code == EQ)
5047 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
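
/* Aside (editor's illustration, not part of expmed.c): the fallback
   above, for 32-bit int with wrapping negation (as the RTL
   guarantees).  a | -a is negative exactly when a is nonzero;
   complementing it first gives the EQ form, at the cost of the extra
   insn mentioned above.  */
#if 0 /* illustration only */
static int
ne_zero (int a)                        /* a != 0, as 0/1 */
{
  return (unsigned int) (a | -a) >> 31;
}

static int
eq_zero (int a)                        /* a == 0, as 0/1 */
{
  return (unsigned int) ~(a | -a) >> 31;
}
#endif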
5051 if (tem && normalizep)
5052 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5053 size_int (GET_MODE_BITSIZE (mode) - 1),
5054 subtarget, normalizep == 1);
5056 if (tem)
5058 if (GET_MODE (tem) != target_mode)
5060 convert_move (target, tem, 0);
5061 tem = target;
5063 else if (!subtarget)
5065 emit_move_insn (target, tem);
5066 tem = target;
5069 else
5070 delete_insns_since (last);
5072 return tem;
5075 /* Like emit_store_flag, but always succeeds. */
5077 rtx
5078 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5079 enum machine_mode mode, int unsignedp, int normalizep)
5081 rtx tem, label;
5083 /* First see if emit_store_flag can do the job. */
5084 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5085 if (tem != 0)
5086 return tem;
5088 if (normalizep == 0)
5089 normalizep = 1;
5091 /* If this failed, we have to do this with set/compare/jump/set code. */
5093 if (!REG_P (target)
5094 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5095 target = gen_reg_rtx (GET_MODE (target));
5097 emit_move_insn (target, const1_rtx);
5098 label = gen_label_rtx ();
5099 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5100 NULL_RTX, label);
5102 emit_move_insn (target, const0_rtx);
5103 emit_label (label);
5105 return target;
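
/* Aside (editor's illustration, not part of expmed.c): the
   set/compare/jump/set fallback above, as straight-line C.  */
#if 0 /* illustration only */
static int
store_flag_force (int a, int b)        /* a < b, as 0/1 */
{
  int t = 1;
  if (a < b)
    goto done;                         /* comparison true: keep the 1 */
  t = 0;
done:
  return t;
}
#endif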
5108 /* Perform possibly multi-word comparison and conditional jump to LABEL
5109 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.
5111 The algorithm is based on the code in expr.c:do_jump.
5113 Note that this does not perform a general comparison. Only variants
5114 generated within expmed.c are correctly handled, others abort (but could
5115 be handled if needed). */
5117 static void
5118 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5119 rtx label)
5121 /* If this mode is an integer too wide to compare properly,
5122 compare word by word. Rely on cse to optimize constant cases. */
5124 if (GET_MODE_CLASS (mode) == MODE_INT
5125 && ! can_compare_p (op, mode, ccp_jump))
5127 rtx label2 = gen_label_rtx ();
5129 switch (op)
5131 case LTU:
5132 do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
5133 break;
5135 case LEU:
5136 do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
5137 break;
5139 case LT:
5140 do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
5141 break;
5143 case GT:
5144 do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
5145 break;
5147 case GE:
5148 do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
5149 break;
5151 /* do_jump_by_parts_equality_rtx compares with zero. Luckily
5152 those are the only equality operations we do. */
5153 case EQ:
5154 if (arg2 != const0_rtx || mode != GET_MODE (arg1))
5155 abort ();
5156 do_jump_by_parts_equality_rtx (arg1, label2, label);
5157 break;
5159 case NE:
5160 if (arg2 != const0_rtx || mode != GET_MODE (arg1))
5161 abort ();
5162 do_jump_by_parts_equality_rtx (arg1, label, label2);
5163 break;
5165 default:
5166 abort ();
5169 emit_label (label2);
5171 else
5172 emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
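
/* Aside (editor's illustration, not part of expmed.c): the
   word-by-word ordering test behind do_jump_by_parts_greater_rtx,
   shown for a 64-bit unsigned value held as two 32-bit words.  */
#if 0 /* illustration only */
static int
dw_ltu (unsigned int lo1, unsigned int hi1,
        unsigned int lo2, unsigned int hi2)
{
  if (hi1 != hi2)
    return hi1 < hi2;                  /* high words decide */
  return lo1 < lo2;                    /* else the low words do */
}
#endif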