gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
   5    Free Software Foundation, Inc.
   6
   7 This file is part of GCC.
   8
   9 GCC is free software; you can redistribute it and/or modify it under
  10 the terms of the GNU General Public License as published by the Free
  11 Software Foundation; either version 3, or (at your option) any later
  12 version.
  13
  14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  17 for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with GCC; see the file COPYING3.  If not see
  21 <http://www.gnu.org/licenses/>.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "tm.h"
  28 #include "toplev.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "tm_p.h"
  32 #include "flags.h"
  33 #include "insn-config.h"
  34 #include "expr.h"
  35 #include "optabs.h"
  36 #include "real.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39 #include "df.h"
  40 #include "target.h"
  41
  42 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  43                                    unsigned HOST_WIDE_INT,
  44                                    unsigned HOST_WIDE_INT, rtx);
  45 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  46                                    unsigned HOST_WIDE_INT, rtx);
  47 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  48                                     unsigned HOST_WIDE_INT,
  49                                     unsigned HOST_WIDE_INT,
  50                                     unsigned HOST_WIDE_INT, rtx, int);
  51 static rtx mask_rtx (enum machine_mode, int, int, int);
  52 static rtx lshift_value (enum machine_mode, rtx, int, int);
  53 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  54                                     unsigned HOST_WIDE_INT, int);
  55 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  56 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  57 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  58
  59 /* Test whether a value is zero of a power of two.  */
  60 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
  61
  62 /* Nonzero means divides or modulus operations are relatively cheap for
  63    powers of two, so don't use branches; emit the operation instead.
  64    Usually, this will mean that the MD file will emit non-branch
  65    sequences.  */
  66
  67 static bool sdiv_pow2_cheap[2][NUM_MACHINE_MODES];
  68 static bool smod_pow2_cheap[2][NUM_MACHINE_MODES];
  69
  70 #ifndef SLOW_UNALIGNED_ACCESS
  71 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  72 #endif
  73
  74 /* For compilers that support multiple targets with different word sizes,
  75    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  76    is the H8/300(H) compiler.  */
  77
  78 #ifndef MAX_BITS_PER_WORD
  79 #define MAX_BITS_PER_WORD BITS_PER_WORD
  80 #endif
  81
  82 /* Reduce conditional compilation elsewhere.  */
  83 #ifndef HAVE_insv
  84 #define HAVE_insv       0
  85 #define CODE_FOR_insv   CODE_FOR_nothing
  86 #define gen_insv(a,b,c,d) NULL_RTX
  87 #endif
  88 #ifndef HAVE_extv
  89 #define HAVE_extv       0
  90 #define CODE_FOR_extv   CODE_FOR_nothing
  91 #define gen_extv(a,b,c,d) NULL_RTX
  92 #endif
  93 #ifndef HAVE_extzv
  94 #define HAVE_extzv      0
  95 #define CODE_FOR_extzv  CODE_FOR_nothing
  96 #define gen_extzv(a,b,c,d) NULL_RTX
  97 #endif
  98
  99 /* Cost of various pieces of RTL.  Note that some of these are indexed by
 100    shift count and some by mode.  */
 101 static int zero_cost[2];
 102 static int add_cost[2][NUM_MACHINE_MODES];
 103 static int neg_cost[2][NUM_MACHINE_MODES];
 104 static int shift_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 105 static int shiftadd_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 106 static int shiftsub_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 107 static int mul_cost[2][NUM_MACHINE_MODES];
 108 static int sdiv_cost[2][NUM_MACHINE_MODES];
 109 static int udiv_cost[2][NUM_MACHINE_MODES];
 110 static int mul_widen_cost[2][NUM_MACHINE_MODES];
 111 static int mul_highpart_cost[2][NUM_MACHINE_MODES];
 112
 113 void
 114 init_expmed (void)
 115 {
 116   struct
 117   {
 118     struct rtx_def reg;         rtunion reg_fld[2];
 119     struct rtx_def plus;        rtunion plus_fld1;
 120     struct rtx_def neg;
 121     struct rtx_def mult;        rtunion mult_fld1;
 122     struct rtx_def sdiv;        rtunion sdiv_fld1;
 123     struct rtx_def udiv;        rtunion udiv_fld1;
 124     struct rtx_def zext;
 125     struct rtx_def sdiv_32;     rtunion sdiv_32_fld1;
 126     struct rtx_def smod_32;     rtunion smod_32_fld1;
 127     struct rtx_def wide_mult;   rtunion wide_mult_fld1;
 128     struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
 129     struct rtx_def wide_trunc;
 130     struct rtx_def shift;       rtunion shift_fld1;
 131     struct rtx_def shift_mult;  rtunion shift_mult_fld1;
 132     struct rtx_def shift_add;   rtunion shift_add_fld1;
 133     struct rtx_def shift_sub;   rtunion shift_sub_fld1;
 134   } all;
 135
 136   rtx pow2[MAX_BITS_PER_WORD];
 137   rtx cint[MAX_BITS_PER_WORD];
 138   int m, n;
 139   enum machine_mode mode, wider_mode;
 140   int speed;
 141
 142
 143   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 144     {
 145       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 146       cint[m] = GEN_INT (m);
 147     }
 148   memset (&all, 0, sizeof all);
 149
 150   PUT_CODE (&all.reg, REG);
 151   /* Avoid using hard regs in ways which may be unsupported.  */
 152   SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
 153
 154   PUT_CODE (&all.plus, PLUS);
 155   XEXP (&all.plus, 0) = &all.reg;
 156   XEXP (&all.plus, 1) = &all.reg;
 157
 158   PUT_CODE (&all.neg, NEG);
 159   XEXP (&all.neg, 0) = &all.reg;
 160
 161   PUT_CODE (&all.mult, MULT);
 162   XEXP (&all.mult, 0) = &all.reg;
 163   XEXP (&all.mult, 1) = &all.reg;
 164
 165   PUT_CODE (&all.sdiv, DIV);
 166   XEXP (&all.sdiv, 0) = &all.reg;
 167   XEXP (&all.sdiv, 1) = &all.reg;
 168
 169   PUT_CODE (&all.udiv, UDIV);
 170   XEXP (&all.udiv, 0) = &all.reg;
 171   XEXP (&all.udiv, 1) = &all.reg;
 172
 173   PUT_CODE (&all.sdiv_32, DIV);
 174   XEXP (&all.sdiv_32, 0) = &all.reg;
 175   XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
 176
 177   PUT_CODE (&all.smod_32, MOD);
 178   XEXP (&all.smod_32, 0) = &all.reg;
 179   XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
 180
 181   PUT_CODE (&all.zext, ZERO_EXTEND);
 182   XEXP (&all.zext, 0) = &all.reg;
 183
 184   PUT_CODE (&all.wide_mult, MULT);
 185   XEXP (&all.wide_mult, 0) = &all.zext;
 186   XEXP (&all.wide_mult, 1) = &all.zext;
 187
 188   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 189   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 190
 191   PUT_CODE (&all.wide_trunc, TRUNCATE);
 192   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 193
 194   PUT_CODE (&all.shift, ASHIFT);
 195   XEXP (&all.shift, 0) = &all.reg;
 196
 197   PUT_CODE (&all.shift_mult, MULT);
 198   XEXP (&all.shift_mult, 0) = &all.reg;
 199
 200   PUT_CODE (&all.shift_add, PLUS);
 201   XEXP (&all.shift_add, 0) = &all.shift_mult;
 202   XEXP (&all.shift_add, 1) = &all.reg;
 203
 204   PUT_CODE (&all.shift_sub, MINUS);
 205   XEXP (&all.shift_sub, 0) = &all.shift_mult;
 206   XEXP (&all.shift_sub, 1) = &all.reg;
 207
 208   for (speed = 0; speed < 2; speed++)
 209     {
 210       crtl->maybe_hot_insn_p = speed;
 211       zero_cost[speed] = rtx_cost (const0_rtx, 0, speed);
 212
 213       for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 214            mode != VOIDmode;
 215            mode = GET_MODE_WIDER_MODE (mode))
 216         {
 217           PUT_MODE (&all.reg, mode);
 218           PUT_MODE (&all.plus, mode);
 219           PUT_MODE (&all.neg, mode);
 220           PUT_MODE (&all.mult, mode);
 221           PUT_MODE (&all.sdiv, mode);
 222           PUT_MODE (&all.udiv, mode);
 223           PUT_MODE (&all.sdiv_32, mode);
 224           PUT_MODE (&all.smod_32, mode);
 225           PUT_MODE (&all.wide_trunc, mode);
 226           PUT_MODE (&all.shift, mode);
 227           PUT_MODE (&all.shift_mult, mode);
 228           PUT_MODE (&all.shift_add, mode);
 229           PUT_MODE (&all.shift_sub, mode);
 230
 231           add_cost[speed][mode] = rtx_cost (&all.plus, SET, speed);
 232           neg_cost[speed][mode] = rtx_cost (&all.neg, SET, speed);
 233           mul_cost[speed][mode] = rtx_cost (&all.mult, SET, speed);
 234           sdiv_cost[speed][mode] = rtx_cost (&all.sdiv, SET, speed);
 235           udiv_cost[speed][mode] = rtx_cost (&all.udiv, SET, speed);
 236
 237           sdiv_pow2_cheap[speed][mode] = (rtx_cost (&all.sdiv_32, SET, speed)
 238                                           <= 2 * add_cost[speed][mode]);
 239           smod_pow2_cheap[speed][mode] = (rtx_cost (&all.smod_32, SET, speed)
 240                                           <= 4 * add_cost[speed][mode]);
 241
 242           wider_mode = GET_MODE_WIDER_MODE (mode);
 243           if (wider_mode != VOIDmode)
 244             {
 245               PUT_MODE (&all.zext, wider_mode);
 246               PUT_MODE (&all.wide_mult, wider_mode);
 247               PUT_MODE (&all.wide_lshr, wider_mode);
 248               XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
 249
 250               mul_widen_cost[speed][wider_mode]
 251                 = rtx_cost (&all.wide_mult, SET, speed);
 252               mul_highpart_cost[speed][mode]
 253                 = rtx_cost (&all.wide_trunc, SET, speed);
 254             }
 255
 256           shift_cost[speed][mode][0] = 0;
 257           shiftadd_cost[speed][mode][0] = shiftsub_cost[speed][mode][0]
 258             = add_cost[speed][mode];
 259
 260           n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
 261           for (m = 1; m < n; m++)
 262             {
 263               XEXP (&all.shift, 1) = cint[m];
 264               XEXP (&all.shift_mult, 1) = pow2[m];
 265
 266               shift_cost[speed][mode][m] = rtx_cost (&all.shift, SET, speed);
 267               shiftadd_cost[speed][mode][m] = rtx_cost (&all.shift_add, SET, speed);
 268               shiftsub_cost[speed][mode][m] = rtx_cost (&all.shift_sub, SET, speed);
 269             }
 270         }
 271     }
 272   default_rtl_profile ();
 273 }
 274
 275 /* Return an rtx representing minus the value of X.
 276    MODE is the intended mode of the result,
 277    useful if X is a CONST_INT.  */
 278
 279 rtx
 280 negate_rtx (enum machine_mode mode, rtx x)
 281 {
 282   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 283
 284   if (result == 0)
 285     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 286
 287   return result;
 288 }
 289
 290 /* Report on the availability of insv/extv/extzv and the desired mode
 291    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 292    is false; else the mode of the specified operand.  If OPNO is -1,
 293    all the caller cares about is whether the insn is available.  */
 294 enum machine_mode
 295 mode_for_extraction (enum extraction_pattern pattern, int opno)
 296 {
 297   const struct insn_data *data;
 298
 299   switch (pattern)
 300     {
 301     case EP_insv:
 302       if (HAVE_insv)
 303         {
 304           data = &insn_data[CODE_FOR_insv];
 305           break;
 306         }
 307       return MAX_MACHINE_MODE;
 308
 309     case EP_extv:
 310       if (HAVE_extv)
 311         {
 312           data = &insn_data[CODE_FOR_extv];
 313           break;
 314         }
 315       return MAX_MACHINE_MODE;
 316
 317     case EP_extzv:
 318       if (HAVE_extzv)
 319         {
 320           data = &insn_data[CODE_FOR_extzv];
 321           break;
 322         }
 323       return MAX_MACHINE_MODE;
 324
 325     default:
 326       gcc_unreachable ();
 327     }
 328
 329   if (opno == -1)
 330     return VOIDmode;
 331
 332   /* Everyone who uses this function used to follow it with
 333      if (result == VOIDmode) result = word_mode; */
 334   if (data->operand[opno].mode == VOIDmode)
 335     return word_mode;
 336   return data->operand[opno].mode;
 337 }
 338
 339 /* Return true if X, of mode MODE, matches the predicate for operand
 340    OPNO of instruction ICODE.  Allow volatile memories, regardless of
 341    the ambient volatile_ok setting.  */
 342
 343 static bool
 344 check_predicate_volatile_ok (enum insn_code icode, int opno,
 345                              rtx x, enum machine_mode mode)
 346 {
 347   bool save_volatile_ok, result;
 348
 349   save_volatile_ok = volatile_ok;
 350   result = insn_data[(int) icode].operand[opno].predicate (x, mode);
 351   volatile_ok = save_volatile_ok;
 352   return result;
 353 }
 354 \f
 355 /* A subroutine of store_bit_field, with the same arguments.  Return true
 356    if the operation could be implemented.
 357
 358    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 359    no other way of implementing the operation.  If FALLBACK_P is false,
 360    return false instead.  */
 361
 362 static bool
 363 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 364                    unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 365                    rtx value, bool fallback_p)
 366 {
 367   unsigned int unit
 368     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
 369   unsigned HOST_WIDE_INT offset, bitpos;
 370   rtx op0 = str_rtx;
 371   int byte_offset;
 372   rtx orig_value;
 373
 374   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 375
 376   while (GET_CODE (op0) == SUBREG)
 377     {
 378       /* The following line once was done only if WORDS_BIG_ENDIAN,
 379          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 380          meaningful at a much higher level; when structures are copied
 381          between memory and regs, the higher-numbered regs
 382          always get higher addresses.  */
 383       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 384       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 385
 386       byte_offset = 0;
 387
 388       /* Paradoxical subregs need special handling on big endian machines.  */
 389       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 390         {
 391           int difference = inner_mode_size - outer_mode_size;
 392
 393           if (WORDS_BIG_ENDIAN)
 394             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 395           if (BYTES_BIG_ENDIAN)
 396             byte_offset += difference % UNITS_PER_WORD;
 397         }
 398       else
 399         byte_offset = SUBREG_BYTE (op0);
 400
 401       bitnum += byte_offset * BITS_PER_UNIT;
 402       op0 = SUBREG_REG (op0);
 403     }
 404
 405   /* No action is needed if the target is a register and if the field
 406      lies completely outside that register.  This can occur if the source
 407      code contains an out-of-bounds access to a small array.  */
 408   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 409     return true;
 410
 411   /* Use vec_set patterns for inserting parts of vectors whenever
 412      available.  */
 413   if (VECTOR_MODE_P (GET_MODE (op0))
 414       && !MEM_P (op0)
 415       && (optab_handler (vec_set_optab, GET_MODE (op0))->insn_code
 416           != CODE_FOR_nothing)
 417       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 418       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 419       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 420     {
 421       enum machine_mode outermode = GET_MODE (op0);
 422       enum machine_mode innermode = GET_MODE_INNER (outermode);
 423       int icode = (int) optab_handler (vec_set_optab, outermode)->insn_code;
 424       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 425       rtx rtxpos = GEN_INT (pos);
 426       rtx src = value;
 427       rtx dest = op0;
 428       rtx pat, seq;
 429       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
 430       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
 431       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
 432
 433       start_sequence ();
 434
 435       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
 436         src = copy_to_mode_reg (mode1, src);
 437
 438       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 439         rtxpos = copy_to_mode_reg (mode1, rtxpos);
 440
 441       /* We could handle this, but we should always be called with a pseudo
 442          for our targets and all insns should take them as outputs.  */
 443       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
 444                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
 445                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
 446       pat = GEN_FCN (icode) (dest, src, rtxpos);
 447       seq = get_insns ();
 448       end_sequence ();
 449       if (pat)
 450         {
 451           emit_insn (seq);
 452           emit_insn (pat);
 453           return true;
 454         }
 455     }
 456
 457   /* If the target is a register, overwriting the entire object, or storing
 458      a full-word or multi-word field can be done with just a SUBREG.
 459
 460      If the target is memory, storing any naturally aligned field can be
 461      done with a simple store.  For targets that support fast unaligned
 462      memory, any naturally sized, unit aligned field can be done directly.  */
 463
 464   offset = bitnum / unit;
 465   bitpos = bitnum % unit;
 466   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 467                 + (offset * UNITS_PER_WORD);
 468
 469   if (bitpos == 0
 470       && bitsize == GET_MODE_BITSIZE (fieldmode)
 471       && (!MEM_P (op0)
 472           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 473              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 474              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 475           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 476              || (offset * BITS_PER_UNIT % bitsize == 0
 477                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 478     {
 479       if (MEM_P (op0))
 480         op0 = adjust_address (op0, fieldmode, offset);
 481       else if (GET_MODE (op0) != fieldmode)
 482         op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 483                                    byte_offset);
 484       emit_move_insn (op0, value);
 485       return true;
 486     }
 487
 488   /* Make sure we are playing with integral modes.  Pun with subregs
 489      if we aren't.  This must come after the entire register case above,
 490      since that case is valid for any mode.  The following cases are only
 491      valid for integral modes.  */
 492   {
 493     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 494     if (imode != GET_MODE (op0))
 495       {
 496         if (MEM_P (op0))
 497           op0 = adjust_address (op0, imode, 0);
 498         else
 499           {
 500             gcc_assert (imode != BLKmode);
 501             op0 = gen_lowpart (imode, op0);
 502           }
 503       }
 504   }
 505
 506   /* We may be accessing data outside the field, which means
 507      we can alias adjacent data.  */
 508   if (MEM_P (op0))
 509     {
 510       op0 = shallow_copy_rtx (op0);
 511       set_mem_alias_set (op0, 0);
 512       set_mem_expr (op0, 0);
 513     }
 514
 515   /* If OP0 is a register, BITPOS must count within a word.
 516      But as we have it, it counts within whatever size OP0 now has.
 517      On a bigendian machine, these are not the same, so convert.  */
 518   if (BYTES_BIG_ENDIAN
 519       && !MEM_P (op0)
 520       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 521     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 522
 523   /* Storing an lsb-aligned field in a register
 524      can be done with a movestrict instruction.  */
 525
 526   if (!MEM_P (op0)
 527       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 528       && bitsize == GET_MODE_BITSIZE (fieldmode)
 529       && (optab_handler (movstrict_optab, fieldmode)->insn_code
 530           != CODE_FOR_nothing))
 531     {
 532       int icode = optab_handler (movstrict_optab, fieldmode)->insn_code;
 533       rtx insn;
 534       rtx start = get_last_insn ();
 535
 536       /* Get appropriate low part of the value being stored.  */
 537       if (GET_CODE (value) == CONST_INT || REG_P (value))
 538         value = gen_lowpart (fieldmode, value);
 539       else if (!(GET_CODE (value) == SYMBOL_REF
 540                  || GET_CODE (value) == LABEL_REF
 541                  || GET_CODE (value) == CONST))
 542         value = convert_to_mode (fieldmode, value, 0);
 543
 544       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 545         value = copy_to_mode_reg (fieldmode, value);
 546
 547       if (GET_CODE (op0) == SUBREG)
 548         {
 549           /* Else we've got some float mode source being extracted into
 550              a different float mode destination -- this combination of
 551              subregs results in Severe Tire Damage.  */
 552           gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
 553                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 554                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 555           op0 = SUBREG_REG (op0);
 556         }
 557
 558       insn = (GEN_FCN (icode)
 559                  (gen_rtx_SUBREG (fieldmode, op0,
 560                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 561                                   + (offset * UNITS_PER_WORD)),
 562                                   value));
 563       if (insn)
 564         {
 565           emit_insn (insn);
 566           return true;
 567         }
 568       delete_insns_since (start);
 569     }
 570
 571   /* Handle fields bigger than a word.  */
 572
 573   if (bitsize > BITS_PER_WORD)
 574     {
 575       /* Here we transfer the words of the field
 576          in the order least significant first.
 577          This is because the most significant word is the one which may
 578          be less than full.
 579          However, only do that if the value is not BLKmode.  */
 580
 581       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 582       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 583       unsigned int i;
 584       rtx last;
 585
 586       /* This is the mode we must force value to, so that there will be enough
 587          subwords to extract.  Note that fieldmode will often (always?) be
 588          VOIDmode, because that is what store_field uses to indicate that this
 589          is a bit field, but passing VOIDmode to operand_subword_force
 590          is not allowed.  */
 591       fieldmode = GET_MODE (value);
 592       if (fieldmode == VOIDmode)
 593         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 594
 595       last = get_last_insn ();
 596       for (i = 0; i < nwords; i++)
 597         {
 598           /* If I is 0, use the low-order word in both field and target;
 599              if I is 1, use the next to lowest word; and so on.  */
 600           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 601           unsigned int bit_offset = (backwards
 602                                      ? MAX ((int) bitsize - ((int) i + 1)
 603                                             * BITS_PER_WORD,
 604                                             0)
 605                                      : (int) i * BITS_PER_WORD);
 606           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 607
 608           if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD,
 609                                             bitsize - i * BITS_PER_WORD),
 610                                   bitnum + bit_offset, word_mode,
 611                                   value_word, fallback_p))
 612             {
 613               delete_insns_since (last);
 614               return false;
 615             }
 616         }
 617       return true;
 618     }
 619
 620   /* From here on we can assume that the field to be stored in is
 621      a full-word (whatever type that is), since it is shorter than a word.  */
 622
 623   /* OFFSET is the number of words or bytes (UNIT says which)
 624      from STR_RTX to the first word or byte containing part of the field.  */
 625
 626   if (!MEM_P (op0))
 627     {
 628       if (offset != 0
 629           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 630         {
 631           if (!REG_P (op0))
 632             {
 633               /* Since this is a destination (lvalue), we can't copy
 634                  it to a pseudo.  We can remove a SUBREG that does not
 635                  change the size of the operand.  Such a SUBREG may
 636                  have been added above.  */
 637               gcc_assert (GET_CODE (op0) == SUBREG
 638                           && (GET_MODE_SIZE (GET_MODE (op0))
 639                               == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
 640               op0 = SUBREG_REG (op0);
 641             }
 642           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 643                                 op0, (offset * UNITS_PER_WORD));
 644         }
 645       offset = 0;
 646     }
 647
 648   /* If VALUE has a floating-point or complex mode, access it as an
 649      integer of the corresponding size.  This can occur on a machine
 650      with 64 bit registers that uses SFmode for float.  It can also
 651      occur for unaligned float or complex fields.  */
 652   orig_value = value;
 653   if (GET_MODE (value) != VOIDmode
 654       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 655       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 656     {
 657       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 658       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 659     }
 660
 661   /* Now OFFSET is nonzero only if OP0 is memory
 662      and is therefore always measured in bytes.  */
 663
 664   if (HAVE_insv
 665       && GET_MODE (value) != BLKmode
 666       && bitsize > 0
 667       && GET_MODE_BITSIZE (op_mode) >= bitsize
 668       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
 669             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
 670       && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
 671                                                         VOIDmode)
 672       && check_predicate_volatile_ok (CODE_FOR_insv, 0, op0, VOIDmode))
 673     {
 674       int xbitpos = bitpos;
 675       rtx value1;
 676       rtx xop0 = op0;
 677       rtx last = get_last_insn ();
 678       rtx pat;
 679
 680       /* Add OFFSET into OP0's address.  */
 681       if (MEM_P (xop0))
 682         xop0 = adjust_address (xop0, byte_mode, offset);
 683
 684       /* If xop0 is a register, we need it in OP_MODE
 685          to make it acceptable to the format of insv.  */
 686       if (GET_CODE (xop0) == SUBREG)
 687         /* We can't just change the mode, because this might clobber op0,
 688            and we will need the original value of op0 if insv fails.  */
 689         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 690       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 691         xop0 = gen_rtx_SUBREG (op_mode, xop0, 0);
 692
 693       /* On big-endian machines, we count bits from the most significant.
 694          If the bit field insn does not, we must invert.  */
 695
 696       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 697         xbitpos = unit - bitsize - xbitpos;
 698
 699       /* We have been counting XBITPOS within UNIT.
 700          Count instead within the size of the register.  */
 701       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
 702         xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
 703
 704       unit = GET_MODE_BITSIZE (op_mode);
 705
 706       /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 707       value1 = value;
 708       if (GET_MODE (value) != op_mode)
 709         {
 710           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 711             {
 712               /* Optimization: Don't bother really extending VALUE
 713                  if it has all the bits we will actually use.  However,
 714                  if we must narrow it, be sure we do it correctly.  */
 715
 716               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 717                 {
 718                   rtx tmp;
 719
 720                   tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 721                   if (! tmp)
 722                     tmp = simplify_gen_subreg (op_mode,
 723                                                force_reg (GET_MODE (value),
 724                                                           value1),
 725                                                GET_MODE (value), 0);
 726                   value1 = tmp;
 727                 }
 728               else
 729                 value1 = gen_lowpart (op_mode, value1);
 730             }
 731           else if (GET_CODE (value) == CONST_INT)
 732             value1 = gen_int_mode (INTVAL (value), op_mode);
 733           else
 734             /* Parse phase is supposed to make VALUE's data type
 735                match that of the component reference, which is a type
 736                at least as wide as the field; so VALUE should have
 737                a mode that corresponds to that type.  */
 738             gcc_assert (CONSTANT_P (value));
 739         }
 740
 741       /* If this machine's insv insists on a register,
 742          get VALUE1 into a register.  */
 743       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 744              (value1, op_mode)))
 745         value1 = force_reg (op_mode, value1);
 746
 747       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 748       if (pat)
 749         {
 750           emit_insn (pat);
 751           return true;
 752         }
 753       delete_insns_since (last);
 754     }
 755
 756   /* If OP0 is a memory, try copying it to a register and seeing if a
 757      cheap register alternative is available.  */
 758   if (HAVE_insv && MEM_P (op0))
 759     {
 760       enum machine_mode bestmode;
 761
 762       /* Get the mode to use for inserting into this field.  If OP0 is
 763          BLKmode, get the smallest mode consistent with the alignment. If
 764          OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
 765          mode. Otherwise, use the smallest mode containing the field.  */
 766
 767       if (GET_MODE (op0) == BLKmode
 768           || (op_mode != MAX_MACHINE_MODE
 769               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
 770         bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
 771                                   (op_mode == MAX_MACHINE_MODE
 772                                    ? VOIDmode : op_mode),
 773                                   MEM_VOLATILE_P (op0));
 774       else
 775         bestmode = GET_MODE (op0);
 776
 777       if (bestmode != VOIDmode
 778           && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
 779           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 780                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 781         {
 782           rtx last, tempreg, xop0;
 783           unsigned HOST_WIDE_INT xoffset, xbitpos;
 784
 785           last = get_last_insn ();
 786
 787           /* Adjust address to point to the containing unit of
 788              that mode.  Compute the offset as a multiple of this unit,
 789              counting in bytes.  */
 790           unit = GET_MODE_BITSIZE (bestmode);
 791           xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 792           xbitpos = bitnum % unit;
 793           xop0 = adjust_address (op0, bestmode, xoffset);
 794
 795           /* Fetch that unit, store the bitfield in it, then store
 796              the unit.  */
 797           tempreg = copy_to_reg (xop0);
 798           if (store_bit_field_1 (tempreg, bitsize, xbitpos,
 799                                  fieldmode, orig_value, false))
 800             {
 801               emit_move_insn (xop0, tempreg);
 802               return true;
 803             }
 804           delete_insns_since (last);
 805         }
 806     }
 807
 808   if (!fallback_p)
 809     return false;
 810
 811   store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 812   return true;
 813 }
 814
 815 /* Generate code to store value from rtx VALUE
 816    into a bit-field within structure STR_RTX
 817    containing BITSIZE bits starting at bit BITNUM.
 818    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */
 819
 820 void
 821 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 822                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 823                  rtx value)
 824 {
 825   if (!store_bit_field_1 (str_rtx, bitsize, bitnum, fieldmode, value, true))
 826     gcc_unreachable ();
 827 }
 828 \f
 829 /* Use shifts and boolean operations to store VALUE
 830    into a bit field of width BITSIZE
 831    in a memory location specified by OP0 except offset by OFFSET bytes.
 832      (OFFSET must be 0 if OP0 is a register.)
 833    The field starts at position BITPOS within the byte.
 834     (If OP0 is a register, it may be a full word or a narrower mode,
 835      but BITPOS still counts within a full word,
 836      which is significant on bigendian machines.)  */
 837
 838 static void
 839 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 840                        unsigned HOST_WIDE_INT bitsize,
 841                        unsigned HOST_WIDE_INT bitpos, rtx value)
 842 {
 843   enum machine_mode mode;
 844   unsigned int total_bits = BITS_PER_WORD;
 845   rtx temp;
 846   int all_zero = 0;
 847   int all_one = 0;
 848
 849   /* There is a case not handled here:
 850      a structure with a known alignment of just a halfword
 851      and a field split across two aligned halfwords within the structure.
 852      Or likewise a structure with a known alignment of just a byte
 853      and a field split across two bytes.
 854      Such cases are not supposed to be able to occur.  */
 855
 856   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 857     {
 858       gcc_assert (!offset);
 859       /* Special treatment for a bit field split across two registers.  */
 860       if (bitsize + bitpos > BITS_PER_WORD)
 861         {
 862           store_split_bit_field (op0, bitsize, bitpos, value);
 863           return;
 864         }
 865     }
 866   else
 867     {
 868       /* Get the proper mode to use for this field.  We want a mode that
 869          includes the entire field.  If such a mode would be larger than
 870          a word, we won't be doing the extraction the normal way.
 871          We don't want a mode bigger than the destination.  */
 872
 873       mode = GET_MODE (op0);
 874       if (GET_MODE_BITSIZE (mode) == 0
 875           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 876         mode = word_mode;
 877       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 878                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 879
 880       if (mode == VOIDmode)
 881         {
 882           /* The only way this should occur is if the field spans word
 883              boundaries.  */
 884           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 885                                  value);
 886           return;
 887         }
 888
 889       total_bits = GET_MODE_BITSIZE (mode);
 890
 891       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 892          be in the range 0 to total_bits-1, and put any excess bytes in
 893          OFFSET.  */
 894       if (bitpos >= total_bits)
 895         {
 896           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 897           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 898                      * BITS_PER_UNIT);
 899         }
 900
 901       /* Get ref to an aligned byte, halfword, or word containing the field.
 902          Adjust BITPOS to be position within a word,
 903          and OFFSET to be the offset of that word.
 904          Then alter OP0 to refer to that word.  */
 905       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 906       offset -= (offset % (total_bits / BITS_PER_UNIT));
 907       op0 = adjust_address (op0, mode, offset);
 908     }
 909
 910   mode = GET_MODE (op0);
 911
 912   /* Now MODE is either some integral mode for a MEM as OP0,
 913      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 914      The bit field is contained entirely within OP0.
 915      BITPOS is the starting bit number within OP0.
 916      (OP0's mode may actually be narrower than MODE.)  */
 917
 918   if (BYTES_BIG_ENDIAN)
 919       /* BITPOS is the distance between our msb
 920          and that of the containing datum.
 921          Convert it to the distance from the lsb.  */
 922       bitpos = total_bits - bitsize - bitpos;
 923
 924   /* Now BITPOS is always the distance between our lsb
 925      and that of OP0.  */
 926
 927   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 928      we must first convert its mode to MODE.  */
 929
 930   if (GET_CODE (value) == CONST_INT)
 931     {
 932       HOST_WIDE_INT v = INTVAL (value);
 933
 934       if (bitsize < HOST_BITS_PER_WIDE_INT)
 935         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 936
 937       if (v == 0)
 938         all_zero = 1;
 939       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 940                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 941                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 942         all_one = 1;
 943
 944       value = lshift_value (mode, value, bitpos, bitsize);
 945     }
 946   else
 947     {
 948       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 949                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 950
 951       if (GET_MODE (value) != mode)
 952         value = convert_to_mode (mode, value, 1);
 953
 954       if (must_and)
 955         value = expand_binop (mode, and_optab, value,
 956                               mask_rtx (mode, 0, bitsize, 0),
 957                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 958       if (bitpos > 0)
 959         value = expand_shift (LSHIFT_EXPR, mode, value,
 960                               build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
 961     }
 962
 963   /* Now clear the chosen bits in OP0,
 964      except that if VALUE is -1 we need not bother.  */
 965   /* We keep the intermediates in registers to allow CSE to combine
 966      consecutive bitfield assignments.  */
 967
 968   temp = force_reg (mode, op0);
 969
 970   if (! all_one)
 971     {
 972       temp = expand_binop (mode, and_optab, temp,
 973                            mask_rtx (mode, bitpos, bitsize, 1),
 974                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
 975       temp = force_reg (mode, temp);
 976     }
 977
 978   /* Now logical-or VALUE into OP0, unless it is zero.  */
 979
 980   if (! all_zero)
 981     {
 982       temp = expand_binop (mode, ior_optab, temp, value,
 983                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
 984       temp = force_reg (mode, temp);
 985     }
 986
 987   if (op0 != temp)
 988     {
 989       op0 = copy_rtx (op0);
 990       emit_move_insn (op0, temp);
 991     }
 992 }
 993 \f
 994 /* Store a bit field that is split across multiple accessible memory objects.
 995
 996    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 997    BITSIZE is the field width; BITPOS the position of its first bit
 998    (within the word).
 999    VALUE is the value to store.
1000
1001    This does not yet handle fields wider than BITS_PER_WORD.  */
1002
1003 static void
1004 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1005                        unsigned HOST_WIDE_INT bitpos, rtx value)
1006 {
1007   unsigned int unit;
1008   unsigned int bitsdone = 0;
1009
1010   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1011      much at a time.  */
1012   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1013     unit = BITS_PER_WORD;
1014   else
1015     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1016
1017   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1018      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1019      that VALUE might be a floating-point constant.  */
1020   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
1021     {
1022       rtx word = gen_lowpart_common (word_mode, value);
1023
1024       if (word && (value != word))
1025         value = word;
1026       else
1027         value = gen_lowpart_common (word_mode,
1028                                     force_reg (GET_MODE (value) != VOIDmode
1029                                                ? GET_MODE (value)
1030                                                : word_mode, value));
1031     }
1032
1033   while (bitsdone < bitsize)
1034     {
1035       unsigned HOST_WIDE_INT thissize;
1036       rtx part, word;
1037       unsigned HOST_WIDE_INT thispos;
1038       unsigned HOST_WIDE_INT offset;
1039
1040       offset = (bitpos + bitsdone) / unit;
1041       thispos = (bitpos + bitsdone) % unit;
1042
1043       /* THISSIZE must not overrun a word boundary.  Otherwise,
1044          store_fixed_bit_field will call us again, and we will mutually
1045          recurse forever.  */
1046       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1047       thissize = MIN (thissize, unit - thispos);
1048
1049       if (BYTES_BIG_ENDIAN)
1050         {
1051           int total_bits;
1052
1053           /* We must do an endian conversion exactly the same way as it is
1054              done in extract_bit_field, so that the two calls to
1055              extract_fixed_bit_field will have comparable arguments.  */
1056           if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1057             total_bits = BITS_PER_WORD;
1058           else
1059             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1060
1061           /* Fetch successively less significant portions.  */
1062           if (GET_CODE (value) == CONST_INT)
1063             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1064                              >> (bitsize - bitsdone - thissize))
1065                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1066           else
1067             /* The args are chosen so that the last part includes the
1068                lsb.  Give extract_bit_field the value it needs (with
1069                endianness compensation) to fetch the piece we want.  */
1070             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1071                                             total_bits - bitsize + bitsdone,
1072                                             NULL_RTX, 1);
1073         }
1074       else
1075         {
1076           /* Fetch successively more significant portions.  */
1077           if (GET_CODE (value) == CONST_INT)
1078             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1079                              >> bitsdone)
1080                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1081           else
1082             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1083                                             bitsdone, NULL_RTX, 1);
1084         }
1085
1086       /* If OP0 is a register, then handle OFFSET here.
1087
1088          When handling multiword bitfields, extract_bit_field may pass
1089          down a word_mode SUBREG of a larger REG for a bitfield that actually
1090          crosses a word boundary.  Thus, for a SUBREG, we must find
1091          the current word starting from the base register.  */
1092       if (GET_CODE (op0) == SUBREG)
1093         {
1094           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1095           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1096                                         GET_MODE (SUBREG_REG (op0)));
1097           offset = 0;
1098         }
1099       else if (REG_P (op0))
1100         {
1101           word = operand_subword_force (op0, offset, GET_MODE (op0));
1102           offset = 0;
1103         }
1104       else
1105         word = op0;
1106
1107       /* OFFSET is in UNITs, and UNIT is in bits.
1108          store_fixed_bit_field wants offset in bytes.  */
1109       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1110                              thispos, part);
1111       bitsdone += thissize;
1112     }
1113 }
1114 \f
1115 /* A subroutine of extract_bit_field_1 that converts return value X
1116    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1117    to extract_bit_field.  */
1118
1119 static rtx
1120 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1121                              enum machine_mode tmode, bool unsignedp)
1122 {
1123   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1124     return x;
1125
1126   /* If the x mode is not a scalar integral, first convert to the
1127      integer mode of that size and then access it as a floating-point
1128      value via a SUBREG.  */
1129   if (!SCALAR_INT_MODE_P (tmode))
1130     {
1131       enum machine_mode smode;
1132
1133       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1134       x = convert_to_mode (smode, x, unsignedp);
1135       x = force_reg (smode, x);
1136       return gen_lowpart (tmode, x);
1137     }
1138
1139   return convert_to_mode (tmode, x, unsignedp);
1140 }
1141
1142 /* A subroutine of extract_bit_field, with the same arguments.
1143    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1144    if we can find no other means of implementing the operation.
1145    if FALLBACK_P is false, return NULL instead.  */
1146
1147 static rtx
1148 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1149                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1150                      enum machine_mode mode, enum machine_mode tmode,
1151                      bool fallback_p)
1152 {
1153   unsigned int unit
1154     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1155   unsigned HOST_WIDE_INT offset, bitpos;
1156   rtx op0 = str_rtx;
1157   enum machine_mode int_mode;
1158   enum machine_mode ext_mode;
1159   enum machine_mode mode1;
1160   enum insn_code icode;
1161   int byte_offset;
1162
1163   if (tmode == VOIDmode)
1164     tmode = mode;
1165
1166   while (GET_CODE (op0) == SUBREG)
1167     {
1168       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1169       op0 = SUBREG_REG (op0);
1170     }
1171
1172   /* If we have an out-of-bounds access to a register, just return an
1173      uninitialized register of the required mode.  This can occur if the
1174      source code contains an out-of-bounds access to a small array.  */
1175   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1176     return gen_reg_rtx (tmode);
1177
1178   if (REG_P (op0)
1179       && mode == GET_MODE (op0)
1180       && bitnum == 0
1181       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1182     {
1183       /* We're trying to extract a full register from itself.  */
1184       return op0;
1185     }
1186
1187   /* See if we can get a better vector mode before extracting.  */
1188   if (VECTOR_MODE_P (GET_MODE (op0))
1189       && !MEM_P (op0)
1190       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1191     {
1192       enum machine_mode new_mode;
1193       int nunits = GET_MODE_NUNITS (GET_MODE (op0));
1194
1195       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1196         new_mode = MIN_MODE_VECTOR_FLOAT;
1197       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1198         new_mode = MIN_MODE_VECTOR_FRACT;
1199       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1200         new_mode = MIN_MODE_VECTOR_UFRACT;
1201       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1202         new_mode = MIN_MODE_VECTOR_ACCUM;
1203       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1204         new_mode = MIN_MODE_VECTOR_UACCUM;
1205       else
1206         new_mode = MIN_MODE_VECTOR_INT;
1207
1208       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1209         if (GET_MODE_NUNITS (new_mode) == nunits
1210             && GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1211             && targetm.vector_mode_supported_p (new_mode))
1212           break;
1213       if (new_mode != VOIDmode)
1214         op0 = gen_lowpart (new_mode, op0);
1215     }
1216
1217   /* Use vec_extract patterns for extracting parts of vectors whenever
1218      available.  */
1219   if (VECTOR_MODE_P (GET_MODE (op0))
1220       && !MEM_P (op0)
1221       && (optab_handler (vec_extract_optab, GET_MODE (op0))->insn_code
1222           != CODE_FOR_nothing)
1223       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1224           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1225     {
1226       enum machine_mode outermode = GET_MODE (op0);
1227       enum machine_mode innermode = GET_MODE_INNER (outermode);
1228       int icode = (int) optab_handler (vec_extract_optab, outermode)->insn_code;
1229       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1230       rtx rtxpos = GEN_INT (pos);
1231       rtx src = op0;
1232       rtx dest = NULL, pat, seq;
1233       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1234       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1235       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1236
1237       if (innermode == tmode || innermode == mode)
1238         dest = target;
1239
1240       if (!dest)
1241         dest = gen_reg_rtx (innermode);
1242
1243       start_sequence ();
1244
1245       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1246         dest = copy_to_mode_reg (mode0, dest);
1247
1248       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1249         src = copy_to_mode_reg (mode1, src);
1250
1251       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1252         rtxpos = copy_to_mode_reg (mode1, rtxpos);
1253
1254       /* We could handle this, but we should always be called with a pseudo
1255          for our targets and all insns should take them as outputs.  */
1256       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1257                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
1258                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1259
1260       pat = GEN_FCN (icode) (dest, src, rtxpos);
1261       seq = get_insns ();
1262       end_sequence ();
1263       if (pat)
1264         {
1265           emit_insn (seq);
1266           emit_insn (pat);
1267           if (mode0 != mode)
1268             return gen_lowpart (tmode, dest);
1269           return dest;
1270         }
1271     }
1272
1273   /* Make sure we are playing with integral modes.  Pun with subregs
1274      if we aren't.  */
1275   {
1276     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1277     if (imode != GET_MODE (op0))
1278       {
1279         if (MEM_P (op0))
1280           op0 = adjust_address (op0, imode, 0);
1281         else
1282           {
1283             gcc_assert (imode != BLKmode);
1284             op0 = gen_lowpart (imode, op0);
1285
1286             /* If we got a SUBREG, force it into a register since we
1287                aren't going to be able to do another SUBREG on it.  */
1288             if (GET_CODE (op0) == SUBREG)
1289               op0 = force_reg (imode, op0);
1290           }
1291       }
1292   }
1293
1294   /* We may be accessing data outside the field, which means
1295      we can alias adjacent data.  */
1296   if (MEM_P (op0))
1297     {
1298       op0 = shallow_copy_rtx (op0);
1299       set_mem_alias_set (op0, 0);
1300       set_mem_expr (op0, 0);
1301     }
1302
1303   /* Extraction of a full-word or multi-word value from a structure
1304      in a register or aligned memory can be done with just a SUBREG.
1305      A subword value in the least significant part of a register
1306      can also be extracted with a SUBREG.  For this, we need the
1307      byte offset of the value in op0.  */
1308
1309   bitpos = bitnum % unit;
1310   offset = bitnum / unit;
1311   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1312
1313   /* If OP0 is a register, BITPOS must count within a word.
1314      But as we have it, it counts within whatever size OP0 now has.
1315      On a bigendian machine, these are not the same, so convert.  */
1316   if (BYTES_BIG_ENDIAN
1317       && !MEM_P (op0)
1318       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1319     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1320
1321   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1322      If that's wrong, the solution is to test for it and set TARGET to 0
1323      if needed.  */
1324
1325   /* Only scalar integer modes can be converted via subregs.  There is an
1326      additional problem for FP modes here in that they can have a precision
1327      which is different from the size.  mode_for_size uses precision, but
1328      we want a mode based on the size, so we must avoid calling it for FP
1329      modes.  */
1330   mode1  = (SCALAR_INT_MODE_P (tmode)
1331             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1332             : mode);
1333
1334   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1335         && bitpos % BITS_PER_WORD == 0)
1336        || (mode1 != BLKmode
1337            /* ??? The big endian test here is wrong.  This is correct
1338               if the value is in a register, and if mode_for_size is not
1339               the same mode as op0.  This causes us to get unnecessarily
1340               inefficient code from the Thumb port when -mbig-endian.  */
1341            && (BYTES_BIG_ENDIAN
1342                ? bitpos + bitsize == BITS_PER_WORD
1343                : bitpos == 0)))
1344       && ((!MEM_P (op0)
1345            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode1),
1346                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1347            && GET_MODE_SIZE (mode1) != 0
1348            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1349           || (MEM_P (op0)
1350               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1351                   || (offset * BITS_PER_UNIT % bitsize == 0
1352                       && MEM_ALIGN (op0) % bitsize == 0)))))
1353     {
1354       if (MEM_P (op0))
1355         op0 = adjust_address (op0, mode1, offset);
1356       else if (mode1 != GET_MODE (op0))
1357         {
1358           rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1359                                          byte_offset);
1360           if (sub == NULL)
1361             goto no_subreg_mode_swap;
1362           op0 = sub;
1363         }
1364       if (mode1 != mode)
1365         return convert_to_mode (tmode, op0, unsignedp);
1366       return op0;
1367     }
1368  no_subreg_mode_swap:
1369
1370   /* Handle fields bigger than a word.  */
1371
1372   if (bitsize > BITS_PER_WORD)
1373     {
1374       /* Here we transfer the words of the field
1375          in the order least significant first.
1376          This is because the most significant word is the one which may
1377          be less than full.  */
1378
1379       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1380       unsigned int i;
1381
1382       if (target == 0 || !REG_P (target))
1383         target = gen_reg_rtx (mode);
1384
1385       /* Indicate for flow that the entire target reg is being set.  */
1386       emit_clobber (target);
1387
1388       for (i = 0; i < nwords; i++)
1389         {
1390           /* If I is 0, use the low-order word in both field and target;
1391              if I is 1, use the next to lowest word; and so on.  */
1392           /* Word number in TARGET to use.  */
1393           unsigned int wordnum
1394             = (WORDS_BIG_ENDIAN
1395                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1396                : i);
1397           /* Offset from start of field in OP0.  */
1398           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1399                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1400                                                 * (int) BITS_PER_WORD))
1401                                      : (int) i * BITS_PER_WORD);
1402           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1403           rtx result_part
1404             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1405                                            bitsize - i * BITS_PER_WORD),
1406                                  bitnum + bit_offset, 1, target_part, mode,
1407                                  word_mode);
1408
1409           gcc_assert (target_part);
1410
1411           if (result_part != target_part)
1412             emit_move_insn (target_part, result_part);
1413         }
1414
1415       if (unsignedp)
1416         {
1417           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1418              need to be zero'd out.  */
1419           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1420             {
1421               unsigned int i, total_words;
1422
1423               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1424               for (i = nwords; i < total_words; i++)
1425                 emit_move_insn
1426                   (operand_subword (target,
1427                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1428                                     1, VOIDmode),
1429                    const0_rtx);
1430             }
1431           return target;
1432         }
1433
1434       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1435       target = expand_shift (LSHIFT_EXPR, mode, target,
1436                              build_int_cst (NULL_TREE,
1437                                             GET_MODE_BITSIZE (mode) - bitsize),
1438                              NULL_RTX, 0);
1439       return expand_shift (RSHIFT_EXPR, mode, target,
1440                            build_int_cst (NULL_TREE,
1441                                           GET_MODE_BITSIZE (mode) - bitsize),
1442                            NULL_RTX, 0);
1443     }
1444
1445   /* From here on we know the desired field is smaller than a word.  */
1446
1447   /* Check if there is a correspondingly-sized integer field, so we can
1448      safely extract it as one size of integer, if necessary; then
1449      truncate or extend to the size that is wanted; then use SUBREGs or
1450      convert_to_mode to get one of the modes we really wanted.  */
1451
1452   int_mode = int_mode_for_mode (tmode);
1453   if (int_mode == BLKmode)
1454     int_mode = int_mode_for_mode (mode);
1455   /* Should probably push op0 out to memory and then do a load.  */
1456   gcc_assert (int_mode != BLKmode);
1457
1458   /* OFFSET is the number of words or bytes (UNIT says which)
1459      from STR_RTX to the first word or byte containing part of the field.  */
1460   if (!MEM_P (op0))
1461     {
1462       if (offset != 0
1463           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1464         {
1465           if (!REG_P (op0))
1466             op0 = copy_to_reg (op0);
1467           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1468                                 op0, (offset * UNITS_PER_WORD));
1469         }
1470       offset = 0;
1471     }
1472
1473   /* Now OFFSET is nonzero only for memory operands.  */
1474   ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1475   icode = unsignedp ? CODE_FOR_extzv : CODE_FOR_extv;
1476   if (ext_mode != MAX_MACHINE_MODE
1477       && bitsize > 0
1478       && GET_MODE_BITSIZE (ext_mode) >= bitsize
1479       /* If op0 is a register, we need it in EXT_MODE to make it
1480          acceptable to the format of ext(z)v.  */
1481       && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1482       && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1483            && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode)))
1484       && check_predicate_volatile_ok (icode, 1, op0, GET_MODE (op0)))
1485     {
1486       unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1487       rtx bitsize_rtx, bitpos_rtx;
1488       rtx last = get_last_insn ();
1489       rtx xop0 = op0;
1490       rtx xtarget = target;
1491       rtx xspec_target = target;
1492       rtx xspec_target_subreg = 0;
1493       rtx pat;
1494
1495       /* If op0 is a register, we need it in EXT_MODE to make it
1496          acceptable to the format of ext(z)v.  */
1497       if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1498         xop0 = gen_rtx_SUBREG (ext_mode, xop0, 0);
1499       if (MEM_P (xop0))
1500         /* Get ref to first byte containing part of the field.  */
1501         xop0 = adjust_address (xop0, byte_mode, xoffset);
1502
1503       /* On big-endian machines, we count bits from the most significant.
1504          If the bit field insn does not, we must invert.  */
1505       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1506         xbitpos = unit - bitsize - xbitpos;
1507
1508       /* Now convert from counting within UNIT to counting in EXT_MODE.  */
1509       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1510         xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1511
1512       unit = GET_MODE_BITSIZE (ext_mode);
1513
1514       if (xtarget == 0)
1515         xtarget = xspec_target = gen_reg_rtx (tmode);
1516
1517       if (GET_MODE (xtarget) != ext_mode)
1518         {
1519           if (REG_P (xtarget))
1520             {
1521               xtarget = gen_lowpart (ext_mode, xtarget);
1522               if (GET_MODE_SIZE (ext_mode)
1523                   > GET_MODE_SIZE (GET_MODE (xspec_target)))
1524                 xspec_target_subreg = xtarget;
1525             }
1526           else
1527             xtarget = gen_reg_rtx (ext_mode);
1528         }
1529
1530       /* If this machine's ext(z)v insists on a register target,
1531          make sure we have one.  */
1532       if (!insn_data[(int) icode].operand[0].predicate (xtarget, ext_mode))
1533         xtarget = gen_reg_rtx (ext_mode);
1534
1535       bitsize_rtx = GEN_INT (bitsize);
1536       bitpos_rtx = GEN_INT (xbitpos);
1537
1538       pat = (unsignedp
1539              ? gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx)
1540              : gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx));
1541       if (pat)
1542         {
1543           emit_insn (pat);
1544           if (xtarget == xspec_target)
1545             return xtarget;
1546           if (xtarget == xspec_target_subreg)
1547             return xspec_target;
1548           return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1549         }
1550       delete_insns_since (last);
1551     }
1552
1553   /* If OP0 is a memory, try copying it to a register and seeing if a
1554      cheap register alternative is available.  */
1555   if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1556     {
1557       enum machine_mode bestmode;
1558
1559       /* Get the mode to use for inserting into this field.  If
1560          OP0 is BLKmode, get the smallest mode consistent with the
1561          alignment. If OP0 is a non-BLKmode object that is no
1562          wider than EXT_MODE, use its mode. Otherwise, use the
1563          smallest mode containing the field.  */
1564
1565       if (GET_MODE (op0) == BLKmode
1566           || (ext_mode != MAX_MACHINE_MODE
1567               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1568         bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
1569                                   (ext_mode == MAX_MACHINE_MODE
1570                                    ? VOIDmode : ext_mode),
1571                                   MEM_VOLATILE_P (op0));
1572       else
1573         bestmode = GET_MODE (op0);
1574
1575       if (bestmode != VOIDmode
1576           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1577                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1578         {
1579           unsigned HOST_WIDE_INT xoffset, xbitpos;
1580
1581           /* Compute the offset as a multiple of this unit,
1582              counting in bytes.  */
1583           unit = GET_MODE_BITSIZE (bestmode);
1584           xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1585           xbitpos = bitnum % unit;
1586
1587           /* Make sure the register is big enough for the whole field.  */
1588           if (xoffset * BITS_PER_UNIT + unit
1589               >= offset * BITS_PER_UNIT + bitsize)
1590             {
1591               rtx last, result, xop0;
1592
1593               last = get_last_insn ();
1594
1595               /* Fetch it to a register in that size.  */
1596               xop0 = adjust_address (op0, bestmode, xoffset);
1597               xop0 = force_reg (bestmode, xop0);
1598               result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1599                                             unsignedp, target,
1600                                             mode, tmode, false);
1601               if (result)
1602                 return result;
1603
1604               delete_insns_since (last);
1605             }
1606         }
1607     }
1608
1609   if (!fallback_p)
1610     return NULL;
1611
1612   target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1613                                     bitpos, target, unsignedp);
1614   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1615 }
1616
1617 /* Generate code to extract a byte-field from STR_RTX
1618    containing BITSIZE bits, starting at BITNUM,
1619    and put it in TARGET if possible (if TARGET is nonzero).
1620    Regardless of TARGET, we return the rtx for where the value is placed.
1621
1622    STR_RTX is the structure containing the byte (a REG or MEM).
1623    UNSIGNEDP is nonzero if this is an unsigned bit field.
1624    MODE is the natural mode of the field value once extracted.
1625    TMODE is the mode the caller would like the value to have;
1626    but the value may be returned with type MODE instead.
1627
1628    If a TARGET is specified and we can store in it at no extra cost,
1629    we do so, and return TARGET.
1630    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1631    if they are equally easy.  */
1632
1633 rtx
1634 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1635                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1636                    enum machine_mode mode, enum machine_mode tmode)
1637 {
1638   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1639                               target, mode, tmode, true);
1640 }
1641 \f
1642 /* Extract a bit field using shifts and boolean operations
1643    Returns an rtx to represent the value.
1644    OP0 addresses a register (word) or memory (byte).
1645    BITPOS says which bit within the word or byte the bit field starts in.
1646    OFFSET says how many bytes farther the bit field starts;
1647     it is 0 if OP0 is a register.
1648    BITSIZE says how many bits long the bit field is.
1649     (If OP0 is a register, it may be narrower than a full word,
1650      but BITPOS still counts within a full word,
1651      which is significant on bigendian machines.)
1652
1653    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1654    If TARGET is nonzero, attempts to store the value there
1655    and return TARGET, but this is not guaranteed.
1656    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1657
1658 static rtx
1659 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1660                          unsigned HOST_WIDE_INT offset,
1661                          unsigned HOST_WIDE_INT bitsize,
1662                          unsigned HOST_WIDE_INT bitpos, rtx target,
1663                          int unsignedp)
1664 {
1665   unsigned int total_bits = BITS_PER_WORD;
1666   enum machine_mode mode;
1667
1668   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1669     {
1670       /* Special treatment for a bit field split across two registers.  */
1671       if (bitsize + bitpos > BITS_PER_WORD)
1672         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1673     }
1674   else
1675     {
1676       /* Get the proper mode to use for this field.  We want a mode that
1677          includes the entire field.  If such a mode would be larger than
1678          a word, we won't be doing the extraction the normal way.  */
1679
1680       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1681                             MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1682
1683       if (mode == VOIDmode)
1684         /* The only way this should occur is if the field spans word
1685            boundaries.  */
1686         return extract_split_bit_field (op0, bitsize,
1687                                         bitpos + offset * BITS_PER_UNIT,
1688                                         unsignedp);
1689
1690       total_bits = GET_MODE_BITSIZE (mode);
1691
1692       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1693          be in the range 0 to total_bits-1, and put any excess bytes in
1694          OFFSET.  */
1695       if (bitpos >= total_bits)
1696         {
1697           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1698           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1699                      * BITS_PER_UNIT);
1700         }
1701
1702       /* Get ref to an aligned byte, halfword, or word containing the field.
1703          Adjust BITPOS to be position within a word,
1704          and OFFSET to be the offset of that word.
1705          Then alter OP0 to refer to that word.  */
1706       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1707       offset -= (offset % (total_bits / BITS_PER_UNIT));
1708       op0 = adjust_address (op0, mode, offset);
1709     }
1710
1711   mode = GET_MODE (op0);
1712
1713   if (BYTES_BIG_ENDIAN)
1714     /* BITPOS is the distance between our msb and that of OP0.
1715        Convert it to the distance from the lsb.  */
1716     bitpos = total_bits - bitsize - bitpos;
1717
1718   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1719      We have reduced the big-endian case to the little-endian case.  */
1720
1721   if (unsignedp)
1722     {
1723       if (bitpos)
1724         {
1725           /* If the field does not already start at the lsb,
1726              shift it so it does.  */
1727           tree amount = build_int_cst (NULL_TREE, bitpos);
1728           /* Maybe propagate the target for the shift.  */
1729           /* But not if we will return it--could confuse integrate.c.  */
1730           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1731           if (tmode != mode) subtarget = 0;
1732           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1733         }
1734       /* Convert the value to the desired mode.  */
1735       if (mode != tmode)
1736         op0 = convert_to_mode (tmode, op0, 1);
1737
1738       /* Unless the msb of the field used to be the msb when we shifted,
1739          mask out the upper bits.  */
1740
1741       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1742         return expand_binop (GET_MODE (op0), and_optab, op0,
1743                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1744                              target, 1, OPTAB_LIB_WIDEN);
1745       return op0;
1746     }
1747
1748   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1749      then arithmetic-shift its lsb to the lsb of the word.  */
1750   op0 = force_reg (mode, op0);
1751   if (mode != tmode)
1752     target = 0;
1753
1754   /* Find the narrowest integer mode that contains the field.  */
1755
1756   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1757        mode = GET_MODE_WIDER_MODE (mode))
1758     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1759       {
1760         op0 = convert_to_mode (mode, op0, 0);
1761         break;
1762       }
1763
1764   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1765     {
1766       tree amount
1767         = build_int_cst (NULL_TREE,
1768                          GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1769       /* Maybe propagate the target for the shift.  */
1770       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1771       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1772     }
1773
1774   return expand_shift (RSHIFT_EXPR, mode, op0,
1775                        build_int_cst (NULL_TREE,
1776                                       GET_MODE_BITSIZE (mode) - bitsize),
1777                        target, 0);
1778 }
1779 \f
1780 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1781    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1782    complement of that if COMPLEMENT.  The mask is truncated if
1783    necessary to the width of mode MODE.  The mask is zero-extended if
1784    BITSIZE+BITPOS is too small for MODE.  */
1785
1786 static rtx
1787 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1788 {
1789   HOST_WIDE_INT masklow, maskhigh;
1790
1791   if (bitsize == 0)
1792     masklow = 0;
1793   else if (bitpos < HOST_BITS_PER_WIDE_INT)
1794     masklow = (HOST_WIDE_INT) -1 << bitpos;
1795   else
1796     masklow = 0;
1797
1798   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1799     masklow &= ((unsigned HOST_WIDE_INT) -1
1800                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1801
1802   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1803     maskhigh = -1;
1804   else
1805     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1806
1807   if (bitsize == 0)
1808     maskhigh = 0;
1809   else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1810     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1811                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1812   else
1813     maskhigh = 0;
1814
1815   if (complement)
1816     {
1817       maskhigh = ~maskhigh;
1818       masklow = ~masklow;
1819     }
1820
1821   return immed_double_const (masklow, maskhigh, mode);
1822 }
1823
1824 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1825    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1826
1827 static rtx
1828 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1829 {
1830   unsigned HOST_WIDE_INT v = INTVAL (value);
1831   HOST_WIDE_INT low, high;
1832
1833   if (bitsize < HOST_BITS_PER_WIDE_INT)
1834     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1835
1836   if (bitpos < HOST_BITS_PER_WIDE_INT)
1837     {
1838       low = v << bitpos;
1839       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1840     }
1841   else
1842     {
1843       low = 0;
1844       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1845     }
1846
1847   return immed_double_const (low, high, mode);
1848 }
1849 \f
1850 /* Extract a bit field that is split across two words
1851    and return an RTX for the result.
1852
1853    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1854    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1855    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1856
1857 static rtx
1858 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1859                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1860 {
1861   unsigned int unit;
1862   unsigned int bitsdone = 0;
1863   rtx result = NULL_RTX;
1864   int first = 1;
1865
1866   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1867      much at a time.  */
1868   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1869     unit = BITS_PER_WORD;
1870   else
1871     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1872
1873   while (bitsdone < bitsize)
1874     {
1875       unsigned HOST_WIDE_INT thissize;
1876       rtx part, word;
1877       unsigned HOST_WIDE_INT thispos;
1878       unsigned HOST_WIDE_INT offset;
1879
1880       offset = (bitpos + bitsdone) / unit;
1881       thispos = (bitpos + bitsdone) % unit;
1882
1883       /* THISSIZE must not overrun a word boundary.  Otherwise,
1884          extract_fixed_bit_field will call us again, and we will mutually
1885          recurse forever.  */
1886       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1887       thissize = MIN (thissize, unit - thispos);
1888
1889       /* If OP0 is a register, then handle OFFSET here.
1890
1891          When handling multiword bitfields, extract_bit_field may pass
1892          down a word_mode SUBREG of a larger REG for a bitfield that actually
1893          crosses a word boundary.  Thus, for a SUBREG, we must find
1894          the current word starting from the base register.  */
1895       if (GET_CODE (op0) == SUBREG)
1896         {
1897           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1898           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1899                                         GET_MODE (SUBREG_REG (op0)));
1900           offset = 0;
1901         }
1902       else if (REG_P (op0))
1903         {
1904           word = operand_subword_force (op0, offset, GET_MODE (op0));
1905           offset = 0;
1906         }
1907       else
1908         word = op0;
1909
1910       /* Extract the parts in bit-counting order,
1911          whose meaning is determined by BYTES_PER_UNIT.
1912          OFFSET is in UNITs, and UNIT is in bits.
1913          extract_fixed_bit_field wants offset in bytes.  */
1914       part = extract_fixed_bit_field (word_mode, word,
1915                                       offset * unit / BITS_PER_UNIT,
1916                                       thissize, thispos, 0, 1);
1917       bitsdone += thissize;
1918
1919       /* Shift this part into place for the result.  */
1920       if (BYTES_BIG_ENDIAN)
1921         {
1922           if (bitsize != bitsdone)
1923             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1924                                  build_int_cst (NULL_TREE, bitsize - bitsdone),
1925                                  0, 1);
1926         }
1927       else
1928         {
1929           if (bitsdone != thissize)
1930             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1931                                  build_int_cst (NULL_TREE,
1932                                                 bitsdone - thissize), 0, 1);
1933         }
1934
1935       if (first)
1936         result = part;
1937       else
1938         /* Combine the parts with bitwise or.  This works
1939            because we extracted each part as an unsigned bit field.  */
1940         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1941                                OPTAB_LIB_WIDEN);
1942
1943       first = 0;
1944     }
1945
1946   /* Unsigned bit field: we are done.  */
1947   if (unsignedp)
1948     return result;
1949   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1950   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1951                          build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1952                          NULL_RTX, 0);
1953   return expand_shift (RSHIFT_EXPR, word_mode, result,
1954                        build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1955                        NULL_RTX, 0);
1956 }
1957 \f
1958 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1959    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
1960    MODE, fill the upper bits with zeros.  Fail if the layout of either
1961    mode is unknown (as for CC modes) or if the extraction would involve
1962    unprofitable mode punning.  Return the value on success, otherwise
1963    return null.
1964
1965    This is different from gen_lowpart* in these respects:
1966
1967      - the returned value must always be considered an rvalue
1968
1969      - when MODE is wider than SRC_MODE, the extraction involves
1970        a zero extension
1971
1972      - when MODE is smaller than SRC_MODE, the extraction involves
1973        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
1974
1975    In other words, this routine performs a computation, whereas the
1976    gen_lowpart* routines are conceptually lvalue or rvalue subreg
1977    operations.  */
1978
1979 rtx
1980 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
1981 {
1982   enum machine_mode int_mode, src_int_mode;
1983
1984   if (mode == src_mode)
1985     return src;
1986
1987   if (CONSTANT_P (src))
1988     {
1989       /* simplify_gen_subreg can't be used here, as if simplify_subreg
1990          fails, it will happily create (subreg (symbol_ref)) or similar
1991          invalid SUBREGs.  */
1992       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
1993       rtx ret = simplify_subreg (mode, src, src_mode, byte);
1994       if (ret)
1995         return ret;
1996
1997       if (GET_MODE (src) == VOIDmode
1998           || !validate_subreg (mode, src_mode, src, byte))
1999         return NULL_RTX;
2000
2001       src = force_reg (GET_MODE (src), src);
2002       return gen_rtx_SUBREG (mode, src, byte);
2003     }
2004
2005   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2006     return NULL_RTX;
2007
2008   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2009       && MODES_TIEABLE_P (mode, src_mode))
2010     {
2011       rtx x = gen_lowpart_common (mode, src);
2012       if (x)
2013         return x;
2014     }
2015
2016   src_int_mode = int_mode_for_mode (src_mode);
2017   int_mode = int_mode_for_mode (mode);
2018   if (src_int_mode == BLKmode || int_mode == BLKmode)
2019     return NULL_RTX;
2020
2021   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2022     return NULL_RTX;
2023   if (!MODES_TIEABLE_P (int_mode, mode))
2024     return NULL_RTX;
2025
2026   src = gen_lowpart (src_int_mode, src);
2027   src = convert_modes (int_mode, src_int_mode, src, true);
2028   src = gen_lowpart (mode, src);
2029   return src;
2030 }
2031 \f
2032 /* Add INC into TARGET.  */
2033
2034 void
2035 expand_inc (rtx target, rtx inc)
2036 {
2037   rtx value = expand_binop (GET_MODE (target), add_optab,
2038                             target, inc,
2039                             target, 0, OPTAB_LIB_WIDEN);
2040   if (value != target)
2041     emit_move_insn (target, value);
2042 }
2043
2044 /* Subtract DEC from TARGET.  */
2045
2046 void
2047 expand_dec (rtx target, rtx dec)
2048 {
2049   rtx value = expand_binop (GET_MODE (target), sub_optab,
2050                             target, dec,
2051                             target, 0, OPTAB_LIB_WIDEN);
2052   if (value != target)
2053     emit_move_insn (target, value);
2054 }
2055 \f
2056 /* Output a shift instruction for expression code CODE,
2057    with SHIFTED being the rtx for the value to shift,
2058    and AMOUNT the tree for the amount to shift by.
2059    Store the result in the rtx TARGET, if that is convenient.
2060    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2061    Return the rtx for where the value is.  */
2062
2063 rtx
2064 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2065               tree amount, rtx target, int unsignedp)
2066 {
2067   rtx op1, temp = 0;
2068   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2069   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2070   optab lshift_optab = ashl_optab;
2071   optab rshift_arith_optab = ashr_optab;
2072   optab rshift_uns_optab = lshr_optab;
2073   optab lrotate_optab = rotl_optab;
2074   optab rrotate_optab = rotr_optab;
2075   enum machine_mode op1_mode;
2076   int attempt;
2077   bool speed = optimize_insn_for_speed_p ();
2078
2079   op1 = expand_normal (amount);
2080   op1_mode = GET_MODE (op1);
2081
2082   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2083      shift amount is a vector, use the vector/vector shift patterns.  */
2084   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2085     {
2086       lshift_optab = vashl_optab;
2087       rshift_arith_optab = vashr_optab;
2088       rshift_uns_optab = vlshr_optab;
2089       lrotate_optab = vrotl_optab;
2090       rrotate_optab = vrotr_optab;
2091     }
2092
2093   /* Previously detected shift-counts computed by NEGATE_EXPR
2094      and shifted in the other direction; but that does not work
2095      on all machines.  */
2096
2097   if (SHIFT_COUNT_TRUNCATED)
2098     {
2099       if (GET_CODE (op1) == CONST_INT
2100           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2101               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2102         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2103                        % GET_MODE_BITSIZE (mode));
2104       else if (GET_CODE (op1) == SUBREG
2105                && subreg_lowpart_p (op1))
2106         op1 = SUBREG_REG (op1);
2107     }
2108
2109   if (op1 == const0_rtx)
2110     return shifted;
2111
2112   /* Check whether its cheaper to implement a left shift by a constant
2113      bit count by a sequence of additions.  */
2114   if (code == LSHIFT_EXPR
2115       && GET_CODE (op1) == CONST_INT
2116       && INTVAL (op1) > 0
2117       && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2118       && INTVAL (op1) < MAX_BITS_PER_WORD
2119       && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2120       && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2121     {
2122       int i;
2123       for (i = 0; i < INTVAL (op1); i++)
2124         {
2125           temp = force_reg (mode, shifted);
2126           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2127                                   unsignedp, OPTAB_LIB_WIDEN);
2128         }
2129       return shifted;
2130     }
2131
2132   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2133     {
2134       enum optab_methods methods;
2135
2136       if (attempt == 0)
2137         methods = OPTAB_DIRECT;
2138       else if (attempt == 1)
2139         methods = OPTAB_WIDEN;
2140       else
2141         methods = OPTAB_LIB_WIDEN;
2142
2143       if (rotate)
2144         {
2145           /* Widening does not work for rotation.  */
2146           if (methods == OPTAB_WIDEN)
2147             continue;
2148           else if (methods == OPTAB_LIB_WIDEN)
2149             {
2150               /* If we have been unable to open-code this by a rotation,
2151                  do it as the IOR of two shifts.  I.e., to rotate A
2152                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2153                  where C is the bitsize of A.
2154
2155                  It is theoretically possible that the target machine might
2156                  not be able to perform either shift and hence we would
2157                  be making two libcalls rather than just the one for the
2158                  shift (similarly if IOR could not be done).  We will allow
2159                  this extremely unlikely lossage to avoid complicating the
2160                  code below.  */
2161
2162               rtx subtarget = target == shifted ? 0 : target;
2163               tree new_amount, other_amount;
2164               rtx temp1;
2165               tree type = TREE_TYPE (amount);
2166               if (GET_MODE (op1) != TYPE_MODE (type)
2167                   && GET_MODE (op1) != VOIDmode)
2168                 op1 = convert_to_mode (TYPE_MODE (type), op1, 1);
2169               new_amount = make_tree (type, op1);
2170               other_amount
2171                 = fold_build2 (MINUS_EXPR, type,
2172                                build_int_cst (type, GET_MODE_BITSIZE (mode)),
2173                                new_amount);
2174
2175               shifted = force_reg (mode, shifted);
2176
2177               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2178                                    mode, shifted, new_amount, 0, 1);
2179               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2180                                     mode, shifted, other_amount, subtarget, 1);
2181               return expand_binop (mode, ior_optab, temp, temp1, target,
2182                                    unsignedp, methods);
2183             }
2184
2185           temp = expand_binop (mode,
2186                                left ? lrotate_optab : rrotate_optab,
2187                                shifted, op1, target, unsignedp, methods);
2188         }
2189       else if (unsignedp)
2190         temp = expand_binop (mode,
2191                              left ? lshift_optab : rshift_uns_optab,
2192                              shifted, op1, target, unsignedp, methods);
2193
2194       /* Do arithmetic shifts.
2195          Also, if we are going to widen the operand, we can just as well
2196          use an arithmetic right-shift instead of a logical one.  */
2197       if (temp == 0 && ! rotate
2198           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2199         {
2200           enum optab_methods methods1 = methods;
2201
2202           /* If trying to widen a log shift to an arithmetic shift,
2203              don't accept an arithmetic shift of the same size.  */
2204           if (unsignedp)
2205             methods1 = OPTAB_MUST_WIDEN;
2206
2207           /* Arithmetic shift */
2208
2209           temp = expand_binop (mode,
2210                                left ? lshift_optab : rshift_arith_optab,
2211                                shifted, op1, target, unsignedp, methods1);
2212         }
2213
2214       /* We used to try extzv here for logical right shifts, but that was
2215          only useful for one machine, the VAX, and caused poor code
2216          generation there for lshrdi3, so the code was deleted and a
2217          define_expand for lshrsi3 was added to vax.md.  */
2218     }
2219
2220   gcc_assert (temp);
2221   return temp;
2222 }
2223 \f
2224 enum alg_code {
2225   alg_unknown,
2226   alg_zero,
2227   alg_m, alg_shift,
2228   alg_add_t_m2,
2229   alg_sub_t_m2,
2230   alg_add_factor,
2231   alg_sub_factor,
2232   alg_add_t2_m,
2233   alg_sub_t2_m,
2234   alg_impossible
2235 };
2236
2237 /* This structure holds the "cost" of a multiply sequence.  The
2238    "cost" field holds the total rtx_cost of every operator in the
2239    synthetic multiplication sequence, hence cost(a op b) is defined
2240    as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2241    The "latency" field holds the minimum possible latency of the
2242    synthetic multiply, on a hypothetical infinitely parallel CPU.
2243    This is the critical path, or the maximum height, of the expression
2244    tree which is the sum of rtx_costs on the most expensive path from
2245    any leaf to the root.  Hence latency(a op b) is defined as zero for
2246    leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise.  */
2247
2248 struct mult_cost {
2249   short cost;     /* Total rtx_cost of the multiplication sequence.  */
2250   short latency;  /* The latency of the multiplication sequence.  */
2251 };
2252
2253 /* This macro is used to compare a pointer to a mult_cost against an
2254    single integer "rtx_cost" value.  This is equivalent to the macro
2255    CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}.  */
2256 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y)    \
2257                              || ((X)->cost == (Y) && (X)->latency < (Y)))
2258
2259 /* This macro is used to compare two pointers to mult_costs against
2260    each other.  The macro returns true if X is cheaper than Y.
2261    Currently, the cheaper of two mult_costs is the one with the
2262    lower "cost".  If "cost"s are tied, the lower latency is cheaper.  */
2263 #define CHEAPER_MULT_COST(X,Y)  ((X)->cost < (Y)->cost          \
2264                                  || ((X)->cost == (Y)->cost     \
2265                                      && (X)->latency < (Y)->latency))
2266
2267 /* This structure records a sequence of operations.
2268    `ops' is the number of operations recorded.
2269    `cost' is their total cost.
2270    The operations are stored in `op' and the corresponding
2271    logarithms of the integer coefficients in `log'.
2272
2273    These are the operations:
2274    alg_zero             total := 0;
2275    alg_m                total := multiplicand;
2276    alg_shift            total := total * coeff
2277    alg_add_t_m2         total := total + multiplicand * coeff;
2278    alg_sub_t_m2         total := total - multiplicand * coeff;
2279    alg_add_factor       total := total * coeff + total;
2280    alg_sub_factor       total := total * coeff - total;
2281    alg_add_t2_m         total := total * coeff + multiplicand;
2282    alg_sub_t2_m         total := total * coeff - multiplicand;
2283
2284    The first operand must be either alg_zero or alg_m.  */
2285
2286 struct algorithm
2287 {
2288   struct mult_cost cost;
2289   short ops;
2290   /* The size of the OP and LOG fields are not directly related to the
2291      word size, but the worst-case algorithms will be if we have few
2292      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2293      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2294      in total wordsize operations.  */
2295   enum alg_code op[MAX_BITS_PER_WORD];
2296   char log[MAX_BITS_PER_WORD];
2297 };
2298
2299 /* The entry for our multiplication cache/hash table.  */
2300 struct alg_hash_entry {
2301   /* The number we are multiplying by.  */
2302   unsigned HOST_WIDE_INT t;
2303
2304   /* The mode in which we are multiplying something by T.  */
2305   enum machine_mode mode;
2306
2307   /* The best multiplication algorithm for t.  */
2308   enum alg_code alg;
2309
2310   /* The cost of multiplication if ALG_CODE is not alg_impossible.
2311      Otherwise, the cost within which multiplication by T is
2312      impossible.  */
2313   struct mult_cost cost;
2314
2315   /* OPtimized for speed? */
2316   bool speed;
2317 };
2318
2319 /* The number of cache/hash entries.  */
2320 #if HOST_BITS_PER_WIDE_INT == 64
2321 #define NUM_ALG_HASH_ENTRIES 1031
2322 #else
2323 #define NUM_ALG_HASH_ENTRIES 307
2324 #endif
2325
2326 /* Each entry of ALG_HASH caches alg_code for some integer.  This is
2327    actually a hash table.  If we have a collision, that the older
2328    entry is kicked out.  */
2329 static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2330
2331 /* Indicates the type of fixup needed after a constant multiplication.
2332    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2333    the result should be negated, and ADD_VARIANT means that the
2334    multiplicand should be added to the result.  */
2335 enum mult_variant {basic_variant, negate_variant, add_variant};
2336
2337 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2338                         const struct mult_cost *, enum machine_mode mode);
2339 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2340                                  struct algorithm *, enum mult_variant *, int);
2341 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2342                               const struct algorithm *, enum mult_variant);
2343 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2344                                                  int, rtx *, int *, int *);
2345 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2346 static rtx extract_high_half (enum machine_mode, rtx);
2347 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2348 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2349                                        int, int);
2350 /* Compute and return the best algorithm for multiplying by T.
2351    The algorithm must cost less than cost_limit
2352    If retval.cost >= COST_LIMIT, no algorithm was found and all
2353    other field of the returned struct are undefined.
2354    MODE is the machine mode of the multiplication.  */
2355
2356 static void
2357 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2358             const struct mult_cost *cost_limit, enum machine_mode mode)
2359 {
2360   int m;
2361   struct algorithm *alg_in, *best_alg;
2362   struct mult_cost best_cost;
2363   struct mult_cost new_limit;
2364   int op_cost, op_latency;
2365   unsigned HOST_WIDE_INT q;
2366   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2367   int hash_index;
2368   bool cache_hit = false;
2369   enum alg_code cache_alg = alg_zero;
2370   bool speed = optimize_insn_for_speed_p ();
2371
2372   /* Indicate that no algorithm is yet found.  If no algorithm
2373      is found, this value will be returned and indicate failure.  */
2374   alg_out->cost.cost = cost_limit->cost + 1;
2375   alg_out->cost.latency = cost_limit->latency + 1;
2376
2377   if (cost_limit->cost < 0
2378       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2379     return;
2380
2381   /* Restrict the bits of "t" to the multiplication's mode.  */
2382   t &= GET_MODE_MASK (mode);
2383
2384   /* t == 1 can be done in zero cost.  */
2385   if (t == 1)
2386     {
2387       alg_out->ops = 1;
2388       alg_out->cost.cost = 0;
2389       alg_out->cost.latency = 0;
2390       alg_out->op[0] = alg_m;
2391       return;
2392     }
2393
2394   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2395      fail now.  */
2396   if (t == 0)
2397     {
2398       if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2399         return;
2400       else
2401         {
2402           alg_out->ops = 1;
2403           alg_out->cost.cost = zero_cost[speed];
2404           alg_out->cost.latency = zero_cost[speed];
2405           alg_out->op[0] = alg_zero;
2406           return;
2407         }
2408     }
2409
2410   /* We'll be needing a couple extra algorithm structures now.  */
2411
2412   alg_in = XALLOCA (struct algorithm);
2413   best_alg = XALLOCA (struct algorithm);
2414   best_cost = *cost_limit;
2415
2416   /* Compute the hash index.  */
2417   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2418
2419   /* See if we already know what to do for T.  */
2420   if (alg_hash[hash_index].t == t
2421       && alg_hash[hash_index].mode == mode
2422       && alg_hash[hash_index].mode == mode
2423       && alg_hash[hash_index].speed == speed
2424       && alg_hash[hash_index].alg != alg_unknown)
2425     {
2426       cache_alg = alg_hash[hash_index].alg;
2427
2428       if (cache_alg == alg_impossible)
2429         {
2430           /* The cache tells us that it's impossible to synthesize
2431              multiplication by T within alg_hash[hash_index].cost.  */
2432           if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2433             /* COST_LIMIT is at least as restrictive as the one
2434                recorded in the hash table, in which case we have no
2435                hope of synthesizing a multiplication.  Just
2436                return.  */
2437             return;
2438
2439           /* If we get here, COST_LIMIT is less restrictive than the
2440              one recorded in the hash table, so we may be able to
2441              synthesize a multiplication.  Proceed as if we didn't
2442              have the cache entry.  */
2443         }
2444       else
2445         {
2446           if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2447             /* The cached algorithm shows that this multiplication
2448                requires more cost than COST_LIMIT.  Just return.  This
2449                way, we don't clobber this cache entry with
2450                alg_impossible but retain useful information.  */
2451             return;
2452
2453           cache_hit = true;
2454
2455           switch (cache_alg)
2456             {
2457             case alg_shift:
2458               goto do_alg_shift;
2459
2460             case alg_add_t_m2:
2461             case alg_sub_t_m2:
2462               goto do_alg_addsub_t_m2;
2463
2464             case alg_add_factor:
2465             case alg_sub_factor:
2466               goto do_alg_addsub_factor;
2467
2468             case alg_add_t2_m:
2469               goto do_alg_add_t2_m;
2470
2471             case alg_sub_t2_m:
2472               goto do_alg_sub_t2_m;
2473
2474             default:
2475               gcc_unreachable ();
2476             }
2477         }
2478     }
2479
2480   /* If we have a group of zero bits at the low-order part of T, try
2481      multiplying by the remaining bits and then doing a shift.  */
2482
2483   if ((t & 1) == 0)
2484     {
2485     do_alg_shift:
2486       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2487       if (m < maxm)
2488         {
2489           q = t >> m;
2490           /* The function expand_shift will choose between a shift and
2491              a sequence of additions, so the observed cost is given as
2492              MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]).  */
2493           op_cost = m * add_cost[speed][mode];
2494           if (shift_cost[speed][mode][m] < op_cost)
2495             op_cost = shift_cost[speed][mode][m];
2496           new_limit.cost = best_cost.cost - op_cost;
2497           new_limit.latency = best_cost.latency - op_cost;
2498           synth_mult (alg_in, q, &new_limit, mode);
2499
2500           alg_in->cost.cost += op_cost;
2501           alg_in->cost.latency += op_cost;
2502           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2503             {
2504               struct algorithm *x;
2505               best_cost = alg_in->cost;
2506               x = alg_in, alg_in = best_alg, best_alg = x;
2507               best_alg->log[best_alg->ops] = m;
2508               best_alg->op[best_alg->ops] = alg_shift;
2509             }
2510         }
2511       if (cache_hit)
2512         goto done;
2513     }
2514
2515   /* If we have an odd number, add or subtract one.  */
2516   if ((t & 1) != 0)
2517     {
2518       unsigned HOST_WIDE_INT w;
2519
2520     do_alg_addsub_t_m2:
2521       for (w = 1; (w & t) != 0; w <<= 1)
2522         ;
2523       /* If T was -1, then W will be zero after the loop.  This is another
2524          case where T ends with ...111.  Handling this with (T + 1) and
2525          subtract 1 produces slightly better code and results in algorithm
2526          selection much faster than treating it like the ...0111 case
2527          below.  */
2528       if (w == 0
2529           || (w > 2
2530               /* Reject the case where t is 3.
2531                  Thus we prefer addition in that case.  */
2532               && t != 3))
2533         {
2534           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2535
2536           op_cost = add_cost[speed][mode];
2537           new_limit.cost = best_cost.cost - op_cost;
2538           new_limit.latency = best_cost.latency - op_cost;
2539           synth_mult (alg_in, t + 1, &new_limit, mode);
2540
2541           alg_in->cost.cost += op_cost;
2542           alg_in->cost.latency += op_cost;
2543           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2544             {
2545               struct algorithm *x;
2546               best_cost = alg_in->cost;
2547               x = alg_in, alg_in = best_alg, best_alg = x;
2548               best_alg->log[best_alg->ops] = 0;
2549               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2550             }
2551         }
2552       else
2553         {
2554           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2555
2556           op_cost = add_cost[speed][mode];
2557           new_limit.cost = best_cost.cost - op_cost;
2558           new_limit.latency = best_cost.latency - op_cost;
2559           synth_mult (alg_in, t - 1, &new_limit, mode);
2560
2561           alg_in->cost.cost += op_cost;
2562           alg_in->cost.latency += op_cost;
2563           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2564             {
2565               struct algorithm *x;
2566               best_cost = alg_in->cost;
2567               x = alg_in, alg_in = best_alg, best_alg = x;
2568               best_alg->log[best_alg->ops] = 0;
2569               best_alg->op[best_alg->ops] = alg_add_t_m2;
2570             }
2571         }
2572       if (cache_hit)
2573         goto done;
2574     }
2575
2576   /* Look for factors of t of the form
2577      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2578      If we find such a factor, we can multiply by t using an algorithm that
2579      multiplies by q, shift the result by m and add/subtract it to itself.
2580
2581      We search for large factors first and loop down, even if large factors
2582      are less probable than small; if we find a large factor we will find a
2583      good sequence quickly, and therefore be able to prune (by decreasing
2584      COST_LIMIT) the search.  */
2585
2586  do_alg_addsub_factor:
2587   for (m = floor_log2 (t - 1); m >= 2; m--)
2588     {
2589       unsigned HOST_WIDE_INT d;
2590
2591       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2592       if (t % d == 0 && t > d && m < maxm
2593           && (!cache_hit || cache_alg == alg_add_factor))
2594         {
2595           /* If the target has a cheap shift-and-add instruction use
2596              that in preference to a shift insn followed by an add insn.
2597              Assume that the shift-and-add is "atomic" with a latency
2598              equal to its cost, otherwise assume that on superscalar
2599              hardware the shift may be executed concurrently with the
2600              earlier steps in the algorithm.  */
2601           op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2602           if (shiftadd_cost[speed][mode][m] < op_cost)
2603             {
2604               op_cost = shiftadd_cost[speed][mode][m];
2605               op_latency = op_cost;
2606             }
2607           else
2608             op_latency = add_cost[speed][mode];
2609
2610           new_limit.cost = best_cost.cost - op_cost;
2611           new_limit.latency = best_cost.latency - op_latency;
2612           synth_mult (alg_in, t / d, &new_limit, mode);
2613
2614           alg_in->cost.cost += op_cost;
2615           alg_in->cost.latency += op_latency;
2616           if (alg_in->cost.latency < op_cost)
2617             alg_in->cost.latency = op_cost;
2618           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2619             {
2620               struct algorithm *x;
2621               best_cost = alg_in->cost;
2622               x = alg_in, alg_in = best_alg, best_alg = x;
2623               best_alg->log[best_alg->ops] = m;
2624               best_alg->op[best_alg->ops] = alg_add_factor;
2625             }
2626           /* Other factors will have been taken care of in the recursion.  */
2627           break;
2628         }
2629
2630       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2631       if (t % d == 0 && t > d && m < maxm
2632           && (!cache_hit || cache_alg == alg_sub_factor))
2633         {
2634           /* If the target has a cheap shift-and-subtract insn use
2635              that in preference to a shift insn followed by a sub insn.
2636              Assume that the shift-and-sub is "atomic" with a latency
2637              equal to it's cost, otherwise assume that on superscalar
2638              hardware the shift may be executed concurrently with the
2639              earlier steps in the algorithm.  */
2640           op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2641           if (shiftsub_cost[speed][mode][m] < op_cost)
2642             {
2643               op_cost = shiftsub_cost[speed][mode][m];
2644               op_latency = op_cost;
2645             }
2646           else
2647             op_latency = add_cost[speed][mode];
2648
2649           new_limit.cost = best_cost.cost - op_cost;
2650           new_limit.latency = best_cost.latency - op_latency;
2651           synth_mult (alg_in, t / d, &new_limit, mode);
2652
2653           alg_in->cost.cost += op_cost;
2654           alg_in->cost.latency += op_latency;
2655           if (alg_in->cost.latency < op_cost)
2656             alg_in->cost.latency = op_cost;
2657           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2658             {
2659               struct algorithm *x;
2660               best_cost = alg_in->cost;
2661               x = alg_in, alg_in = best_alg, best_alg = x;
2662               best_alg->log[best_alg->ops] = m;
2663               best_alg->op[best_alg->ops] = alg_sub_factor;
2664             }
2665           break;
2666         }
2667     }
2668   if (cache_hit)
2669     goto done;
2670
2671   /* Try shift-and-add (load effective address) instructions,
2672      i.e. do a*3, a*5, a*9.  */
2673   if ((t & 1) != 0)
2674     {
2675     do_alg_add_t2_m:
2676       q = t - 1;
2677       q = q & -q;
2678       m = exact_log2 (q);
2679       if (m >= 0 && m < maxm)
2680         {
2681           op_cost = shiftadd_cost[speed][mode][m];
2682           new_limit.cost = best_cost.cost - op_cost;
2683           new_limit.latency = best_cost.latency - op_cost;
2684           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2685
2686           alg_in->cost.cost += op_cost;
2687           alg_in->cost.latency += op_cost;
2688           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2689             {
2690               struct algorithm *x;
2691               best_cost = alg_in->cost;
2692               x = alg_in, alg_in = best_alg, best_alg = x;
2693               best_alg->log[best_alg->ops] = m;
2694               best_alg->op[best_alg->ops] = alg_add_t2_m;
2695             }
2696         }
2697       if (cache_hit)
2698         goto done;
2699
2700     do_alg_sub_t2_m:
2701       q = t + 1;
2702       q = q & -q;
2703       m = exact_log2 (q);
2704       if (m >= 0 && m < maxm)
2705         {
2706           op_cost = shiftsub_cost[speed][mode][m];
2707           new_limit.cost = best_cost.cost - op_cost;
2708           new_limit.latency = best_cost.latency - op_cost;
2709           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2710
2711           alg_in->cost.cost += op_cost;
2712           alg_in->cost.latency += op_cost;
2713           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2714             {
2715               struct algorithm *x;
2716               best_cost = alg_in->cost;
2717               x = alg_in, alg_in = best_alg, best_alg = x;
2718               best_alg->log[best_alg->ops] = m;
2719               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2720             }
2721         }
2722       if (cache_hit)
2723         goto done;
2724     }
2725
2726  done:
2727   /* If best_cost has not decreased, we have not found any algorithm.  */
2728   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2729     {
2730       /* We failed to find an algorithm.  Record alg_impossible for
2731          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2732          we are asked to find an algorithm for T within the same or
2733          lower COST_LIMIT, we can immediately return to the
2734          caller.  */
2735       alg_hash[hash_index].t = t;
2736       alg_hash[hash_index].mode = mode;
2737       alg_hash[hash_index].speed = speed;
2738       alg_hash[hash_index].alg = alg_impossible;
2739       alg_hash[hash_index].cost = *cost_limit;
2740       return;
2741     }
2742
2743   /* Cache the result.  */
2744   if (!cache_hit)
2745     {
2746       alg_hash[hash_index].t = t;
2747       alg_hash[hash_index].mode = mode;
2748       alg_hash[hash_index].speed = speed;
2749       alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2750       alg_hash[hash_index].cost.cost = best_cost.cost;
2751       alg_hash[hash_index].cost.latency = best_cost.latency;
2752     }
2753
2754   /* If we are getting a too long sequence for `struct algorithm'
2755      to record, make this search fail.  */
2756   if (best_alg->ops == MAX_BITS_PER_WORD)
2757     return;
2758
2759   /* Copy the algorithm from temporary space to the space at alg_out.
2760      We avoid using structure assignment because the majority of
2761      best_alg is normally undefined, and this is a critical function.  */
2762   alg_out->ops = best_alg->ops + 1;
2763   alg_out->cost = best_cost;
2764   memcpy (alg_out->op, best_alg->op,
2765           alg_out->ops * sizeof *alg_out->op);
2766   memcpy (alg_out->log, best_alg->log,
2767           alg_out->ops * sizeof *alg_out->log);
2768 }
2769 \f
2770 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2771    Try three variations:
2772
2773        - a shift/add sequence based on VAL itself
2774        - a shift/add sequence based on -VAL, followed by a negation
2775        - a shift/add sequence based on VAL - 1, followed by an addition.
2776
2777    Return true if the cheapest of these cost less than MULT_COST,
2778    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2779
2780 static bool
2781 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2782                      struct algorithm *alg, enum mult_variant *variant,
2783                      int mult_cost)
2784 {
2785   struct algorithm alg2;
2786   struct mult_cost limit;
2787   int op_cost;
2788   bool speed = optimize_insn_for_speed_p ();
2789
2790   /* Fail quickly for impossible bounds.  */
2791   if (mult_cost < 0)
2792     return false;
2793
2794   /* Ensure that mult_cost provides a reasonable upper bound.
2795      Any constant multiplication can be performed with less
2796      than 2 * bits additions.  */
2797   op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2798   if (mult_cost > op_cost)
2799     mult_cost = op_cost;
2800
2801   *variant = basic_variant;
2802   limit.cost = mult_cost;
2803   limit.latency = mult_cost;
2804   synth_mult (alg, val, &limit, mode);
2805
2806   /* This works only if the inverted value actually fits in an
2807      `unsigned int' */
2808   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2809     {
2810       op_cost = neg_cost[speed][mode];
2811       if (MULT_COST_LESS (&alg->cost, mult_cost))
2812         {
2813           limit.cost = alg->cost.cost - op_cost;
2814           limit.latency = alg->cost.latency - op_cost;
2815         }
2816       else
2817         {
2818           limit.cost = mult_cost - op_cost;
2819           limit.latency = mult_cost - op_cost;
2820         }
2821
2822       synth_mult (&alg2, -val, &limit, mode);
2823       alg2.cost.cost += op_cost;
2824       alg2.cost.latency += op_cost;
2825       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2826         *alg = alg2, *variant = negate_variant;
2827     }
2828
2829   /* This proves very useful for division-by-constant.  */
2830   op_cost = add_cost[speed][mode];
2831   if (MULT_COST_LESS (&alg->cost, mult_cost))
2832     {
2833       limit.cost = alg->cost.cost - op_cost;
2834       limit.latency = alg->cost.latency - op_cost;
2835     }
2836   else
2837     {
2838       limit.cost = mult_cost - op_cost;
2839       limit.latency = mult_cost - op_cost;
2840     }
2841
2842   synth_mult (&alg2, val - 1, &limit, mode);
2843   alg2.cost.cost += op_cost;
2844   alg2.cost.latency += op_cost;
2845   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2846     *alg = alg2, *variant = add_variant;
2847
2848   return MULT_COST_LESS (&alg->cost, mult_cost);
2849 }
2850
2851 /* A subroutine of expand_mult, used for constant multiplications.
2852    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2853    convenient.  Use the shift/add sequence described by ALG and apply
2854    the final fixup specified by VARIANT.  */
2855
2856 static rtx
2857 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2858                    rtx target, const struct algorithm *alg,
2859                    enum mult_variant variant)
2860 {
2861   HOST_WIDE_INT val_so_far;
2862   rtx insn, accum, tem;
2863   int opno;
2864   enum machine_mode nmode;
2865
2866   /* Avoid referencing memory over and over and invalid sharing
2867      on SUBREGs.  */
2868   op0 = force_reg (mode, op0);
2869
2870   /* ACCUM starts out either as OP0 or as a zero, depending on
2871      the first operation.  */
2872
2873   if (alg->op[0] == alg_zero)
2874     {
2875       accum = copy_to_mode_reg (mode, const0_rtx);
2876       val_so_far = 0;
2877     }
2878   else if (alg->op[0] == alg_m)
2879     {
2880       accum = copy_to_mode_reg (mode, op0);
2881       val_so_far = 1;
2882     }
2883   else
2884     gcc_unreachable ();
2885
2886   for (opno = 1; opno < alg->ops; opno++)
2887     {
2888       int log = alg->log[opno];
2889       rtx shift_subtarget = optimize ? 0 : accum;
2890       rtx add_target
2891         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2892            && !optimize)
2893           ? target : 0;
2894       rtx accum_target = optimize ? 0 : accum;
2895
2896       switch (alg->op[opno])
2897         {
2898         case alg_shift:
2899           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2900                                 build_int_cst (NULL_TREE, log),
2901                                 NULL_RTX, 0);
2902           val_so_far <<= log;
2903           break;
2904
2905         case alg_add_t_m2:
2906           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2907                               build_int_cst (NULL_TREE, log),
2908                               NULL_RTX, 0);
2909           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2910                                  add_target ? add_target : accum_target);
2911           val_so_far += (HOST_WIDE_INT) 1 << log;
2912           break;
2913
2914         case alg_sub_t_m2:
2915           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2916                               build_int_cst (NULL_TREE, log),
2917                               NULL_RTX, 0);
2918           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2919                                  add_target ? add_target : accum_target);
2920           val_so_far -= (HOST_WIDE_INT) 1 << log;
2921           break;
2922
2923         case alg_add_t2_m:
2924           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2925                                 build_int_cst (NULL_TREE, log),
2926                                 shift_subtarget,
2927                                 0);
2928           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2929                                  add_target ? add_target : accum_target);
2930           val_so_far = (val_so_far << log) + 1;
2931           break;
2932
2933         case alg_sub_t2_m:
2934           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2935                                 build_int_cst (NULL_TREE, log),
2936                                 shift_subtarget, 0);
2937           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2938                                  add_target ? add_target : accum_target);
2939           val_so_far = (val_so_far << log) - 1;
2940           break;
2941
2942         case alg_add_factor:
2943           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2944                               build_int_cst (NULL_TREE, log),
2945                               NULL_RTX, 0);
2946           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2947                                  add_target ? add_target : accum_target);
2948           val_so_far += val_so_far << log;
2949           break;
2950
2951         case alg_sub_factor:
2952           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2953                               build_int_cst (NULL_TREE, log),
2954                               NULL_RTX, 0);
2955           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2956                                  (add_target
2957                                   ? add_target : (optimize ? 0 : tem)));
2958           val_so_far = (val_so_far << log) - val_so_far;
2959           break;
2960
2961         default:
2962           gcc_unreachable ();
2963         }
2964
2965       /* Write a REG_EQUAL note on the last insn so that we can cse
2966          multiplication sequences.  Note that if ACCUM is a SUBREG,
2967          we've set the inner register and must properly indicate
2968          that.  */
2969
2970       tem = op0, nmode = mode;
2971       if (GET_CODE (accum) == SUBREG)
2972         {
2973           nmode = GET_MODE (SUBREG_REG (accum));
2974           tem = gen_lowpart (nmode, op0);
2975         }
2976
2977       insn = get_last_insn ();
2978       set_unique_reg_note (insn, REG_EQUAL,
2979                            gen_rtx_MULT (nmode, tem,
2980                                          GEN_INT (val_so_far)));
2981     }
2982
2983   if (variant == negate_variant)
2984     {
2985       val_so_far = -val_so_far;
2986       accum = expand_unop (mode, neg_optab, accum, target, 0);
2987     }
2988   else if (variant == add_variant)
2989     {
2990       val_so_far = val_so_far + 1;
2991       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2992     }
2993
2994   /* Compare only the bits of val and val_so_far that are significant
2995      in the result mode, to avoid sign-/zero-extension confusion.  */
2996   val &= GET_MODE_MASK (mode);
2997   val_so_far &= GET_MODE_MASK (mode);
2998   gcc_assert (val == val_so_far);
2999
3000   return accum;
3001 }
3002
3003 /* Perform a multiplication and return an rtx for the result.
3004    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3005    TARGET is a suggestion for where to store the result (an rtx).
3006
3007    We check specially for a constant integer as OP1.
3008    If you want this check for OP0 as well, then before calling
3009    you should swap the two operands if OP0 would be constant.  */
3010
3011 rtx
3012 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3013              int unsignedp)
3014 {
3015   enum mult_variant variant;
3016   struct algorithm algorithm;
3017   int max_cost;
3018   bool speed = optimize_insn_for_speed_p ();
3019
3020   /* Handling const0_rtx here allows us to use zero as a rogue value for
3021      coeff below.  */
3022   if (op1 == const0_rtx)
3023     return const0_rtx;
3024   if (op1 == const1_rtx)
3025     return op0;
3026   if (op1 == constm1_rtx)
3027     return expand_unop (mode,
3028                         GET_MODE_CLASS (mode) == MODE_INT
3029                         && !unsignedp && flag_trapv
3030                         ? negv_optab : neg_optab,
3031                         op0, target, 0);
3032
3033   /* These are the operations that are potentially turned into a sequence
3034      of shifts and additions.  */
3035   if (SCALAR_INT_MODE_P (mode)
3036       && (unsignedp || !flag_trapv))
3037     {
3038       HOST_WIDE_INT coeff = 0;
3039       rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3040
3041       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3042          less than or equal in size to `unsigned int' this doesn't matter.
3043          If the mode is larger than `unsigned int', then synth_mult works
3044          only if the constant value exactly fits in an `unsigned int' without
3045          any truncation.  This means that multiplying by negative values does
3046          not work; results are off by 2^32 on a 32 bit machine.  */
3047
3048       if (GET_CODE (op1) == CONST_INT)
3049         {
3050           /* Attempt to handle multiplication of DImode values by negative
3051              coefficients, by performing the multiplication by a positive
3052              multiplier and then inverting the result.  */
3053           if (INTVAL (op1) < 0
3054               && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3055             {
3056               /* Its safe to use -INTVAL (op1) even for INT_MIN, as the
3057                  result is interpreted as an unsigned coefficient.
3058                  Exclude cost of op0 from max_cost to match the cost
3059                  calculation of the synth_mult.  */
3060               max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed)
3061                          - neg_cost[speed][mode];
3062               if (max_cost > 0
3063                   && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3064                                           &variant, max_cost))
3065                 {
3066                   rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3067                                                 NULL_RTX, &algorithm,
3068                                                 variant);
3069                   return expand_unop (mode, neg_optab, temp, target, 0);
3070                 }
3071             }
3072           else coeff = INTVAL (op1);
3073         }
3074       else if (GET_CODE (op1) == CONST_DOUBLE)
3075         {
3076           /* If we are multiplying in DImode, it may still be a win
3077              to try to work with shifts and adds.  */
3078           if (CONST_DOUBLE_HIGH (op1) == 0)
3079             coeff = CONST_DOUBLE_LOW (op1);
3080           else if (CONST_DOUBLE_LOW (op1) == 0
3081                    && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3082             {
3083               int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3084                           + HOST_BITS_PER_WIDE_INT;
3085               return expand_shift (LSHIFT_EXPR, mode, op0,
3086                                    build_int_cst (NULL_TREE, shift),
3087                                    target, unsignedp);
3088             }
3089         }
3090
3091       /* We used to test optimize here, on the grounds that it's better to
3092          produce a smaller program when -O is not used.  But this causes
3093          such a terrible slowdown sometimes that it seems better to always
3094          use synth_mult.  */
3095       if (coeff != 0)
3096         {
3097           /* Special case powers of two.  */
3098           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3099             return expand_shift (LSHIFT_EXPR, mode, op0,
3100                                  build_int_cst (NULL_TREE, floor_log2 (coeff)),
3101                                  target, unsignedp);
3102
3103           /* Exclude cost of op0 from max_cost to match the cost
3104              calculation of the synth_mult.  */
3105           max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed);
3106           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3107                                    max_cost))
3108             return expand_mult_const (mode, op0, coeff, target,
3109                                       &algorithm, variant);
3110         }
3111     }
3112
3113   if (GET_CODE (op0) == CONST_DOUBLE)
3114     {
3115       rtx temp = op0;
3116       op0 = op1;
3117       op1 = temp;
3118     }
3119
3120   /* Expand x*2.0 as x+x.  */
3121   if (GET_CODE (op1) == CONST_DOUBLE
3122       && SCALAR_FLOAT_MODE_P (mode))
3123     {
3124       REAL_VALUE_TYPE d;
3125       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3126
3127       if (REAL_VALUES_EQUAL (d, dconst2))
3128         {
3129           op0 = force_reg (GET_MODE (op0), op0);
3130           return expand_binop (mode, add_optab, op0, op0,
3131                                target, unsignedp, OPTAB_LIB_WIDEN);
3132         }
3133     }
3134
3135   /* This used to use umul_optab if unsigned, but for non-widening multiply
3136      there is no difference between signed and unsigned.  */
3137   op0 = expand_binop (mode,
3138                       ! unsignedp
3139                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3140                       ? smulv_optab : smul_optab,
3141                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3142   gcc_assert (op0);
3143   return op0;
3144 }
3145 \f
3146 /* Return the smallest n such that 2**n >= X.  */
3147
3148 int
3149 ceil_log2 (unsigned HOST_WIDE_INT x)
3150 {
3151   return floor_log2 (x - 1) + 1;
3152 }
3153
3154 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3155    replace division by D, and put the least significant N bits of the result
3156    in *MULTIPLIER_PTR and return the most significant bit.
3157
3158    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3159    needed precision is in PRECISION (should be <= N).
3160
3161    PRECISION should be as small as possible so this function can choose
3162    multiplier more freely.
3163
3164    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3165    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3166
3167    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3168    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3169
3170 static
3171 unsigned HOST_WIDE_INT
3172 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3173                    rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3174 {
3175   HOST_WIDE_INT mhigh_hi, mlow_hi;
3176   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3177   int lgup, post_shift;
3178   int pow, pow2;
3179   unsigned HOST_WIDE_INT nl, dummy1;
3180   HOST_WIDE_INT nh, dummy2;
3181
3182   /* lgup = ceil(log2(divisor)); */
3183   lgup = ceil_log2 (d);
3184
3185   gcc_assert (lgup <= n);
3186
3187   pow = n + lgup;
3188   pow2 = n + lgup - precision;
3189
3190   /* We could handle this with some effort, but this case is much
3191      better handled directly with a scc insn, so rely on caller using
3192      that.  */
3193   gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3194
3195   /* mlow = 2^(N + lgup)/d */
3196  if (pow >= HOST_BITS_PER_WIDE_INT)
3197     {
3198       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3199       nl = 0;
3200     }
3201   else
3202     {
3203       nh = 0;
3204       nl = (unsigned HOST_WIDE_INT) 1 << pow;
3205     }
3206   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3207                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3208
3209   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
3210   if (pow2 >= HOST_BITS_PER_WIDE_INT)
3211     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3212   else
3213     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3214   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3215                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3216
3217   gcc_assert (!mhigh_hi || nh - d < d);
3218   gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3219   /* Assert that mlow < mhigh.  */
3220   gcc_assert (mlow_hi < mhigh_hi
3221               || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3222
3223   /* If precision == N, then mlow, mhigh exceed 2^N
3224      (but they do not exceed 2^(N+1)).  */
3225
3226   /* Reduce to lowest terms.  */
3227   for (post_shift = lgup; post_shift > 0; post_shift--)
3228     {
3229       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3230       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3231       if (ml_lo >= mh_lo)
3232         break;
3233
3234       mlow_hi = 0;
3235       mlow_lo = ml_lo;
3236       mhigh_hi = 0;
3237       mhigh_lo = mh_lo;
3238     }
3239
3240   *post_shift_ptr = post_shift;
3241   *lgup_ptr = lgup;
3242   if (n < HOST_BITS_PER_WIDE_INT)
3243     {
3244       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3245       *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3246       return mhigh_lo >= mask;
3247     }
3248   else
3249     {
3250       *multiplier_ptr = GEN_INT (mhigh_lo);
3251       return mhigh_hi;
3252     }
3253 }
3254
3255 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3256    congruent to 1 (mod 2**N).  */
3257
3258 static unsigned HOST_WIDE_INT
3259 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3260 {
3261   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3262
3263   /* The algorithm notes that the choice y = x satisfies
3264      x*y == 1 mod 2^3, since x is assumed odd.
3265      Each iteration doubles the number of bits of significance in y.  */
3266
3267   unsigned HOST_WIDE_INT mask;
3268   unsigned HOST_WIDE_INT y = x;
3269   int nbit = 3;
3270
3271   mask = (n == HOST_BITS_PER_WIDE_INT
3272           ? ~(unsigned HOST_WIDE_INT) 0
3273           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3274
3275   while (nbit < n)
3276     {
3277       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3278       nbit *= 2;
3279     }
3280   return y;
3281 }
3282
3283 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3284    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3285    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3286    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3287    become signed.
3288
3289    The result is put in TARGET if that is convenient.
3290
3291    MODE is the mode of operation.  */
3292
3293 rtx
3294 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3295                              rtx op1, rtx target, int unsignedp)
3296 {
3297   rtx tem;
3298   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3299
3300   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3301                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3302                       NULL_RTX, 0);
3303   tem = expand_and (mode, tem, op1, NULL_RTX);
3304   adj_operand
3305     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3306                      adj_operand);
3307
3308   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3309                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3310                       NULL_RTX, 0);
3311   tem = expand_and (mode, tem, op0, NULL_RTX);
3312   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3313                           target);
3314
3315   return target;
3316 }
3317
3318 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
3319
3320 static rtx
3321 extract_high_half (enum machine_mode mode, rtx op)
3322 {
3323   enum machine_mode wider_mode;
3324
3325   if (mode == word_mode)
3326     return gen_highpart (mode, op);
3327
3328   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3329
3330   wider_mode = GET_MODE_WIDER_MODE (mode);
3331   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3332                      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3333   return convert_modes (mode, wider_mode, op, 0);
3334 }
3335
3336 /* Like expand_mult_highpart, but only consider using a multiplication
3337    optab.  OP1 is an rtx for the constant operand.  */
3338
3339 static rtx
3340 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3341                             rtx target, int unsignedp, int max_cost)
3342 {
3343   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3344   enum machine_mode wider_mode;
3345   optab moptab;
3346   rtx tem;
3347   int size;
3348   bool speed = optimize_insn_for_speed_p ();
3349
3350   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3351
3352   wider_mode = GET_MODE_WIDER_MODE (mode);
3353   size = GET_MODE_BITSIZE (mode);
3354
3355   /* Firstly, try using a multiplication insn that only generates the needed
3356      high part of the product, and in the sign flavor of unsignedp.  */
3357   if (mul_highpart_cost[speed][mode] < max_cost)
3358     {
3359       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3360       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3361                           unsignedp, OPTAB_DIRECT);
3362       if (tem)
3363         return tem;
3364     }
3365
3366   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3367      Need to adjust the result after the multiplication.  */
3368   if (size - 1 < BITS_PER_WORD
3369       && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3370           + 4 * add_cost[speed][mode] < max_cost))
3371     {
3372       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3373       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3374                           unsignedp, OPTAB_DIRECT);
3375       if (tem)
3376         /* We used the wrong signedness.  Adjust the result.  */
3377         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3378                                             tem, unsignedp);
3379     }
3380
3381   /* Try widening multiplication.  */
3382   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3383   if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
3384       && mul_widen_cost[speed][wider_mode] < max_cost)
3385     {
3386       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3387                           unsignedp, OPTAB_WIDEN);
3388       if (tem)
3389         return extract_high_half (mode, tem);
3390     }
3391
3392   /* Try widening the mode and perform a non-widening multiplication.  */
3393   if (optab_handler (smul_optab, wider_mode)->insn_code != CODE_FOR_nothing
3394       && size - 1 < BITS_PER_WORD
3395       && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3396     {
3397       rtx insns, wop0, wop1;
3398
3399       /* We need to widen the operands, for example to ensure the
3400          constant multiplier is correctly sign or zero extended.
3401          Use a sequence to clean-up any instructions emitted by
3402          the conversions if things don't work out.  */
3403       start_sequence ();
3404       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3405       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3406       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3407                           unsignedp, OPTAB_WIDEN);
3408       insns = get_insns ();
3409       end_sequence ();
3410
3411       if (tem)
3412         {
3413           emit_insn (insns);
3414           return extract_high_half (mode, tem);
3415         }
3416     }
3417
3418   /* Try widening multiplication of opposite signedness, and adjust.  */
3419   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3420   if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
3421       && size - 1 < BITS_PER_WORD
3422       && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3423           + 4 * add_cost[speed][mode] < max_cost))
3424     {
3425       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3426                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3427       if (tem != 0)
3428         {
3429           tem = extract_high_half (mode, tem);
3430           /* We used the wrong signedness.  Adjust the result.  */
3431           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3432                                               target, unsignedp);
3433         }
3434     }
3435
3436   return 0;
3437 }
3438
3439 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3440    putting the high half of the result in TARGET if that is convenient,
3441    and return where the result is.  If the operation can not be performed,
3442    0 is returned.
3443
3444    MODE is the mode of operation and result.
3445
3446    UNSIGNEDP nonzero means unsigned multiply.
3447
3448    MAX_COST is the total allowed cost for the expanded RTL.  */
3449
3450 static rtx
3451 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3452                       rtx target, int unsignedp, int max_cost)
3453 {
3454   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3455   unsigned HOST_WIDE_INT cnst1;
3456   int extra_cost;
3457   bool sign_adjust = false;
3458   enum mult_variant variant;
3459   struct algorithm alg;
3460   rtx tem;
3461   bool speed = optimize_insn_for_speed_p ();
3462
3463   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3464   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3465   gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3466
3467   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3468
3469   /* We can't optimize modes wider than BITS_PER_WORD.
3470      ??? We might be able to perform double-word arithmetic if
3471      mode == word_mode, however all the cost calculations in
3472      synth_mult etc. assume single-word operations.  */
3473   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3474     return expand_mult_highpart_optab (mode, op0, op1, target,
3475                                        unsignedp, max_cost);
3476
3477   extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3478
3479   /* Check whether we try to multiply by a negative constant.  */
3480   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3481     {
3482       sign_adjust = true;
3483       extra_cost += add_cost[speed][mode];
3484     }
3485
3486   /* See whether shift/add multiplication is cheap enough.  */
3487   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3488                            max_cost - extra_cost))
3489     {
3490       /* See whether the specialized multiplication optabs are
3491          cheaper than the shift/add version.  */
3492       tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3493                                         alg.cost.cost + extra_cost);
3494       if (tem)
3495         return tem;
3496
3497       tem = convert_to_mode (wider_mode, op0, unsignedp);
3498       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3499       tem = extract_high_half (mode, tem);
3500
3501       /* Adjust result for signedness.  */
3502       if (sign_adjust)
3503         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3504
3505       return tem;
3506     }
3507   return expand_mult_highpart_optab (mode, op0, op1, target,
3508                                      unsignedp, max_cost);
3509 }
3510
3511
3512 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3513
3514 static rtx
3515 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3516 {
3517   unsigned HOST_WIDE_INT masklow, maskhigh;
3518   rtx result, temp, shift, label;
3519   int logd;
3520
3521   logd = floor_log2 (d);
3522   result = gen_reg_rtx (mode);
3523
3524   /* Avoid conditional branches when they're expensive.  */
3525   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3526       && optimize_insn_for_speed_p ())
3527     {
3528       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3529                                       mode, 0, -1);
3530       if (signmask)
3531         {
3532           signmask = force_reg (mode, signmask);
3533           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3534           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3535
3536           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3537              which instruction sequence to use.  If logical right shifts
3538              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3539              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3540
3541           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3542           if (optab_handler (lshr_optab, mode)->insn_code == CODE_FOR_nothing
3543               || rtx_cost (temp, SET, optimize_insn_for_speed_p ()) > COSTS_N_INSNS (2))
3544             {
3545               temp = expand_binop (mode, xor_optab, op0, signmask,
3546                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3547               temp = expand_binop (mode, sub_optab, temp, signmask,
3548                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3549               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3550                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3551               temp = expand_binop (mode, xor_optab, temp, signmask,
3552                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3553               temp = expand_binop (mode, sub_optab, temp, signmask,
3554                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3555             }
3556           else
3557             {
3558               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3559                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3560               signmask = force_reg (mode, signmask);
3561
3562               temp = expand_binop (mode, add_optab, op0, signmask,
3563                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3564               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3565                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3566               temp = expand_binop (mode, sub_optab, temp, signmask,
3567                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3568             }
3569           return temp;
3570         }
3571     }
3572
3573   /* Mask contains the mode's signbit and the significant bits of the
3574      modulus.  By including the signbit in the operation, many targets
3575      can avoid an explicit compare operation in the following comparison
3576      against zero.  */
3577
3578   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3579   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3580     {
3581       masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3582       maskhigh = -1;
3583     }
3584   else
3585     maskhigh = (HOST_WIDE_INT) -1
3586                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3587
3588   temp = expand_binop (mode, and_optab, op0,
3589                        immed_double_const (masklow, maskhigh, mode),
3590                        result, 1, OPTAB_LIB_WIDEN);
3591   if (temp != result)
3592     emit_move_insn (result, temp);
3593
3594   label = gen_label_rtx ();
3595   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3596
3597   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3598                        0, OPTAB_LIB_WIDEN);
3599   masklow = (HOST_WIDE_INT) -1 << logd;
3600   maskhigh = -1;
3601   temp = expand_binop (mode, ior_optab, temp,
3602                        immed_double_const (masklow, maskhigh, mode),
3603                        result, 1, OPTAB_LIB_WIDEN);
3604   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3605                        0, OPTAB_LIB_WIDEN);
3606   if (temp != result)
3607     emit_move_insn (result, temp);
3608   emit_label (label);
3609   return result;
3610 }
3611
3612 /* Expand signed division of OP0 by a power of two D in mode MODE.
3613    This routine is only called for positive values of D.  */
3614
3615 static rtx
3616 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3617 {
3618   rtx temp, label;
3619   tree shift;
3620   int logd;
3621
3622   logd = floor_log2 (d);
3623   shift = build_int_cst (NULL_TREE, logd);
3624
3625   if (d == 2
3626       && BRANCH_COST (optimize_insn_for_speed_p (),
3627                       false) >= 1)
3628     {
3629       temp = gen_reg_rtx (mode);
3630       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3631       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3632                            0, OPTAB_LIB_WIDEN);
3633       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3634     }
3635
3636 #ifdef HAVE_conditional_move
3637   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3638       >= 2)
3639     {
3640       rtx temp2;
3641
3642       /* ??? emit_conditional_move forces a stack adjustment via
3643          compare_from_rtx so, if the sequence is discarded, it will
3644          be lost.  Do it now instead.  */
3645       do_pending_stack_adjust ();
3646
3647       start_sequence ();
3648       temp2 = copy_to_mode_reg (mode, op0);
3649       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3650                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3651       temp = force_reg (mode, temp);
3652
3653       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3654       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3655                                      mode, temp, temp2, mode, 0);
3656       if (temp2)
3657         {
3658           rtx seq = get_insns ();
3659           end_sequence ();
3660           emit_insn (seq);
3661           return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3662         }
3663       end_sequence ();
3664     }
3665 #endif
3666
3667   if (BRANCH_COST (optimize_insn_for_speed_p (),
3668                    false) >= 2)
3669     {
3670       int ushift = GET_MODE_BITSIZE (mode) - logd;
3671
3672       temp = gen_reg_rtx (mode);
3673       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3674       if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3675         temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3676                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3677       else
3678         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3679                              build_int_cst (NULL_TREE, ushift),
3680                              NULL_RTX, 1);
3681       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3682                            0, OPTAB_LIB_WIDEN);
3683       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3684     }
3685
3686   label = gen_label_rtx ();
3687   temp = copy_to_mode_reg (mode, op0);
3688   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3689   expand_inc (temp, GEN_INT (d - 1));
3690   emit_label (label);
3691   return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3692 }
3693 \f
3694 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3695    if that is convenient, and returning where the result is.
3696    You may request either the quotient or the remainder as the result;
3697    specify REM_FLAG nonzero to get the remainder.
3698
3699    CODE is the expression code for which kind of division this is;
3700    it controls how rounding is done.  MODE is the machine mode to use.
3701    UNSIGNEDP nonzero means do unsigned division.  */
3702
3703 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3704    and then correct it by or'ing in missing high bits
3705    if result of ANDI is nonzero.
3706    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3707    This could optimize to a bfexts instruction.
3708    But C doesn't use these operations, so their optimizations are
3709    left for later.  */
3710 /* ??? For modulo, we don't actually need the highpart of the first product,
3711    the low part will do nicely.  And for small divisors, the second multiply
3712    can also be a low-part only multiply or even be completely left out.
3713    E.g. to calculate the remainder of a division by 3 with a 32 bit
3714    multiply, multiply with 0x55555556 and extract the upper two bits;
3715    the result is exact for inputs up to 0x1fffffff.
3716    The input range can be reduced by using cross-sum rules.
3717    For odd divisors >= 3, the following table gives right shift counts
3718    so that if a number is shifted by an integer multiple of the given
3719    amount, the remainder stays the same:
3720    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3721    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3722    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3723    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3724    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3725
3726    Cross-sum rules for even numbers can be derived by leaving as many bits
3727    to the right alone as the divisor has zeros to the right.
3728    E.g. if x is an unsigned 32 bit number:
3729    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3730    */
3731
3732 rtx
3733 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3734                rtx op0, rtx op1, rtx target, int unsignedp)
3735 {
3736   enum machine_mode compute_mode;
3737   rtx tquotient;
3738   rtx quotient = 0, remainder = 0;
3739   rtx last;
3740   int size;
3741   rtx insn, set;
3742   optab optab1, optab2;
3743   int op1_is_constant, op1_is_pow2 = 0;
3744   int max_cost, extra_cost;
3745   static HOST_WIDE_INT last_div_const = 0;
3746   static HOST_WIDE_INT ext_op1;
3747   bool speed = optimize_insn_for_speed_p ();
3748
3749   op1_is_constant = GET_CODE (op1) == CONST_INT;
3750   if (op1_is_constant)
3751     {
3752       ext_op1 = INTVAL (op1);
3753       if (unsignedp)
3754         ext_op1 &= GET_MODE_MASK (mode);
3755       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3756                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3757     }
3758
3759   /*
3760      This is the structure of expand_divmod:
3761
3762      First comes code to fix up the operands so we can perform the operations
3763      correctly and efficiently.
3764
3765      Second comes a switch statement with code specific for each rounding mode.
3766      For some special operands this code emits all RTL for the desired
3767      operation, for other cases, it generates only a quotient and stores it in
3768      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3769      to indicate that it has not done anything.
3770
3771      Last comes code that finishes the operation.  If QUOTIENT is set and
3772      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3773      QUOTIENT is not set, it is computed using trunc rounding.
3774
3775      We try to generate special code for division and remainder when OP1 is a
3776      constant.  If |OP1| = 2**n we can use shifts and some other fast
3777      operations.  For other values of OP1, we compute a carefully selected
3778      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3779      by m.
3780
3781      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3782      half of the product.  Different strategies for generating the product are
3783      implemented in expand_mult_highpart.
3784
3785      If what we actually want is the remainder, we generate that by another
3786      by-constant multiplication and a subtraction.  */
3787
3788   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3789      code below will malfunction if we are, so check here and handle
3790      the special case if so.  */
3791   if (op1 == const1_rtx)
3792     return rem_flag ? const0_rtx : op0;
3793
3794     /* When dividing by -1, we could get an overflow.
3795      negv_optab can handle overflows.  */
3796   if (! unsignedp && op1 == constm1_rtx)
3797     {
3798       if (rem_flag)
3799         return const0_rtx;
3800       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3801                           ? negv_optab : neg_optab, op0, target, 0);
3802     }
3803
3804   if (target
3805       /* Don't use the function value register as a target
3806          since we have to read it as well as write it,
3807          and function-inlining gets confused by this.  */
3808       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3809           /* Don't clobber an operand while doing a multi-step calculation.  */
3810           || ((rem_flag || op1_is_constant)
3811               && (reg_mentioned_p (target, op0)
3812                   || (MEM_P (op0) && MEM_P (target))))
3813           || reg_mentioned_p (target, op1)
3814           || (MEM_P (op1) && MEM_P (target))))
3815     target = 0;
3816
3817   /* Get the mode in which to perform this computation.  Normally it will
3818      be MODE, but sometimes we can't do the desired operation in MODE.
3819      If so, pick a wider mode in which we can do the operation.  Convert
3820      to that mode at the start to avoid repeated conversions.
3821
3822      First see what operations we need.  These depend on the expression
3823      we are evaluating.  (We assume that divxx3 insns exist under the
3824      same conditions that modxx3 insns and that these insns don't normally
3825      fail.  If these assumptions are not correct, we may generate less
3826      efficient code in some cases.)
3827
3828      Then see if we find a mode in which we can open-code that operation
3829      (either a division, modulus, or shift).  Finally, check for the smallest
3830      mode for which we can do the operation with a library call.  */
3831
3832   /* We might want to refine this now that we have division-by-constant
3833      optimization.  Since expand_mult_highpart tries so many variants, it is
3834      not straightforward to generalize this.  Maybe we should make an array
3835      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3836
3837   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3838             ? (unsignedp ? lshr_optab : ashr_optab)
3839             : (unsignedp ? udiv_optab : sdiv_optab));
3840   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3841             ? optab1
3842             : (unsignedp ? udivmod_optab : sdivmod_optab));
3843
3844   for (compute_mode = mode; compute_mode != VOIDmode;
3845        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3846     if (optab_handler (optab1, compute_mode)->insn_code != CODE_FOR_nothing
3847         || optab_handler (optab2, compute_mode)->insn_code != CODE_FOR_nothing)
3848       break;
3849
3850   if (compute_mode == VOIDmode)
3851     for (compute_mode = mode; compute_mode != VOIDmode;
3852          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3853       if (optab_libfunc (optab1, compute_mode)
3854           || optab_libfunc (optab2, compute_mode))
3855         break;
3856
3857   /* If we still couldn't find a mode, use MODE, but expand_binop will
3858      probably die.  */
3859   if (compute_mode == VOIDmode)
3860     compute_mode = mode;
3861
3862   if (target && GET_MODE (target) == compute_mode)
3863     tquotient = target;
3864   else
3865     tquotient = gen_reg_rtx (compute_mode);
3866
3867   size = GET_MODE_BITSIZE (compute_mode);
3868 #if 0
3869   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3870      (mode), and thereby get better code when OP1 is a constant.  Do that
3871      later.  It will require going over all usages of SIZE below.  */
3872   size = GET_MODE_BITSIZE (mode);
3873 #endif
3874
3875   /* Only deduct something for a REM if the last divide done was
3876      for a different constant.   Then set the constant of the last
3877      divide.  */
3878   max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
3879   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3880                      && INTVAL (op1) == last_div_const))
3881     max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
3882
3883   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3884
3885   /* Now convert to the best mode to use.  */
3886   if (compute_mode != mode)
3887     {
3888       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3889       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3890
3891       /* convert_modes may have placed op1 into a register, so we
3892          must recompute the following.  */
3893       op1_is_constant = GET_CODE (op1) == CONST_INT;
3894       op1_is_pow2 = (op1_is_constant
3895                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3896                           || (! unsignedp
3897                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3898     }
3899
3900   /* If one of the operands is a volatile MEM, copy it into a register.  */
3901
3902   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3903     op0 = force_reg (compute_mode, op0);
3904   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3905     op1 = force_reg (compute_mode, op1);
3906
3907   /* If we need the remainder or if OP1 is constant, we need to
3908      put OP0 in a register in case it has any queued subexpressions.  */
3909   if (rem_flag || op1_is_constant)
3910     op0 = force_reg (compute_mode, op0);
3911
3912   last = get_last_insn ();
3913
3914   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3915   if (unsignedp)
3916     {
3917       if (code == FLOOR_DIV_EXPR)
3918         code = TRUNC_DIV_EXPR;
3919       if (code == FLOOR_MOD_EXPR)
3920         code = TRUNC_MOD_EXPR;
3921       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3922         code = TRUNC_DIV_EXPR;
3923     }
3924
3925   if (op1 != const0_rtx)
3926     switch (code)
3927       {
3928       case TRUNC_MOD_EXPR:
3929       case TRUNC_DIV_EXPR:
3930         if (op1_is_constant)
3931           {
3932             if (unsignedp)
3933               {
3934                 unsigned HOST_WIDE_INT mh;
3935                 int pre_shift, post_shift;
3936                 int dummy;
3937                 rtx ml;
3938                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3939                                             & GET_MODE_MASK (compute_mode));
3940
3941                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3942                   {
3943                     pre_shift = floor_log2 (d);
3944                     if (rem_flag)
3945                       {
3946                         remainder
3947                           = expand_binop (compute_mode, and_optab, op0,
3948                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3949                                           remainder, 1,
3950                                           OPTAB_LIB_WIDEN);
3951                         if (remainder)
3952                           return gen_lowpart (mode, remainder);
3953                       }
3954                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3955                                              build_int_cst (NULL_TREE,
3956                                                             pre_shift),
3957                                              tquotient, 1);
3958                   }
3959                 else if (size <= HOST_BITS_PER_WIDE_INT)
3960                   {
3961                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3962                       {
3963                         /* Most significant bit of divisor is set; emit an scc
3964                            insn.  */
3965                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3966                                                     compute_mode, 1, 1);
3967                         if (quotient == 0)
3968                           goto fail1;
3969                       }
3970                     else
3971                       {
3972                         /* Find a suitable multiplier and right shift count
3973                            instead of multiplying with D.  */
3974
3975                         mh = choose_multiplier (d, size, size,
3976                                                 &ml, &post_shift, &dummy);
3977
3978                         /* If the suggested multiplier is more than SIZE bits,
3979                            we can do better for even divisors, using an
3980                            initial right shift.  */
3981                         if (mh != 0 && (d & 1) == 0)
3982                           {
3983                             pre_shift = floor_log2 (d & -d);
3984                             mh = choose_multiplier (d >> pre_shift, size,
3985                                                     size - pre_shift,
3986                                                     &ml, &post_shift, &dummy);
3987                             gcc_assert (!mh);
3988                           }
3989                         else
3990                           pre_shift = 0;
3991
3992                         if (mh != 0)
3993                           {
3994                             rtx t1, t2, t3, t4;
3995
3996                             if (post_shift - 1 >= BITS_PER_WORD)
3997                               goto fail1;
3998
3999                             extra_cost
4000                               = (shift_cost[speed][compute_mode][post_shift - 1]
4001                                  + shift_cost[speed][compute_mode][1]
4002                                  + 2 * add_cost[speed][compute_mode]);
4003                             t1 = expand_mult_highpart (compute_mode, op0, ml,
4004                                                        NULL_RTX, 1,
4005                                                        max_cost - extra_cost);
4006                             if (t1 == 0)
4007                               goto fail1;
4008                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4009                                                                op0, t1),
4010                                                 NULL_RTX);
4011                             t3 = expand_shift
4012                               (RSHIFT_EXPR, compute_mode, t2,
4013                                build_int_cst (NULL_TREE, 1),
4014                                NULL_RTX,1);
4015                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4016                                                               t1, t3),
4017                                                 NULL_RTX);
4018                             quotient = expand_shift
4019                               (RSHIFT_EXPR, compute_mode, t4,
4020                                build_int_cst (NULL_TREE, post_shift - 1),
4021                                tquotient, 1);
4022                           }
4023                         else
4024                           {
4025                             rtx t1, t2;
4026
4027                             if (pre_shift >= BITS_PER_WORD
4028                                 || post_shift >= BITS_PER_WORD)
4029                               goto fail1;
4030
4031                             t1 = expand_shift
4032                               (RSHIFT_EXPR, compute_mode, op0,
4033                                build_int_cst (NULL_TREE, pre_shift),
4034                                NULL_RTX, 1);
4035                             extra_cost
4036                               = (shift_cost[speed][compute_mode][pre_shift]
4037                                  + shift_cost[speed][compute_mode][post_shift]);
4038                             t2 = expand_mult_highpart (compute_mode, t1, ml,
4039                                                        NULL_RTX, 1,
4040                                                        max_cost - extra_cost);
4041                             if (t2 == 0)
4042                               goto fail1;
4043                             quotient = expand_shift
4044                               (RSHIFT_EXPR, compute_mode, t2,
4045                                build_int_cst (NULL_TREE, post_shift),
4046                                tquotient, 1);
4047                           }
4048                       }
4049                   }
4050                 else            /* Too wide mode to use tricky code */
4051                   break;
4052
4053                 insn = get_last_insn ();
4054                 if (insn != last
4055                     && (set = single_set (insn)) != 0
4056                     && SET_DEST (set) == quotient)
4057                   set_unique_reg_note (insn,
4058                                        REG_EQUAL,
4059                                        gen_rtx_UDIV (compute_mode, op0, op1));
4060               }
4061             else                /* TRUNC_DIV, signed */
4062               {
4063                 unsigned HOST_WIDE_INT ml;
4064                 int lgup, post_shift;
4065                 rtx mlr;
4066                 HOST_WIDE_INT d = INTVAL (op1);
4067                 unsigned HOST_WIDE_INT abs_d;
4068
4069                 /* Since d might be INT_MIN, we have to cast to
4070                    unsigned HOST_WIDE_INT before negating to avoid
4071                    undefined signed overflow.  */
4072                 abs_d = (d >= 0
4073                          ? (unsigned HOST_WIDE_INT) d
4074                          : - (unsigned HOST_WIDE_INT) d);
4075
4076                 /* n rem d = n rem -d */
4077                 if (rem_flag && d < 0)
4078                   {
4079                     d = abs_d;
4080                     op1 = gen_int_mode (abs_d, compute_mode);
4081                   }
4082
4083                 if (d == 1)
4084                   quotient = op0;
4085                 else if (d == -1)
4086                   quotient = expand_unop (compute_mode, neg_optab, op0,
4087                                           tquotient, 0);
4088                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4089                   {
4090                     /* This case is not handled correctly below.  */
4091                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4092                                                 compute_mode, 1, 1);
4093                     if (quotient == 0)
4094                       goto fail1;
4095                   }
4096                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4097                          && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4098                                       : sdiv_pow2_cheap[speed][compute_mode])
4099                          /* We assume that cheap metric is true if the
4100                             optab has an expander for this mode.  */
4101                          && ((optab_handler ((rem_flag ? smod_optab
4102                                               : sdiv_optab),
4103                                               compute_mode)->insn_code
4104                               != CODE_FOR_nothing)
4105                              || (optab_handler(sdivmod_optab,
4106                                                compute_mode)
4107                                  ->insn_code != CODE_FOR_nothing)))
4108                   ;
4109                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4110                   {
4111                     if (rem_flag)
4112                       {
4113                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4114                         if (remainder)
4115                           return gen_lowpart (mode, remainder);
4116                       }
4117
4118                     if (sdiv_pow2_cheap[speed][compute_mode]
4119                         && ((optab_handler (sdiv_optab, compute_mode)->insn_code
4120                              != CODE_FOR_nothing)
4121                             || (optab_handler (sdivmod_optab, compute_mode)->insn_code
4122                                 != CODE_FOR_nothing)))
4123                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4124                                                 compute_mode, op0,
4125                                                 gen_int_mode (abs_d,
4126                                                               compute_mode),
4127                                                 NULL_RTX, 0);
4128                     else
4129                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4130
4131                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4132                        negate the quotient.  */
4133                     if (d < 0)
4134                       {
4135                         insn = get_last_insn ();
4136                         if (insn != last
4137                             && (set = single_set (insn)) != 0
4138                             && SET_DEST (set) == quotient
4139                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4140                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4141                           set_unique_reg_note (insn,
4142                                                REG_EQUAL,
4143                                                gen_rtx_DIV (compute_mode,
4144                                                             op0,
4145                                                             GEN_INT
4146                                                             (trunc_int_for_mode
4147                                                              (abs_d,
4148                                                               compute_mode))));
4149
4150                         quotient = expand_unop (compute_mode, neg_optab,
4151                                                 quotient, quotient, 0);
4152                       }
4153                   }
4154                 else if (size <= HOST_BITS_PER_WIDE_INT)
4155                   {
4156                     choose_multiplier (abs_d, size, size - 1,
4157                                        &mlr, &post_shift, &lgup);
4158                     ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4159                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4160                       {
4161                         rtx t1, t2, t3;
4162
4163                         if (post_shift >= BITS_PER_WORD
4164                             || size - 1 >= BITS_PER_WORD)
4165                           goto fail1;
4166
4167                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4168                                       + shift_cost[speed][compute_mode][size - 1]
4169                                       + add_cost[speed][compute_mode]);
4170                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4171                                                    NULL_RTX, 0,
4172                                                    max_cost - extra_cost);
4173                         if (t1 == 0)
4174                           goto fail1;
4175                         t2 = expand_shift
4176                           (RSHIFT_EXPR, compute_mode, t1,
4177                            build_int_cst (NULL_TREE, post_shift),
4178                            NULL_RTX, 0);
4179                         t3 = expand_shift
4180                           (RSHIFT_EXPR, compute_mode, op0,
4181                            build_int_cst (NULL_TREE, size - 1),
4182                            NULL_RTX, 0);
4183                         if (d < 0)
4184                           quotient
4185                             = force_operand (gen_rtx_MINUS (compute_mode,
4186                                                             t3, t2),
4187                                              tquotient);
4188                         else
4189                           quotient
4190                             = force_operand (gen_rtx_MINUS (compute_mode,
4191                                                             t2, t3),
4192                                              tquotient);
4193                       }
4194                     else
4195                       {
4196                         rtx t1, t2, t3, t4;
4197
4198                         if (post_shift >= BITS_PER_WORD
4199                             || size - 1 >= BITS_PER_WORD)
4200                           goto fail1;
4201
4202                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4203                         mlr = gen_int_mode (ml, compute_mode);
4204                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4205                                       + shift_cost[speed][compute_mode][size - 1]
4206                                       + 2 * add_cost[speed][compute_mode]);
4207                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4208                                                    NULL_RTX, 0,
4209                                                    max_cost - extra_cost);
4210                         if (t1 == 0)
4211                           goto fail1;
4212                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4213                                                           t1, op0),
4214                                             NULL_RTX);
4215                         t3 = expand_shift
4216                           (RSHIFT_EXPR, compute_mode, t2,
4217                            build_int_cst (NULL_TREE, post_shift),
4218                            NULL_RTX, 0);
4219                         t4 = expand_shift
4220                           (RSHIFT_EXPR, compute_mode, op0,
4221                            build_int_cst (NULL_TREE, size - 1),
4222                            NULL_RTX, 0);
4223                         if (d < 0)
4224                           quotient
4225                             = force_operand (gen_rtx_MINUS (compute_mode,
4226                                                             t4, t3),
4227                                              tquotient);
4228                         else
4229                           quotient
4230                             = force_operand (gen_rtx_MINUS (compute_mode,
4231                                                             t3, t4),
4232                                              tquotient);
4233                       }
4234                   }
4235                 else            /* Too wide mode to use tricky code */
4236                   break;
4237
4238                 insn = get_last_insn ();
4239                 if (insn != last
4240                     && (set = single_set (insn)) != 0
4241                     && SET_DEST (set) == quotient)
4242                   set_unique_reg_note (insn,
4243                                        REG_EQUAL,
4244                                        gen_rtx_DIV (compute_mode, op0, op1));
4245               }
4246             break;
4247           }
4248       fail1:
4249         delete_insns_since (last);
4250         break;
4251
4252       case FLOOR_DIV_EXPR:
4253       case FLOOR_MOD_EXPR:
4254       /* We will come here only for signed operations.  */
4255         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4256           {
4257             unsigned HOST_WIDE_INT mh;
4258             int pre_shift, lgup, post_shift;
4259             HOST_WIDE_INT d = INTVAL (op1);
4260             rtx ml;
4261
4262             if (d > 0)
4263               {
4264                 /* We could just as easily deal with negative constants here,
4265                    but it does not seem worth the trouble for GCC 2.6.  */
4266                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4267                   {
4268                     pre_shift = floor_log2 (d);
4269                     if (rem_flag)
4270                       {
4271                         remainder = expand_binop (compute_mode, and_optab, op0,
4272                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4273                                                   remainder, 0, OPTAB_LIB_WIDEN);
4274                         if (remainder)
4275                           return gen_lowpart (mode, remainder);
4276                       }
4277                     quotient = expand_shift
4278                       (RSHIFT_EXPR, compute_mode, op0,
4279                        build_int_cst (NULL_TREE, pre_shift),
4280                        tquotient, 0);
4281                   }
4282                 else
4283                   {
4284                     rtx t1, t2, t3, t4;
4285
4286                     mh = choose_multiplier (d, size, size - 1,
4287                                             &ml, &post_shift, &lgup);
4288                     gcc_assert (!mh);
4289
4290                     if (post_shift < BITS_PER_WORD
4291                         && size - 1 < BITS_PER_WORD)
4292                       {
4293                         t1 = expand_shift
4294                           (RSHIFT_EXPR, compute_mode, op0,
4295                            build_int_cst (NULL_TREE, size - 1),
4296                            NULL_RTX, 0);
4297                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4298                                            NULL_RTX, 0, OPTAB_WIDEN);
4299                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4300                                       + shift_cost[speed][compute_mode][size - 1]
4301                                       + 2 * add_cost[speed][compute_mode]);
4302                         t3 = expand_mult_highpart (compute_mode, t2, ml,
4303                                                    NULL_RTX, 1,
4304                                                    max_cost - extra_cost);
4305                         if (t3 != 0)
4306                           {
4307                             t4 = expand_shift
4308                               (RSHIFT_EXPR, compute_mode, t3,
4309                                build_int_cst (NULL_TREE, post_shift),
4310                                NULL_RTX, 1);
4311                             quotient = expand_binop (compute_mode, xor_optab,
4312                                                      t4, t1, tquotient, 0,
4313                                                      OPTAB_WIDEN);
4314                           }
4315                       }
4316                   }
4317               }
4318             else
4319               {
4320                 rtx nsign, t1, t2, t3, t4;
4321                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4322                                                   op0, constm1_rtx), NULL_RTX);
4323                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4324                                    0, OPTAB_WIDEN);
4325                 nsign = expand_shift
4326                   (RSHIFT_EXPR, compute_mode, t2,
4327                    build_int_cst (NULL_TREE, size - 1),
4328                    NULL_RTX, 0);
4329                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4330                                     NULL_RTX);
4331                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4332                                     NULL_RTX, 0);
4333                 if (t4)
4334                   {
4335                     rtx t5;
4336                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4337                                       NULL_RTX, 0);
4338                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4339                                                             t4, t5),
4340                                               tquotient);
4341                   }
4342               }
4343           }
4344
4345         if (quotient != 0)
4346           break;
4347         delete_insns_since (last);
4348
4349         /* Try using an instruction that produces both the quotient and
4350            remainder, using truncation.  We can easily compensate the quotient
4351            or remainder to get floor rounding, once we have the remainder.
4352            Notice that we compute also the final remainder value here,
4353            and return the result right away.  */
4354         if (target == 0 || GET_MODE (target) != compute_mode)
4355           target = gen_reg_rtx (compute_mode);
4356
4357         if (rem_flag)
4358           {
4359             remainder
4360               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4361             quotient = gen_reg_rtx (compute_mode);
4362           }
4363         else
4364           {
4365             quotient
4366               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4367             remainder = gen_reg_rtx (compute_mode);
4368           }
4369
4370         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4371                                  quotient, remainder, 0))
4372           {
4373             /* This could be computed with a branch-less sequence.
4374                Save that for later.  */
4375             rtx tem;
4376             rtx label = gen_label_rtx ();
4377             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4378             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4379                                 NULL_RTX, 0, OPTAB_WIDEN);
4380             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4381             expand_dec (quotient, const1_rtx);
4382             expand_inc (remainder, op1);
4383             emit_label (label);
4384             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4385           }
4386
4387         /* No luck with division elimination or divmod.  Have to do it
4388            by conditionally adjusting op0 *and* the result.  */
4389         {
4390           rtx label1, label2, label3, label4, label5;
4391           rtx adjusted_op0;
4392           rtx tem;
4393
4394           quotient = gen_reg_rtx (compute_mode);
4395           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4396           label1 = gen_label_rtx ();
4397           label2 = gen_label_rtx ();
4398           label3 = gen_label_rtx ();
4399           label4 = gen_label_rtx ();
4400           label5 = gen_label_rtx ();
4401           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4402           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4403           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4404                               quotient, 0, OPTAB_LIB_WIDEN);
4405           if (tem != quotient)
4406             emit_move_insn (quotient, tem);
4407           emit_jump_insn (gen_jump (label5));
4408           emit_barrier ();
4409           emit_label (label1);
4410           expand_inc (adjusted_op0, const1_rtx);
4411           emit_jump_insn (gen_jump (label4));
4412           emit_barrier ();
4413           emit_label (label2);
4414           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4415           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4416                               quotient, 0, OPTAB_LIB_WIDEN);
4417           if (tem != quotient)
4418             emit_move_insn (quotient, tem);
4419           emit_jump_insn (gen_jump (label5));
4420           emit_barrier ();
4421           emit_label (label3);
4422           expand_dec (adjusted_op0, const1_rtx);
4423           emit_label (label4);
4424           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4425                               quotient, 0, OPTAB_LIB_WIDEN);
4426           if (tem != quotient)
4427             emit_move_insn (quotient, tem);
4428           expand_dec (quotient, const1_rtx);
4429           emit_label (label5);
4430         }
4431         break;
4432
4433       case CEIL_DIV_EXPR:
4434       case CEIL_MOD_EXPR:
4435         if (unsignedp)
4436           {
4437             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4438               {
4439                 rtx t1, t2, t3;
4440                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4441                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4442                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4443                                    tquotient, 1);
4444                 t2 = expand_binop (compute_mode, and_optab, op0,
4445                                    GEN_INT (d - 1),
4446                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4447                 t3 = gen_reg_rtx (compute_mode);
4448                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4449                                       compute_mode, 1, 1);
4450                 if (t3 == 0)
4451                   {
4452                     rtx lab;
4453                     lab = gen_label_rtx ();
4454                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4455                     expand_inc (t1, const1_rtx);
4456                     emit_label (lab);
4457                     quotient = t1;
4458                   }
4459                 else
4460                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4461                                                           t1, t3),
4462                                             tquotient);
4463                 break;
4464               }
4465
4466             /* Try using an instruction that produces both the quotient and
4467                remainder, using truncation.  We can easily compensate the
4468                quotient or remainder to get ceiling rounding, once we have the
4469                remainder.  Notice that we compute also the final remainder
4470                value here, and return the result right away.  */
4471             if (target == 0 || GET_MODE (target) != compute_mode)
4472               target = gen_reg_rtx (compute_mode);
4473
4474             if (rem_flag)
4475               {
4476                 remainder = (REG_P (target)
4477                              ? target : gen_reg_rtx (compute_mode));
4478                 quotient = gen_reg_rtx (compute_mode);
4479               }
4480             else
4481               {
4482                 quotient = (REG_P (target)
4483                             ? target : gen_reg_rtx (compute_mode));
4484                 remainder = gen_reg_rtx (compute_mode);
4485               }
4486
4487             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4488                                      remainder, 1))
4489               {
4490                 /* This could be computed with a branch-less sequence.
4491                    Save that for later.  */
4492                 rtx label = gen_label_rtx ();
4493                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4494                                  compute_mode, label);
4495                 expand_inc (quotient, const1_rtx);
4496                 expand_dec (remainder, op1);
4497                 emit_label (label);
4498                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4499               }
4500
4501             /* No luck with division elimination or divmod.  Have to do it
4502                by conditionally adjusting op0 *and* the result.  */
4503             {
4504               rtx label1, label2;
4505               rtx adjusted_op0, tem;
4506
4507               quotient = gen_reg_rtx (compute_mode);
4508               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4509               label1 = gen_label_rtx ();
4510               label2 = gen_label_rtx ();
4511               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4512                                compute_mode, label1);
4513               emit_move_insn  (quotient, const0_rtx);
4514               emit_jump_insn (gen_jump (label2));
4515               emit_barrier ();
4516               emit_label (label1);
4517               expand_dec (adjusted_op0, const1_rtx);
4518               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4519                                   quotient, 1, OPTAB_LIB_WIDEN);
4520               if (tem != quotient)
4521                 emit_move_insn (quotient, tem);
4522               expand_inc (quotient, const1_rtx);
4523               emit_label (label2);
4524             }
4525           }
4526         else /* signed */
4527           {
4528             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4529                 && INTVAL (op1) >= 0)
4530               {
4531                 /* This is extremely similar to the code for the unsigned case
4532                    above.  For 2.7 we should merge these variants, but for
4533                    2.6.1 I don't want to touch the code for unsigned since that
4534                    get used in C.  The signed case will only be used by other
4535                    languages (Ada).  */
4536
4537                 rtx t1, t2, t3;
4538                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4539                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4540                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4541                                    tquotient, 0);
4542                 t2 = expand_binop (compute_mode, and_optab, op0,
4543                                    GEN_INT (d - 1),
4544                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4545                 t3 = gen_reg_rtx (compute_mode);
4546                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4547                                       compute_mode, 1, 1);
4548                 if (t3 == 0)
4549                   {
4550                     rtx lab;
4551                     lab = gen_label_rtx ();
4552                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4553                     expand_inc (t1, const1_rtx);
4554                     emit_label (lab);
4555                     quotient = t1;
4556                   }
4557                 else
4558                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4559                                                           t1, t3),
4560                                             tquotient);
4561                 break;
4562               }
4563
4564             /* Try using an instruction that produces both the quotient and
4565                remainder, using truncation.  We can easily compensate the
4566                quotient or remainder to get ceiling rounding, once we have the
4567                remainder.  Notice that we compute also the final remainder
4568                value here, and return the result right away.  */
4569             if (target == 0 || GET_MODE (target) != compute_mode)
4570               target = gen_reg_rtx (compute_mode);
4571             if (rem_flag)
4572               {
4573                 remainder= (REG_P (target)
4574                             ? target : gen_reg_rtx (compute_mode));
4575                 quotient = gen_reg_rtx (compute_mode);
4576               }
4577             else
4578               {
4579                 quotient = (REG_P (target)
4580                             ? target : gen_reg_rtx (compute_mode));
4581                 remainder = gen_reg_rtx (compute_mode);
4582               }
4583
4584             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4585                                      remainder, 0))
4586               {
4587                 /* This could be computed with a branch-less sequence.
4588                    Save that for later.  */
4589                 rtx tem;
4590                 rtx label = gen_label_rtx ();
4591                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4592                                  compute_mode, label);
4593                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4594                                     NULL_RTX, 0, OPTAB_WIDEN);
4595                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4596                 expand_inc (quotient, const1_rtx);
4597                 expand_dec (remainder, op1);
4598                 emit_label (label);
4599                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4600               }
4601
4602             /* No luck with division elimination or divmod.  Have to do it
4603                by conditionally adjusting op0 *and* the result.  */
4604             {
4605               rtx label1, label2, label3, label4, label5;
4606               rtx adjusted_op0;
4607               rtx tem;
4608
4609               quotient = gen_reg_rtx (compute_mode);
4610               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4611               label1 = gen_label_rtx ();
4612               label2 = gen_label_rtx ();
4613               label3 = gen_label_rtx ();
4614               label4 = gen_label_rtx ();
4615               label5 = gen_label_rtx ();
4616               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4617               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4618                                compute_mode, label1);
4619               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4620                                   quotient, 0, OPTAB_LIB_WIDEN);
4621               if (tem != quotient)
4622                 emit_move_insn (quotient, tem);
4623               emit_jump_insn (gen_jump (label5));
4624               emit_barrier ();
4625               emit_label (label1);
4626               expand_dec (adjusted_op0, const1_rtx);
4627               emit_jump_insn (gen_jump (label4));
4628               emit_barrier ();
4629               emit_label (label2);
4630               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4631                                compute_mode, label3);
4632               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4633                                   quotient, 0, OPTAB_LIB_WIDEN);
4634               if (tem != quotient)
4635                 emit_move_insn (quotient, tem);
4636               emit_jump_insn (gen_jump (label5));
4637               emit_barrier ();
4638               emit_label (label3);
4639               expand_inc (adjusted_op0, const1_rtx);
4640               emit_label (label4);
4641               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4642                                   quotient, 0, OPTAB_LIB_WIDEN);
4643               if (tem != quotient)
4644                 emit_move_insn (quotient, tem);
4645               expand_inc (quotient, const1_rtx);
4646               emit_label (label5);
4647             }
4648           }
4649         break;
4650
4651       case EXACT_DIV_EXPR:
4652         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4653           {
4654             HOST_WIDE_INT d = INTVAL (op1);
4655             unsigned HOST_WIDE_INT ml;
4656             int pre_shift;
4657             rtx t1;
4658
4659             pre_shift = floor_log2 (d & -d);
4660             ml = invert_mod2n (d >> pre_shift, size);
4661             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4662                                build_int_cst (NULL_TREE, pre_shift),
4663                                NULL_RTX, unsignedp);
4664             quotient = expand_mult (compute_mode, t1,
4665                                     gen_int_mode (ml, compute_mode),
4666                                     NULL_RTX, 1);
4667
4668             insn = get_last_insn ();
4669             set_unique_reg_note (insn,
4670                                  REG_EQUAL,
4671                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4672                                                  compute_mode,
4673                                                  op0, op1));
4674           }
4675         break;
4676
4677       case ROUND_DIV_EXPR:
4678       case ROUND_MOD_EXPR:
4679         if (unsignedp)
4680           {
4681             rtx tem;
4682             rtx label;
4683             label = gen_label_rtx ();
4684             quotient = gen_reg_rtx (compute_mode);
4685             remainder = gen_reg_rtx (compute_mode);
4686             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4687               {
4688                 rtx tem;
4689                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4690                                          quotient, 1, OPTAB_LIB_WIDEN);
4691                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4692                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4693                                           remainder, 1, OPTAB_LIB_WIDEN);
4694               }
4695             tem = plus_constant (op1, -1);
4696             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4697                                 build_int_cst (NULL_TREE, 1),
4698                                 NULL_RTX, 1);
4699             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4700             expand_inc (quotient, const1_rtx);
4701             expand_dec (remainder, op1);
4702             emit_label (label);
4703           }
4704         else
4705           {
4706             rtx abs_rem, abs_op1, tem, mask;
4707             rtx label;
4708             label = gen_label_rtx ();
4709             quotient = gen_reg_rtx (compute_mode);
4710             remainder = gen_reg_rtx (compute_mode);
4711             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4712               {
4713                 rtx tem;
4714                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4715                                          quotient, 0, OPTAB_LIB_WIDEN);
4716                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4717                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4718                                           remainder, 0, OPTAB_LIB_WIDEN);
4719               }
4720             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4721             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4722             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4723                                 build_int_cst (NULL_TREE, 1),
4724                                 NULL_RTX, 1);
4725             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4726             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4727                                 NULL_RTX, 0, OPTAB_WIDEN);
4728             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4729                                  build_int_cst (NULL_TREE, size - 1),
4730                                  NULL_RTX, 0);
4731             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4732                                 NULL_RTX, 0, OPTAB_WIDEN);
4733             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4734                                 NULL_RTX, 0, OPTAB_WIDEN);
4735             expand_inc (quotient, tem);
4736             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4737                                 NULL_RTX, 0, OPTAB_WIDEN);
4738             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4739                                 NULL_RTX, 0, OPTAB_WIDEN);
4740             expand_dec (remainder, tem);
4741             emit_label (label);
4742           }
4743         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4744
4745       default:
4746         gcc_unreachable ();
4747       }
4748
4749   if (quotient == 0)
4750     {
4751       if (target && GET_MODE (target) != compute_mode)
4752         target = 0;
4753
4754       if (rem_flag)
4755         {
4756           /* Try to produce the remainder without producing the quotient.
4757              If we seem to have a divmod pattern that does not require widening,
4758              don't try widening here.  We should really have a WIDEN argument
4759              to expand_twoval_binop, since what we'd really like to do here is
4760              1) try a mod insn in compute_mode
4761              2) try a divmod insn in compute_mode
4762              3) try a div insn in compute_mode and multiply-subtract to get
4763                 remainder
4764              4) try the same things with widening allowed.  */
4765           remainder
4766             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4767                                  op0, op1, target,
4768                                  unsignedp,
4769                                  ((optab_handler (optab2, compute_mode)->insn_code
4770                                    != CODE_FOR_nothing)
4771                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4772           if (remainder == 0)
4773             {
4774               /* No luck there.  Can we do remainder and divide at once
4775                  without a library call?  */
4776               remainder = gen_reg_rtx (compute_mode);
4777               if (! expand_twoval_binop ((unsignedp
4778                                           ? udivmod_optab
4779                                           : sdivmod_optab),
4780                                          op0, op1,
4781                                          NULL_RTX, remainder, unsignedp))
4782                 remainder = 0;
4783             }
4784
4785           if (remainder)
4786             return gen_lowpart (mode, remainder);
4787         }
4788
4789       /* Produce the quotient.  Try a quotient insn, but not a library call.
4790          If we have a divmod in this mode, use it in preference to widening
4791          the div (for this test we assume it will not fail). Note that optab2
4792          is set to the one of the two optabs that the call below will use.  */
4793       quotient
4794         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4795                              op0, op1, rem_flag ? NULL_RTX : target,
4796                              unsignedp,
4797                              ((optab_handler (optab2, compute_mode)->insn_code
4798                                != CODE_FOR_nothing)
4799                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4800
4801       if (quotient == 0)
4802         {
4803           /* No luck there.  Try a quotient-and-remainder insn,
4804              keeping the quotient alone.  */
4805           quotient = gen_reg_rtx (compute_mode);
4806           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4807                                      op0, op1,
4808                                      quotient, NULL_RTX, unsignedp))
4809             {
4810               quotient = 0;
4811               if (! rem_flag)
4812                 /* Still no luck.  If we are not computing the remainder,
4813                    use a library call for the quotient.  */
4814                 quotient = sign_expand_binop (compute_mode,
4815                                               udiv_optab, sdiv_optab,
4816                                               op0, op1, target,
4817                                               unsignedp, OPTAB_LIB_WIDEN);
4818             }
4819         }
4820     }
4821
4822   if (rem_flag)
4823     {
4824       if (target && GET_MODE (target) != compute_mode)
4825         target = 0;
4826
4827       if (quotient == 0)
4828         {
4829           /* No divide instruction either.  Use library for remainder.  */
4830           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4831                                          op0, op1, target,
4832                                          unsignedp, OPTAB_LIB_WIDEN);
4833           /* No remainder function.  Try a quotient-and-remainder
4834              function, keeping the remainder.  */
4835           if (!remainder)
4836             {
4837               remainder = gen_reg_rtx (compute_mode);
4838               if (!expand_twoval_binop_libfunc
4839                   (unsignedp ? udivmod_optab : sdivmod_optab,
4840                    op0, op1,
4841                    NULL_RTX, remainder,
4842                    unsignedp ? UMOD : MOD))
4843                 remainder = NULL_RTX;
4844             }
4845         }
4846       else
4847         {
4848           /* We divided.  Now finish doing X - Y * (X / Y).  */
4849           remainder = expand_mult (compute_mode, quotient, op1,
4850                                    NULL_RTX, unsignedp);
4851           remainder = expand_binop (compute_mode, sub_optab, op0,
4852                                     remainder, target, unsignedp,
4853                                     OPTAB_LIB_WIDEN);
4854         }
4855     }
4856
4857   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4858 }
4859 \f
4860 /* Return a tree node with data type TYPE, describing the value of X.
4861    Usually this is an VAR_DECL, if there is no obvious better choice.
4862    X may be an expression, however we only support those expressions
4863    generated by loop.c.  */
4864
4865 tree
4866 make_tree (tree type, rtx x)
4867 {
4868   tree t;
4869
4870   switch (GET_CODE (x))
4871     {
4872     case CONST_INT:
4873       {
4874         HOST_WIDE_INT hi = 0;
4875
4876         if (INTVAL (x) < 0
4877             && !(TYPE_UNSIGNED (type)
4878                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
4879                      < HOST_BITS_PER_WIDE_INT)))
4880           hi = -1;
4881
4882         t = build_int_cst_wide (type, INTVAL (x), hi);
4883
4884         return t;
4885       }
4886
4887     case CONST_DOUBLE:
4888       if (GET_MODE (x) == VOIDmode)
4889         t = build_int_cst_wide (type,
4890                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4891       else
4892         {
4893           REAL_VALUE_TYPE d;
4894
4895           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4896           t = build_real (type, d);
4897         }
4898
4899       return t;
4900
4901     case CONST_VECTOR:
4902       {
4903         int units = CONST_VECTOR_NUNITS (x);
4904         tree itype = TREE_TYPE (type);
4905         tree t = NULL_TREE;
4906         int i;
4907
4908
4909         /* Build a tree with vector elements.  */
4910         for (i = units - 1; i >= 0; --i)
4911           {
4912             rtx elt = CONST_VECTOR_ELT (x, i);
4913             t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
4914           }
4915
4916         return build_vector (type, t);
4917       }
4918
4919     case PLUS:
4920       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4921                           make_tree (type, XEXP (x, 1)));
4922
4923     case MINUS:
4924       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4925                           make_tree (type, XEXP (x, 1)));
4926
4927     case NEG:
4928       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4929
4930     case MULT:
4931       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4932                           make_tree (type, XEXP (x, 1)));
4933
4934     case ASHIFT:
4935       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4936                           make_tree (type, XEXP (x, 1)));
4937
4938     case LSHIFTRT:
4939       t = unsigned_type_for (type);
4940       return fold_convert (type, build2 (RSHIFT_EXPR, t,
4941                                          make_tree (t, XEXP (x, 0)),
4942                                          make_tree (type, XEXP (x, 1))));
4943
4944     case ASHIFTRT:
4945       t = signed_type_for (type);
4946       return fold_convert (type, build2 (RSHIFT_EXPR, t,
4947                                          make_tree (t, XEXP (x, 0)),
4948                                          make_tree (type, XEXP (x, 1))));
4949
4950     case DIV:
4951       if (TREE_CODE (type) != REAL_TYPE)
4952         t = signed_type_for (type);
4953       else
4954         t = type;
4955
4956       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4957                                          make_tree (t, XEXP (x, 0)),
4958                                          make_tree (t, XEXP (x, 1))));
4959     case UDIV:
4960       t = unsigned_type_for (type);
4961       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4962                                          make_tree (t, XEXP (x, 0)),
4963                                          make_tree (t, XEXP (x, 1))));
4964
4965     case SIGN_EXTEND:
4966     case ZERO_EXTEND:
4967       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4968                                           GET_CODE (x) == ZERO_EXTEND);
4969       return fold_convert (type, make_tree (t, XEXP (x, 0)));
4970
4971     case CONST:
4972       return make_tree (type, XEXP (x, 0));
4973
4974     case SYMBOL_REF:
4975       t = SYMBOL_REF_DECL (x);
4976       if (t)
4977         return fold_convert (type, build_fold_addr_expr (t));
4978       /* else fall through.  */
4979
4980     default:
4981       t = build_decl (VAR_DECL, NULL_TREE, type);
4982
4983       /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4984          ptr_mode.  So convert.  */
4985       if (POINTER_TYPE_P (type))
4986         x = convert_memory_address (TYPE_MODE (type), x);
4987
4988       /* Note that we do *not* use SET_DECL_RTL here, because we do not
4989          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
4990       t->decl_with_rtl.rtl = x;
4991
4992       return t;
4993     }
4994 }
4995 \f
4996 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4997    and returning TARGET.
4998
4999    If TARGET is 0, a pseudo-register or constant is returned.  */
5000
5001 rtx
5002 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5003 {
5004   rtx tem = 0;
5005
5006   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5007     tem = simplify_binary_operation (AND, mode, op0, op1);
5008   if (tem == 0)
5009     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5010
5011   if (target == 0)
5012     target = tem;
5013   else if (tem != target)
5014     emit_move_insn (target, tem);
5015   return target;
5016 }
5017 \f
5018 /* Helper function for emit_store_flag.  */
5019 static rtx
5020 emit_store_flag_1 (rtx target, rtx subtarget, enum machine_mode mode,
5021                    int normalizep)
5022 {
5023   rtx op0;
5024   enum machine_mode target_mode = GET_MODE (target);
5025
5026   /* If we are converting to a wider mode, first convert to
5027      TARGET_MODE, then normalize.  This produces better combining
5028      opportunities on machines that have a SIGN_EXTRACT when we are
5029      testing a single bit.  This mostly benefits the 68k.
5030
5031      If STORE_FLAG_VALUE does not have the sign bit set when
5032      interpreted in MODE, we can do this conversion as unsigned, which
5033      is usually more efficient.  */
5034   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5035     {
5036       convert_move (target, subtarget,
5037                     (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5038                     && 0 == (STORE_FLAG_VALUE
5039                              & ((HOST_WIDE_INT) 1
5040                                 << (GET_MODE_BITSIZE (mode) -1))));
5041       op0 = target;
5042       mode = target_mode;
5043     }
5044   else
5045     op0 = subtarget;
5046
5047   /* If we want to keep subexpressions around, don't reuse our last
5048      target.  */
5049   if (optimize)
5050     subtarget = 0;
5051
5052   /* Now normalize to the proper value in MODE.  Sometimes we don't
5053      have to do anything.  */
5054   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5055     ;
5056   /* STORE_FLAG_VALUE might be the most negative number, so write
5057      the comparison this way to avoid a compiler-time warning.  */
5058   else if (- normalizep == STORE_FLAG_VALUE)
5059     op0 = expand_unop (mode, neg_optab, op0, subtarget, 0);
5060
5061   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5062      it hard to use a value of just the sign bit due to ANSI integer
5063      constant typing rules.  */
5064   else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5065            && (STORE_FLAG_VALUE
5066                & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1))))
5067     op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5068                         size_int (GET_MODE_BITSIZE (mode) - 1), subtarget,
5069                         normalizep == 1);
5070   else
5071     {
5072       gcc_assert (STORE_FLAG_VALUE & 1);
5073
5074       op0 = expand_and (mode, op0, const1_rtx, subtarget);
5075       if (normalizep == -1)
5076         op0 = expand_unop (mode, neg_optab, op0, op0, 0);
5077     }
5078
5079   /* If we were converting to a smaller mode, do the conversion now.  */
5080   if (target_mode != mode)
5081     {
5082       convert_move (target, op0, 0);
5083       return target;
5084     }
5085   else
5086     return op0;
5087 }
5088
5089 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5090    and storing in TARGET.  Normally return TARGET.
5091    Return 0 if that cannot be done.
5092
5093    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5094    it is VOIDmode, they cannot both be CONST_INT.
5095
5096    UNSIGNEDP is for the case where we have to widen the operands
5097    to perform the operation.  It says to use zero-extension.
5098
5099    NORMALIZEP is 1 if we should convert the result to be either zero
5100    or one.  Normalize is -1 if we should convert the result to be
5101    either zero or -1.  If NORMALIZEP is zero, the result will be left
5102    "raw" out of the scc insn.  */
5103
5104 rtx
5105 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5106                  enum machine_mode mode, int unsignedp, int normalizep)
5107 {
5108   rtx subtarget;
5109   enum insn_code icode;
5110   enum machine_mode compare_mode;
5111   enum machine_mode target_mode = GET_MODE (target);
5112   rtx tem;
5113   rtx last = get_last_insn ();
5114   rtx pattern, comparison;
5115
5116   if (unsignedp)
5117     code = unsigned_condition (code);
5118
5119   /* If one operand is constant, make it the second one.  Only do this
5120      if the other operand is not constant as well.  */
5121
5122   if (swap_commutative_operands_p (op0, op1))
5123     {
5124       tem = op0;
5125       op0 = op1;
5126       op1 = tem;
5127       code = swap_condition (code);
5128     }
5129
5130   if (mode == VOIDmode)
5131     mode = GET_MODE (op0);
5132
5133   /* For some comparisons with 1 and -1, we can convert this to
5134      comparisons with zero.  This will often produce more opportunities for
5135      store-flag insns.  */
5136
5137   switch (code)
5138     {
5139     case LT:
5140       if (op1 == const1_rtx)
5141         op1 = const0_rtx, code = LE;
5142       break;
5143     case LE:
5144       if (op1 == constm1_rtx)
5145         op1 = const0_rtx, code = LT;
5146       break;
5147     case GE:
5148       if (op1 == const1_rtx)
5149         op1 = const0_rtx, code = GT;
5150       break;
5151     case GT:
5152       if (op1 == constm1_rtx)
5153         op1 = const0_rtx, code = GE;
5154       break;
5155     case GEU:
5156       if (op1 == const1_rtx)
5157         op1 = const0_rtx, code = NE;
5158       break;
5159     case LTU:
5160       if (op1 == const1_rtx)
5161         op1 = const0_rtx, code = EQ;
5162       break;
5163     default:
5164       break;
5165     }
5166
5167   /* If we are comparing a double-word integer with zero or -1, we can
5168      convert the comparison into one involving a single word.  */
5169   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5170       && GET_MODE_CLASS (mode) == MODE_INT
5171       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5172     {
5173       if ((code == EQ || code == NE)
5174           && (op1 == const0_rtx || op1 == constm1_rtx))
5175         {
5176           rtx op00, op01, op0both;
5177
5178           /* Do a logical OR or AND of the two words and compare the
5179              result.  */
5180           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5181           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5182           op0both = expand_binop (word_mode,
5183                                   op1 == const0_rtx ? ior_optab : and_optab,
5184                                   op00, op01, NULL_RTX, unsignedp,
5185                                   OPTAB_DIRECT);
5186
5187           if (op0both != 0)
5188             return emit_store_flag (target, code, op0both, op1, word_mode,
5189                                     unsignedp, normalizep);
5190         }
5191       else if ((code == LT || code == GE) && op1 == const0_rtx)
5192         {
5193           rtx op0h;
5194
5195           /* If testing the sign bit, can just test on high word.  */
5196           op0h = simplify_gen_subreg (word_mode, op0, mode,
5197                                       subreg_highpart_offset (word_mode,
5198                                                               mode));
5199           return emit_store_flag (target, code, op0h, op1, word_mode,
5200                                   unsignedp, normalizep);
5201         }
5202     }
5203
5204   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5205      complement of A (for GE) and shifting the sign bit to the low bit.  */
5206   if (op1 == const0_rtx && (code == LT || code == GE)
5207       && GET_MODE_CLASS (mode) == MODE_INT
5208       && (normalizep || STORE_FLAG_VALUE == 1
5209           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5210               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5211                   == ((unsigned HOST_WIDE_INT) 1
5212                       << (GET_MODE_BITSIZE (mode) - 1))))))
5213     {
5214       subtarget = target;
5215
5216       /* If the result is to be wider than OP0, it is best to convert it
5217          first.  If it is to be narrower, it is *incorrect* to convert it
5218          first.  */
5219       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5220         {
5221           op0 = convert_modes (target_mode, mode, op0, 0);
5222           mode = target_mode;
5223         }
5224
5225       if (target_mode != mode)
5226         subtarget = 0;
5227
5228       if (code == GE)
5229         op0 = expand_unop (mode, one_cmpl_optab, op0,
5230                            ((STORE_FLAG_VALUE == 1 || normalizep)
5231                             ? 0 : subtarget), 0);
5232
5233       if (STORE_FLAG_VALUE == 1 || normalizep)
5234         /* If we are supposed to produce a 0/1 value, we want to do
5235            a logical shift from the sign bit to the low-order bit; for
5236            a -1/0 value, we do an arithmetic shift.  */
5237         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5238                             size_int (GET_MODE_BITSIZE (mode) - 1),
5239                             subtarget, normalizep != -1);
5240
5241       if (mode != target_mode)
5242         op0 = convert_modes (target_mode, mode, op0, 0);
5243
5244       return op0;
5245     }
5246
5247   icode = setcc_gen_code[(int) code];
5248
5249   if (icode != CODE_FOR_nothing)
5250     {
5251       insn_operand_predicate_fn pred;
5252
5253       /* We think we may be able to do this with a scc insn.  Emit the
5254          comparison and then the scc insn.  */
5255
5256       do_pending_stack_adjust ();
5257       last = get_last_insn ();
5258
5259       comparison
5260         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
5261       if (CONSTANT_P (comparison))
5262         {
5263           switch (GET_CODE (comparison))
5264             {
5265             case CONST_INT:
5266               if (comparison == const0_rtx)
5267                 return const0_rtx;
5268               break;
5269
5270 #ifdef FLOAT_STORE_FLAG_VALUE
5271             case CONST_DOUBLE:
5272               if (comparison == CONST0_RTX (GET_MODE (comparison)))
5273                 return const0_rtx;
5274               break;
5275 #endif
5276             default:
5277               gcc_unreachable ();
5278             }
5279
5280           if (normalizep == 1)
5281             return const1_rtx;
5282           if (normalizep == -1)
5283             return constm1_rtx;
5284           return const_true_rtx;
5285         }
5286
5287       /* The code of COMPARISON may not match CODE if compare_from_rtx
5288          decided to swap its operands and reverse the original code.
5289
5290          We know that compare_from_rtx returns either a CONST_INT or
5291          a new comparison code, so it is safe to just extract the
5292          code from COMPARISON.  */
5293       code = GET_CODE (comparison);
5294
5295       /* Get a reference to the target in the proper mode for this insn.  */
5296       compare_mode = insn_data[(int) icode].operand[0].mode;
5297       subtarget = target;
5298       pred = insn_data[(int) icode].operand[0].predicate;
5299       if (optimize || ! (*pred) (subtarget, compare_mode))
5300         subtarget = gen_reg_rtx (compare_mode);
5301
5302       pattern = GEN_FCN (icode) (subtarget);
5303       if (pattern)
5304         {
5305           emit_insn (pattern);
5306           return emit_store_flag_1 (target, subtarget, compare_mode,
5307                                     normalizep);
5308         }
5309     }
5310   else
5311     {
5312       /* We don't have an scc insn, so try a cstore insn.  */
5313
5314       for (compare_mode = mode; compare_mode != VOIDmode;
5315            compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5316         {
5317           icode = optab_handler (cstore_optab, compare_mode)->insn_code;
5318           if (icode != CODE_FOR_nothing)
5319             break;
5320         }
5321
5322       if (icode != CODE_FOR_nothing)
5323         {
5324           enum machine_mode result_mode
5325             = insn_data[(int) icode].operand[0].mode;
5326           rtx cstore_op0 = op0;
5327           rtx cstore_op1 = op1;
5328
5329           do_pending_stack_adjust ();
5330           last = get_last_insn ();
5331
5332           if (compare_mode != mode)
5333             {
5334               cstore_op0 = convert_modes (compare_mode, mode, cstore_op0,
5335                                           unsignedp);
5336               cstore_op1 = convert_modes (compare_mode, mode, cstore_op1,
5337                                           unsignedp);
5338             }
5339
5340           if (!insn_data[(int) icode].operand[2].predicate (cstore_op0,
5341                                                             compare_mode))
5342             cstore_op0 = copy_to_mode_reg (compare_mode, cstore_op0);
5343
5344           if (!insn_data[(int) icode].operand[3].predicate (cstore_op1,
5345                                                             compare_mode))
5346             cstore_op1 = copy_to_mode_reg (compare_mode, cstore_op1);
5347
5348           comparison = gen_rtx_fmt_ee (code, result_mode, cstore_op0,
5349                                        cstore_op1);
5350           subtarget = target;
5351
5352           if (optimize || !(insn_data[(int) icode].operand[0].predicate
5353                             (subtarget, result_mode)))
5354             subtarget = gen_reg_rtx (result_mode);
5355
5356           pattern = GEN_FCN (icode) (subtarget, comparison, cstore_op0,
5357                                      cstore_op1);
5358
5359           if (pattern)
5360             {
5361               emit_insn (pattern);
5362               return emit_store_flag_1 (target, subtarget, result_mode,
5363                                         normalizep);
5364             }
5365         }
5366     }
5367
5368   delete_insns_since (last);
5369
5370   /* If optimizing, use different pseudo registers for each insn, instead
5371      of reusing the same pseudo.  This leads to better CSE, but slows
5372      down the compiler, since there are more pseudos */
5373   subtarget = (!optimize
5374                && (target_mode == mode)) ? target : NULL_RTX;
5375
5376   /* If we reached here, we can't do this with a scc insn.  However, there
5377      are some comparisons that can be done directly.  For example, if
5378      this is an equality comparison of integers, we can try to exclusive-or
5379      (or subtract) the two operands and use a recursive call to try the
5380      comparison with zero.  Don't do any of these cases if branches are
5381      very cheap.  */
5382
5383   if (BRANCH_COST (optimize_insn_for_speed_p (),
5384                    false) > 0
5385       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5386       && op1 != const0_rtx)
5387     {
5388       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5389                           OPTAB_WIDEN);
5390
5391       if (tem == 0)
5392         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5393                             OPTAB_WIDEN);
5394       if (tem != 0)
5395         tem = emit_store_flag (target, code, tem, const0_rtx,
5396                                mode, unsignedp, normalizep);
5397       if (tem == 0)
5398         delete_insns_since (last);
5399       return tem;
5400     }
5401
5402   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5403      the constant zero.  Reject all other comparisons at this point.  Only
5404      do LE and GT if branches are expensive since they are expensive on
5405      2-operand machines.  */
5406
5407   if (BRANCH_COST (optimize_insn_for_speed_p (),
5408                    false) == 0
5409       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5410       || (code != EQ && code != NE
5411           && (BRANCH_COST (optimize_insn_for_speed_p (),
5412                            false) <= 1 || (code != LE && code != GT))))
5413     return 0;
5414
5415   /* See what we need to return.  We can only return a 1, -1, or the
5416      sign bit.  */
5417
5418   if (normalizep == 0)
5419     {
5420       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5421         normalizep = STORE_FLAG_VALUE;
5422
5423       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5424                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5425                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5426         ;
5427       else
5428         return 0;
5429     }
5430
5431   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5432      do the necessary operation below.  */
5433
5434   tem = 0;
5435
5436   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5437      the sign bit set.  */
5438
5439   if (code == LE)
5440     {
5441       /* This is destructive, so SUBTARGET can't be OP0.  */
5442       if (rtx_equal_p (subtarget, op0))
5443         subtarget = 0;
5444
5445       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5446                           OPTAB_WIDEN);
5447       if (tem)
5448         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5449                             OPTAB_WIDEN);
5450     }
5451
5452   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5453      number of bits in the mode of OP0, minus one.  */
5454
5455   if (code == GT)
5456     {
5457       if (rtx_equal_p (subtarget, op0))
5458         subtarget = 0;
5459
5460       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5461                           size_int (GET_MODE_BITSIZE (mode) - 1),
5462                           subtarget, 0);
5463       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5464                           OPTAB_WIDEN);
5465     }
5466
5467   if (code == EQ || code == NE)
5468     {
5469       /* For EQ or NE, one way to do the comparison is to apply an operation
5470          that converts the operand into a positive number if it is nonzero
5471          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5472          for NE we negate.  This puts the result in the sign bit.  Then we
5473          normalize with a shift, if needed.
5474
5475          Two operations that can do the above actions are ABS and FFS, so try
5476          them.  If that doesn't work, and MODE is smaller than a full word,
5477          we can use zero-extension to the wider mode (an unsigned conversion)
5478          as the operation.  */
5479
5480       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5481          that is compensated by the subsequent overflow when subtracting
5482          one / negating.  */
5483
5484       if (optab_handler (abs_optab, mode)->insn_code != CODE_FOR_nothing)
5485         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5486       else if (optab_handler (ffs_optab, mode)->insn_code != CODE_FOR_nothing)
5487         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5488       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5489         {
5490           tem = convert_modes (word_mode, mode, op0, 1);
5491           mode = word_mode;
5492         }
5493
5494       if (tem != 0)
5495         {
5496           if (code == EQ)
5497             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5498                                 0, OPTAB_WIDEN);
5499           else
5500             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5501         }
5502
5503       /* If we couldn't do it that way, for NE we can "or" the two's complement
5504          of the value with itself.  For EQ, we take the one's complement of
5505          that "or", which is an extra insn, so we only handle EQ if branches
5506          are expensive.  */
5507
5508       if (tem == 0
5509           && (code == NE
5510               || BRANCH_COST (optimize_insn_for_speed_p (),
5511                               false) > 1))
5512         {
5513           if (rtx_equal_p (subtarget, op0))
5514             subtarget = 0;
5515
5516           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5517           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5518                               OPTAB_WIDEN);
5519
5520           if (tem && code == EQ)
5521             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5522         }
5523     }
5524
5525   if (tem && normalizep)
5526     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5527                         size_int (GET_MODE_BITSIZE (mode) - 1),
5528                         subtarget, normalizep == 1);
5529
5530   if (tem)
5531     {
5532       if (GET_MODE (tem) != target_mode)
5533         {
5534           convert_move (target, tem, 0);
5535           tem = target;
5536         }
5537       else if (!subtarget)
5538         {
5539           emit_move_insn (target, tem);
5540           tem = target;
5541         }
5542     }
5543   else
5544     delete_insns_since (last);
5545
5546   return tem;
5547 }
5548
5549 /* Like emit_store_flag, but always succeeds.  */
5550
5551 rtx
5552 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5553                        enum machine_mode mode, int unsignedp, int normalizep)
5554 {
5555   rtx tem, label;
5556
5557   /* First see if emit_store_flag can do the job.  */
5558   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5559   if (tem != 0)
5560     return tem;
5561
5562   if (normalizep == 0)
5563     normalizep = 1;
5564
5565   /* If this failed, we have to do this with set/compare/jump/set code.  */
5566
5567   if (!REG_P (target)
5568       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5569     target = gen_reg_rtx (GET_MODE (target));
5570
5571   emit_move_insn (target, const1_rtx);
5572   label = gen_label_rtx ();
5573   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5574                            NULL_RTX, label);
5575
5576   emit_move_insn (target, const0_rtx);
5577   emit_label (label);
5578
5579   return target;
5580 }
5581 \f
5582 /* Perform possibly multi-word comparison and conditional jump to LABEL
5583    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5584    now a thin wrapper around do_compare_rtx_and_jump.  */
5585
5586 static void
5587 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5588                  rtx label)
5589 {
5590   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5591   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5592                            NULL_RTX, NULL_RTX, label);
5593 }