gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   5    Free Software Foundation, Inc.
   6
   7 This file is part of GCC.
   8
   9 GCC is free software; you can redistribute it and/or modify it under
  10 the terms of the GNU General Public License as published by the Free
  11 Software Foundation; either version 3, or (at your option) any later
  12 version.
  13
  14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  17 for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with GCC; see the file COPYING3.  If not see
  21 <http://www.gnu.org/licenses/>.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "tm.h"
  28 #include "toplev.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "tm_p.h"
  32 #include "flags.h"
  33 #include "insn-config.h"
  34 #include "expr.h"
  35 #include "optabs.h"
  36 #include "recog.h"
  37 #include "langhooks.h"
  38 #include "df.h"
  39 #include "target.h"
  40
  41 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  42                                    unsigned HOST_WIDE_INT,
  43                                    unsigned HOST_WIDE_INT, rtx);
  44 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  45                                    unsigned HOST_WIDE_INT, rtx);
  46 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  47                                     unsigned HOST_WIDE_INT,
  48                                     unsigned HOST_WIDE_INT,
  49                                     unsigned HOST_WIDE_INT, rtx, int);
  50 static rtx mask_rtx (enum machine_mode, int, int, int);
  51 static rtx lshift_value (enum machine_mode, rtx, int, int);
  52 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  53                                     unsigned HOST_WIDE_INT, int);
  54 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  55 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  56 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  57
  58 /* Test whether a value is zero of a power of two.  */
  59 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
  60
  61 /* Nonzero means divides or modulus operations are relatively cheap for
  62    powers of two, so don't use branches; emit the operation instead.
  63    Usually, this will mean that the MD file will emit non-branch
  64    sequences.  */
  65
  66 static bool sdiv_pow2_cheap[2][NUM_MACHINE_MODES];
  67 static bool smod_pow2_cheap[2][NUM_MACHINE_MODES];
  68
  69 #ifndef SLOW_UNALIGNED_ACCESS
  70 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  71 #endif
  72
  73 /* For compilers that support multiple targets with different word sizes,
  74    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  75    is the H8/300(H) compiler.  */
  76
  77 #ifndef MAX_BITS_PER_WORD
  78 #define MAX_BITS_PER_WORD BITS_PER_WORD
  79 #endif
  80
  81 /* Reduce conditional compilation elsewhere.  */
  82 #ifndef HAVE_insv
  83 #define HAVE_insv       0
  84 #define CODE_FOR_insv   CODE_FOR_nothing
  85 #define gen_insv(a,b,c,d) NULL_RTX
  86 #endif
  87 #ifndef HAVE_extv
  88 #define HAVE_extv       0
  89 #define CODE_FOR_extv   CODE_FOR_nothing
  90 #define gen_extv(a,b,c,d) NULL_RTX
  91 #endif
  92 #ifndef HAVE_extzv
  93 #define HAVE_extzv      0
  94 #define CODE_FOR_extzv  CODE_FOR_nothing
  95 #define gen_extzv(a,b,c,d) NULL_RTX
  96 #endif
  97
  98 /* Cost of various pieces of RTL.  Note that some of these are indexed by
  99    shift count and some by mode.  */
 100 static int zero_cost[2];
 101 static int add_cost[2][NUM_MACHINE_MODES];
 102 static int neg_cost[2][NUM_MACHINE_MODES];
 103 static int shift_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 104 static int shiftadd_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 105 static int shiftsub0_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 106 static int shiftsub1_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 107 static int mul_cost[2][NUM_MACHINE_MODES];
 108 static int sdiv_cost[2][NUM_MACHINE_MODES];
 109 static int udiv_cost[2][NUM_MACHINE_MODES];
 110 static int mul_widen_cost[2][NUM_MACHINE_MODES];
 111 static int mul_highpart_cost[2][NUM_MACHINE_MODES];
 112
 113 void
 114 init_expmed (void)
 115 {
 116   struct
 117   {
 118     struct rtx_def reg;         rtunion reg_fld[2];
 119     struct rtx_def plus;        rtunion plus_fld1;
 120     struct rtx_def neg;
 121     struct rtx_def mult;        rtunion mult_fld1;
 122     struct rtx_def sdiv;        rtunion sdiv_fld1;
 123     struct rtx_def udiv;        rtunion udiv_fld1;
 124     struct rtx_def zext;
 125     struct rtx_def sdiv_32;     rtunion sdiv_32_fld1;
 126     struct rtx_def smod_32;     rtunion smod_32_fld1;
 127     struct rtx_def wide_mult;   rtunion wide_mult_fld1;
 128     struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
 129     struct rtx_def wide_trunc;
 130     struct rtx_def shift;       rtunion shift_fld1;
 131     struct rtx_def shift_mult;  rtunion shift_mult_fld1;
 132     struct rtx_def shift_add;   rtunion shift_add_fld1;
 133     struct rtx_def shift_sub0;  rtunion shift_sub0_fld1;
 134     struct rtx_def shift_sub1;  rtunion shift_sub1_fld1;
 135   } all;
 136
 137   rtx pow2[MAX_BITS_PER_WORD];
 138   rtx cint[MAX_BITS_PER_WORD];
 139   int m, n;
 140   enum machine_mode mode, wider_mode;
 141   int speed;
 142
 143
 144   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 145     {
 146       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 147       cint[m] = GEN_INT (m);
 148     }
 149   memset (&all, 0, sizeof all);
 150
 151   PUT_CODE (&all.reg, REG);
 152   /* Avoid using hard regs in ways which may be unsupported.  */
 153   SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
 154
 155   PUT_CODE (&all.plus, PLUS);
 156   XEXP (&all.plus, 0) = &all.reg;
 157   XEXP (&all.plus, 1) = &all.reg;
 158
 159   PUT_CODE (&all.neg, NEG);
 160   XEXP (&all.neg, 0) = &all.reg;
 161
 162   PUT_CODE (&all.mult, MULT);
 163   XEXP (&all.mult, 0) = &all.reg;
 164   XEXP (&all.mult, 1) = &all.reg;
 165
 166   PUT_CODE (&all.sdiv, DIV);
 167   XEXP (&all.sdiv, 0) = &all.reg;
 168   XEXP (&all.sdiv, 1) = &all.reg;
 169
 170   PUT_CODE (&all.udiv, UDIV);
 171   XEXP (&all.udiv, 0) = &all.reg;
 172   XEXP (&all.udiv, 1) = &all.reg;
 173
 174   PUT_CODE (&all.sdiv_32, DIV);
 175   XEXP (&all.sdiv_32, 0) = &all.reg;
 176   XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
 177
 178   PUT_CODE (&all.smod_32, MOD);
 179   XEXP (&all.smod_32, 0) = &all.reg;
 180   XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
 181
 182   PUT_CODE (&all.zext, ZERO_EXTEND);
 183   XEXP (&all.zext, 0) = &all.reg;
 184
 185   PUT_CODE (&all.wide_mult, MULT);
 186   XEXP (&all.wide_mult, 0) = &all.zext;
 187   XEXP (&all.wide_mult, 1) = &all.zext;
 188
 189   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 190   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 191
 192   PUT_CODE (&all.wide_trunc, TRUNCATE);
 193   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 194
 195   PUT_CODE (&all.shift, ASHIFT);
 196   XEXP (&all.shift, 0) = &all.reg;
 197
 198   PUT_CODE (&all.shift_mult, MULT);
 199   XEXP (&all.shift_mult, 0) = &all.reg;
 200
 201   PUT_CODE (&all.shift_add, PLUS);
 202   XEXP (&all.shift_add, 0) = &all.shift_mult;
 203   XEXP (&all.shift_add, 1) = &all.reg;
 204
 205   PUT_CODE (&all.shift_sub0, MINUS);
 206   XEXP (&all.shift_sub0, 0) = &all.shift_mult;
 207   XEXP (&all.shift_sub0, 1) = &all.reg;
 208
 209   PUT_CODE (&all.shift_sub1, MINUS);
 210   XEXP (&all.shift_sub1, 0) = &all.reg;
 211   XEXP (&all.shift_sub1, 1) = &all.shift_mult;
 212
 213   for (speed = 0; speed < 2; speed++)
 214     {
 215       crtl->maybe_hot_insn_p = speed;
 216       zero_cost[speed] = rtx_cost (const0_rtx, SET, speed);
 217
 218       for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 219            mode != VOIDmode;
 220            mode = GET_MODE_WIDER_MODE (mode))
 221         {
 222           PUT_MODE (&all.reg, mode);
 223           PUT_MODE (&all.plus, mode);
 224           PUT_MODE (&all.neg, mode);
 225           PUT_MODE (&all.mult, mode);
 226           PUT_MODE (&all.sdiv, mode);
 227           PUT_MODE (&all.udiv, mode);
 228           PUT_MODE (&all.sdiv_32, mode);
 229           PUT_MODE (&all.smod_32, mode);
 230           PUT_MODE (&all.wide_trunc, mode);
 231           PUT_MODE (&all.shift, mode);
 232           PUT_MODE (&all.shift_mult, mode);
 233           PUT_MODE (&all.shift_add, mode);
 234           PUT_MODE (&all.shift_sub0, mode);
 235           PUT_MODE (&all.shift_sub1, mode);
 236
 237           add_cost[speed][mode] = rtx_cost (&all.plus, SET, speed);
 238           neg_cost[speed][mode] = rtx_cost (&all.neg, SET, speed);
 239           mul_cost[speed][mode] = rtx_cost (&all.mult, SET, speed);
 240           sdiv_cost[speed][mode] = rtx_cost (&all.sdiv, SET, speed);
 241           udiv_cost[speed][mode] = rtx_cost (&all.udiv, SET, speed);
 242
 243           sdiv_pow2_cheap[speed][mode] = (rtx_cost (&all.sdiv_32, SET, speed)
 244                                           <= 2 * add_cost[speed][mode]);
 245           smod_pow2_cheap[speed][mode] = (rtx_cost (&all.smod_32, SET, speed)
 246                                           <= 4 * add_cost[speed][mode]);
 247
 248           wider_mode = GET_MODE_WIDER_MODE (mode);
 249           if (wider_mode != VOIDmode)
 250             {
 251               PUT_MODE (&all.zext, wider_mode);
 252               PUT_MODE (&all.wide_mult, wider_mode);
 253               PUT_MODE (&all.wide_lshr, wider_mode);
 254               XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
 255
 256               mul_widen_cost[speed][wider_mode]
 257                 = rtx_cost (&all.wide_mult, SET, speed);
 258               mul_highpart_cost[speed][mode]
 259                 = rtx_cost (&all.wide_trunc, SET, speed);
 260             }
 261
 262           shift_cost[speed][mode][0] = 0;
 263           shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
 264             = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];
 265
 266           n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
 267           for (m = 1; m < n; m++)
 268             {
 269               XEXP (&all.shift, 1) = cint[m];
 270               XEXP (&all.shift_mult, 1) = pow2[m];
 271
 272               shift_cost[speed][mode][m] = rtx_cost (&all.shift, SET, speed);
 273               shiftadd_cost[speed][mode][m] = rtx_cost (&all.shift_add, SET, speed);
 274               shiftsub0_cost[speed][mode][m] = rtx_cost (&all.shift_sub0, SET, speed);
 275               shiftsub1_cost[speed][mode][m] = rtx_cost (&all.shift_sub1, SET, speed);
 276             }
 277         }
 278     }
 279   default_rtl_profile ();
 280 }
 281
 282 /* Return an rtx representing minus the value of X.
 283    MODE is the intended mode of the result,
 284    useful if X is a CONST_INT.  */
 285
 286 rtx
 287 negate_rtx (enum machine_mode mode, rtx x)
 288 {
 289   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 290
 291   if (result == 0)
 292     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 293
 294   return result;
 295 }
 296
 297 /* Report on the availability of insv/extv/extzv and the desired mode
 298    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 299    is false; else the mode of the specified operand.  If OPNO is -1,
 300    all the caller cares about is whether the insn is available.  */
 301 enum machine_mode
 302 mode_for_extraction (enum extraction_pattern pattern, int opno)
 303 {
 304   const struct insn_data *data;
 305
 306   switch (pattern)
 307     {
 308     case EP_insv:
 309       if (HAVE_insv)
 310         {
 311           data = &insn_data[CODE_FOR_insv];
 312           break;
 313         }
 314       return MAX_MACHINE_MODE;
 315
 316     case EP_extv:
 317       if (HAVE_extv)
 318         {
 319           data = &insn_data[CODE_FOR_extv];
 320           break;
 321         }
 322       return MAX_MACHINE_MODE;
 323
 324     case EP_extzv:
 325       if (HAVE_extzv)
 326         {
 327           data = &insn_data[CODE_FOR_extzv];
 328           break;
 329         }
 330       return MAX_MACHINE_MODE;
 331
 332     default:
 333       gcc_unreachable ();
 334     }
 335
 336   if (opno == -1)
 337     return VOIDmode;
 338
 339   /* Everyone who uses this function used to follow it with
 340      if (result == VOIDmode) result = word_mode; */
 341   if (data->operand[opno].mode == VOIDmode)
 342     return word_mode;
 343   return data->operand[opno].mode;
 344 }
 345
 346 /* Return true if X, of mode MODE, matches the predicate for operand
 347    OPNO of instruction ICODE.  Allow volatile memories, regardless of
 348    the ambient volatile_ok setting.  */
 349
 350 static bool
 351 check_predicate_volatile_ok (enum insn_code icode, int opno,
 352                              rtx x, enum machine_mode mode)
 353 {
 354   bool save_volatile_ok, result;
 355
 356   save_volatile_ok = volatile_ok;
 357   result = insn_data[(int) icode].operand[opno].predicate (x, mode);
 358   volatile_ok = save_volatile_ok;
 359   return result;
 360 }
 361 \f
 362 /* A subroutine of store_bit_field, with the same arguments.  Return true
 363    if the operation could be implemented.
 364
 365    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 366    no other way of implementing the operation.  If FALLBACK_P is false,
 367    return false instead.  */
 368
 369 static bool
 370 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 371                    unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 372                    rtx value, bool fallback_p)
 373 {
 374   unsigned int unit
 375     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
 376   unsigned HOST_WIDE_INT offset, bitpos;
 377   rtx op0 = str_rtx;
 378   int byte_offset;
 379   rtx orig_value;
 380
 381   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 382
 383   while (GET_CODE (op0) == SUBREG)
 384     {
 385       /* The following line once was done only if WORDS_BIG_ENDIAN,
 386          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 387          meaningful at a much higher level; when structures are copied
 388          between memory and regs, the higher-numbered regs
 389          always get higher addresses.  */
 390       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 391       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 392
 393       byte_offset = 0;
 394
 395       /* Paradoxical subregs need special handling on big endian machines.  */
 396       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 397         {
 398           int difference = inner_mode_size - outer_mode_size;
 399
 400           if (WORDS_BIG_ENDIAN)
 401             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 402           if (BYTES_BIG_ENDIAN)
 403             byte_offset += difference % UNITS_PER_WORD;
 404         }
 405       else
 406         byte_offset = SUBREG_BYTE (op0);
 407
 408       bitnum += byte_offset * BITS_PER_UNIT;
 409       op0 = SUBREG_REG (op0);
 410     }
 411
 412   /* No action is needed if the target is a register and if the field
 413      lies completely outside that register.  This can occur if the source
 414      code contains an out-of-bounds access to a small array.  */
 415   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 416     return true;
 417
 418   /* Use vec_set patterns for inserting parts of vectors whenever
 419      available.  */
 420   if (VECTOR_MODE_P (GET_MODE (op0))
 421       && !MEM_P (op0)
 422       && (optab_handler (vec_set_optab, GET_MODE (op0))->insn_code
 423           != CODE_FOR_nothing)
 424       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 425       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 426       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 427     {
 428       enum machine_mode outermode = GET_MODE (op0);
 429       enum machine_mode innermode = GET_MODE_INNER (outermode);
 430       int icode = (int) optab_handler (vec_set_optab, outermode)->insn_code;
 431       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 432       rtx rtxpos = GEN_INT (pos);
 433       rtx src = value;
 434       rtx dest = op0;
 435       rtx pat, seq;
 436       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
 437       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
 438       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
 439
 440       start_sequence ();
 441
 442       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
 443         src = copy_to_mode_reg (mode1, src);
 444
 445       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 446         rtxpos = copy_to_mode_reg (mode1, rtxpos);
 447
 448       /* We could handle this, but we should always be called with a pseudo
 449          for our targets and all insns should take them as outputs.  */
 450       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
 451                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
 452                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
 453       pat = GEN_FCN (icode) (dest, src, rtxpos);
 454       seq = get_insns ();
 455       end_sequence ();
 456       if (pat)
 457         {
 458           emit_insn (seq);
 459           emit_insn (pat);
 460           return true;
 461         }
 462     }
 463
 464   /* If the target is a register, overwriting the entire object, or storing
 465      a full-word or multi-word field can be done with just a SUBREG.
 466
 467      If the target is memory, storing any naturally aligned field can be
 468      done with a simple store.  For targets that support fast unaligned
 469      memory, any naturally sized, unit aligned field can be done directly.  */
 470
 471   offset = bitnum / unit;
 472   bitpos = bitnum % unit;
 473   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 474                 + (offset * UNITS_PER_WORD);
 475
 476   if (bitpos == 0
 477       && bitsize == GET_MODE_BITSIZE (fieldmode)
 478       && (!MEM_P (op0)
 479           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 480              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 481              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 482           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 483              || (offset * BITS_PER_UNIT % bitsize == 0
 484                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 485     {
 486       if (MEM_P (op0))
 487         op0 = adjust_address (op0, fieldmode, offset);
 488       else if (GET_MODE (op0) != fieldmode)
 489         op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 490                                    byte_offset);
 491       emit_move_insn (op0, value);
 492       return true;
 493     }
 494
 495   /* Make sure we are playing with integral modes.  Pun with subregs
 496      if we aren't.  This must come after the entire register case above,
 497      since that case is valid for any mode.  The following cases are only
 498      valid for integral modes.  */
 499   {
 500     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 501     if (imode != GET_MODE (op0))
 502       {
 503         if (MEM_P (op0))
 504           op0 = adjust_address (op0, imode, 0);
 505         else
 506           {
 507             gcc_assert (imode != BLKmode);
 508             op0 = gen_lowpart (imode, op0);
 509           }
 510       }
 511   }
 512
 513   /* We may be accessing data outside the field, which means
 514      we can alias adjacent data.  */
 515   if (MEM_P (op0))
 516     {
 517       op0 = shallow_copy_rtx (op0);
 518       set_mem_alias_set (op0, 0);
 519       set_mem_expr (op0, 0);
 520     }
 521
 522   /* If OP0 is a register, BITPOS must count within a word.
 523      But as we have it, it counts within whatever size OP0 now has.
 524      On a bigendian machine, these are not the same, so convert.  */
 525   if (BYTES_BIG_ENDIAN
 526       && !MEM_P (op0)
 527       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 528     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 529
 530   /* Storing an lsb-aligned field in a register
 531      can be done with a movestrict instruction.  */
 532
 533   if (!MEM_P (op0)
 534       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 535       && bitsize == GET_MODE_BITSIZE (fieldmode)
 536       && (optab_handler (movstrict_optab, fieldmode)->insn_code
 537           != CODE_FOR_nothing))
 538     {
 539       int icode = optab_handler (movstrict_optab, fieldmode)->insn_code;
 540       rtx insn;
 541       rtx start = get_last_insn ();
 542       rtx arg0 = op0;
 543
 544       /* Get appropriate low part of the value being stored.  */
 545       if (CONST_INT_P (value) || REG_P (value))
 546         value = gen_lowpart (fieldmode, value);
 547       else if (!(GET_CODE (value) == SYMBOL_REF
 548                  || GET_CODE (value) == LABEL_REF
 549                  || GET_CODE (value) == CONST))
 550         value = convert_to_mode (fieldmode, value, 0);
 551
 552       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 553         value = copy_to_mode_reg (fieldmode, value);
 554
 555       if (GET_CODE (op0) == SUBREG)
 556         {
 557           /* Else we've got some float mode source being extracted into
 558              a different float mode destination -- this combination of
 559              subregs results in Severe Tire Damage.  */
 560           gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
 561                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 562                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 563           arg0 = SUBREG_REG (op0);
 564         }
 565
 566       insn = (GEN_FCN (icode)
 567                  (gen_rtx_SUBREG (fieldmode, arg0,
 568                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 569                                   + (offset * UNITS_PER_WORD)),
 570                                   value));
 571       if (insn)
 572         {
 573           emit_insn (insn);
 574           return true;
 575         }
 576       delete_insns_since (start);
 577     }
 578
 579   /* Handle fields bigger than a word.  */
 580
 581   if (bitsize > BITS_PER_WORD)
 582     {
 583       /* Here we transfer the words of the field
 584          in the order least significant first.
 585          This is because the most significant word is the one which may
 586          be less than full.
 587          However, only do that if the value is not BLKmode.  */
 588
 589       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 590       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 591       unsigned int i;
 592       rtx last;
 593
 594       /* This is the mode we must force value to, so that there will be enough
 595          subwords to extract.  Note that fieldmode will often (always?) be
 596          VOIDmode, because that is what store_field uses to indicate that this
 597          is a bit field, but passing VOIDmode to operand_subword_force
 598          is not allowed.  */
 599       fieldmode = GET_MODE (value);
 600       if (fieldmode == VOIDmode)
 601         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 602
 603       last = get_last_insn ();
 604       for (i = 0; i < nwords; i++)
 605         {
 606           /* If I is 0, use the low-order word in both field and target;
 607              if I is 1, use the next to lowest word; and so on.  */
 608           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 609           unsigned int bit_offset = (backwards
 610                                      ? MAX ((int) bitsize - ((int) i + 1)
 611                                             * BITS_PER_WORD,
 612                                             0)
 613                                      : (int) i * BITS_PER_WORD);
 614           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 615
 616           if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD,
 617                                             bitsize - i * BITS_PER_WORD),
 618                                   bitnum + bit_offset, word_mode,
 619                                   value_word, fallback_p))
 620             {
 621               delete_insns_since (last);
 622               return false;
 623             }
 624         }
 625       return true;
 626     }
 627
 628   /* From here on we can assume that the field to be stored in is
 629      a full-word (whatever type that is), since it is shorter than a word.  */
 630
 631   /* OFFSET is the number of words or bytes (UNIT says which)
 632      from STR_RTX to the first word or byte containing part of the field.  */
 633
 634   if (!MEM_P (op0))
 635     {
 636       if (offset != 0
 637           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 638         {
 639           if (!REG_P (op0))
 640             {
 641               /* Since this is a destination (lvalue), we can't copy
 642                  it to a pseudo.  We can remove a SUBREG that does not
 643                  change the size of the operand.  Such a SUBREG may
 644                  have been added above.  */
 645               gcc_assert (GET_CODE (op0) == SUBREG
 646                           && (GET_MODE_SIZE (GET_MODE (op0))
 647                               == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
 648               op0 = SUBREG_REG (op0);
 649             }
 650           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 651                                 op0, (offset * UNITS_PER_WORD));
 652         }
 653       offset = 0;
 654     }
 655
 656   /* If VALUE has a floating-point or complex mode, access it as an
 657      integer of the corresponding size.  This can occur on a machine
 658      with 64 bit registers that uses SFmode for float.  It can also
 659      occur for unaligned float or complex fields.  */
 660   orig_value = value;
 661   if (GET_MODE (value) != VOIDmode
 662       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 663       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 664     {
 665       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 666       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 667     }
 668
 669   /* Now OFFSET is nonzero only if OP0 is memory
 670      and is therefore always measured in bytes.  */
 671
 672   if (HAVE_insv
 673       && GET_MODE (value) != BLKmode
 674       && bitsize > 0
 675       && GET_MODE_BITSIZE (op_mode) >= bitsize
 676       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
 677             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
 678       && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
 679                                                         VOIDmode)
 680       && check_predicate_volatile_ok (CODE_FOR_insv, 0, op0, VOIDmode))
 681     {
 682       int xbitpos = bitpos;
 683       rtx value1;
 684       rtx xop0 = op0;
 685       rtx last = get_last_insn ();
 686       rtx pat;
 687       bool copy_back = false;
 688
 689       /* Add OFFSET into OP0's address.  */
 690       if (MEM_P (xop0))
 691         xop0 = adjust_address (xop0, byte_mode, offset);
 692
 693       /* If xop0 is a register, we need it in OP_MODE
 694          to make it acceptable to the format of insv.  */
 695       if (GET_CODE (xop0) == SUBREG)
 696         /* We can't just change the mode, because this might clobber op0,
 697            and we will need the original value of op0 if insv fails.  */
 698         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 699       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 700         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 701
 702       /* If the destination is a paradoxical subreg such that we need a
 703          truncate to the inner mode, perform the insertion on a temporary and
 704          truncate the result to the original destination.  Note that we can't
 705          just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 706          X) 0)) is (reg:N X).  */
 707       if (GET_CODE (xop0) == SUBREG
 708           && REG_P (SUBREG_REG (xop0))
 709           && (!TRULY_NOOP_TRUNCATION
 710               (GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (xop0))),
 711                GET_MODE_BITSIZE (op_mode))))
 712         {
 713           rtx tem = gen_reg_rtx (op_mode);
 714           emit_move_insn (tem, xop0);
 715           xop0 = tem;
 716           copy_back = true;
 717         }
 718
 719       /* On big-endian machines, we count bits from the most significant.
 720          If the bit field insn does not, we must invert.  */
 721
 722       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 723         xbitpos = unit - bitsize - xbitpos;
 724
 725       /* We have been counting XBITPOS within UNIT.
 726          Count instead within the size of the register.  */
 727       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
 728         xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
 729
 730       unit = GET_MODE_BITSIZE (op_mode);
 731
 732       /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 733       value1 = value;
 734       if (GET_MODE (value) != op_mode)
 735         {
 736           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 737             {
 738               /* Optimization: Don't bother really extending VALUE
 739                  if it has all the bits we will actually use.  However,
 740                  if we must narrow it, be sure we do it correctly.  */
 741
 742               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 743                 {
 744                   rtx tmp;
 745
 746                   tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 747                   if (! tmp)
 748                     tmp = simplify_gen_subreg (op_mode,
 749                                                force_reg (GET_MODE (value),
 750                                                           value1),
 751                                                GET_MODE (value), 0);
 752                   value1 = tmp;
 753                 }
 754               else
 755                 value1 = gen_lowpart (op_mode, value1);
 756             }
 757           else if (CONST_INT_P (value))
 758             value1 = gen_int_mode (INTVAL (value), op_mode);
 759           else
 760             /* Parse phase is supposed to make VALUE's data type
 761                match that of the component reference, which is a type
 762                at least as wide as the field; so VALUE should have
 763                a mode that corresponds to that type.  */
 764             gcc_assert (CONSTANT_P (value));
 765         }
 766
 767       /* If this machine's insv insists on a register,
 768          get VALUE1 into a register.  */
 769       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 770              (value1, op_mode)))
 771         value1 = force_reg (op_mode, value1);
 772
 773       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 774       if (pat)
 775         {
 776           emit_insn (pat);
 777
 778           if (copy_back)
 779             convert_move (op0, xop0, true);
 780           return true;
 781         }
 782       delete_insns_since (last);
 783     }
 784
 785   /* If OP0 is a memory, try copying it to a register and seeing if a
 786      cheap register alternative is available.  */
 787   if (HAVE_insv && MEM_P (op0))
 788     {
 789       enum machine_mode bestmode;
 790
 791       /* Get the mode to use for inserting into this field.  If OP0 is
 792          BLKmode, get the smallest mode consistent with the alignment. If
 793          OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
 794          mode. Otherwise, use the smallest mode containing the field.  */
 795
 796       if (GET_MODE (op0) == BLKmode
 797           || (op_mode != MAX_MACHINE_MODE
 798               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
 799         bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
 800                                   (op_mode == MAX_MACHINE_MODE
 801                                    ? VOIDmode : op_mode),
 802                                   MEM_VOLATILE_P (op0));
 803       else
 804         bestmode = GET_MODE (op0);
 805
 806       if (bestmode != VOIDmode
 807           && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
 808           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 809                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 810         {
 811           rtx last, tempreg, xop0;
 812           unsigned HOST_WIDE_INT xoffset, xbitpos;
 813
 814           last = get_last_insn ();
 815
 816           /* Adjust address to point to the containing unit of
 817              that mode.  Compute the offset as a multiple of this unit,
 818              counting in bytes.  */
 819           unit = GET_MODE_BITSIZE (bestmode);
 820           xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 821           xbitpos = bitnum % unit;
 822           xop0 = adjust_address (op0, bestmode, xoffset);
 823
 824           /* Fetch that unit, store the bitfield in it, then store
 825              the unit.  */
 826           tempreg = copy_to_reg (xop0);
 827           if (store_bit_field_1 (tempreg, bitsize, xbitpos,
 828                                  fieldmode, orig_value, false))
 829             {
 830               emit_move_insn (xop0, tempreg);
 831               return true;
 832             }
 833           delete_insns_since (last);
 834         }
 835     }
 836
 837   if (!fallback_p)
 838     return false;
 839
 840   store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 841   return true;
 842 }
 843
 844 /* Generate code to store value from rtx VALUE
 845    into a bit-field within structure STR_RTX
 846    containing BITSIZE bits starting at bit BITNUM.
 847    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */
 848
 849 void
 850 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 851                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 852                  rtx value)
 853 {
 854   if (!store_bit_field_1 (str_rtx, bitsize, bitnum, fieldmode, value, true))
 855     gcc_unreachable ();
 856 }
 857 \f
 858 /* Use shifts and boolean operations to store VALUE
 859    into a bit field of width BITSIZE
 860    in a memory location specified by OP0 except offset by OFFSET bytes.
 861      (OFFSET must be 0 if OP0 is a register.)
 862    The field starts at position BITPOS within the byte.
 863     (If OP0 is a register, it may be a full word or a narrower mode,
 864      but BITPOS still counts within a full word,
 865      which is significant on bigendian machines.)  */
 866
 867 static void
 868 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 869                        unsigned HOST_WIDE_INT bitsize,
 870                        unsigned HOST_WIDE_INT bitpos, rtx value)
 871 {
 872   enum machine_mode mode;
 873   unsigned int total_bits = BITS_PER_WORD;
 874   rtx temp;
 875   int all_zero = 0;
 876   int all_one = 0;
 877
 878   /* There is a case not handled here:
 879      a structure with a known alignment of just a halfword
 880      and a field split across two aligned halfwords within the structure.
 881      Or likewise a structure with a known alignment of just a byte
 882      and a field split across two bytes.
 883      Such cases are not supposed to be able to occur.  */
 884
 885   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 886     {
 887       gcc_assert (!offset);
 888       /* Special treatment for a bit field split across two registers.  */
 889       if (bitsize + bitpos > BITS_PER_WORD)
 890         {
 891           store_split_bit_field (op0, bitsize, bitpos, value);
 892           return;
 893         }
 894     }
 895   else
 896     {
 897       /* Get the proper mode to use for this field.  We want a mode that
 898          includes the entire field.  If such a mode would be larger than
 899          a word, we won't be doing the extraction the normal way.
 900          We don't want a mode bigger than the destination.  */
 901
 902       mode = GET_MODE (op0);
 903       if (GET_MODE_BITSIZE (mode) == 0
 904           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 905         mode = word_mode;
 906       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 907                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 908
 909       if (mode == VOIDmode)
 910         {
 911           /* The only way this should occur is if the field spans word
 912              boundaries.  */
 913           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 914                                  value);
 915           return;
 916         }
 917
 918       total_bits = GET_MODE_BITSIZE (mode);
 919
 920       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 921          be in the range 0 to total_bits-1, and put any excess bytes in
 922          OFFSET.  */
 923       if (bitpos >= total_bits)
 924         {
 925           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 926           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 927                      * BITS_PER_UNIT);
 928         }
 929
 930       /* Get ref to an aligned byte, halfword, or word containing the field.
 931          Adjust BITPOS to be position within a word,
 932          and OFFSET to be the offset of that word.
 933          Then alter OP0 to refer to that word.  */
 934       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 935       offset -= (offset % (total_bits / BITS_PER_UNIT));
 936       op0 = adjust_address (op0, mode, offset);
 937     }
 938
 939   mode = GET_MODE (op0);
 940
 941   /* Now MODE is either some integral mode for a MEM as OP0,
 942      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 943      The bit field is contained entirely within OP0.
 944      BITPOS is the starting bit number within OP0.
 945      (OP0's mode may actually be narrower than MODE.)  */
 946
 947   if (BYTES_BIG_ENDIAN)
 948       /* BITPOS is the distance between our msb
 949          and that of the containing datum.
 950          Convert it to the distance from the lsb.  */
 951       bitpos = total_bits - bitsize - bitpos;
 952
 953   /* Now BITPOS is always the distance between our lsb
 954      and that of OP0.  */
 955
 956   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 957      we must first convert its mode to MODE.  */
 958
 959   if (CONST_INT_P (value))
 960     {
 961       HOST_WIDE_INT v = INTVAL (value);
 962
 963       if (bitsize < HOST_BITS_PER_WIDE_INT)
 964         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 965
 966       if (v == 0)
 967         all_zero = 1;
 968       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 969                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 970                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 971         all_one = 1;
 972
 973       value = lshift_value (mode, value, bitpos, bitsize);
 974     }
 975   else
 976     {
 977       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 978                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 979
 980       if (GET_MODE (value) != mode)
 981         value = convert_to_mode (mode, value, 1);
 982
 983       if (must_and)
 984         value = expand_binop (mode, and_optab, value,
 985                               mask_rtx (mode, 0, bitsize, 0),
 986                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 987       if (bitpos > 0)
 988         value = expand_shift (LSHIFT_EXPR, mode, value,
 989                               build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
 990     }
 991
 992   /* Now clear the chosen bits in OP0,
 993      except that if VALUE is -1 we need not bother.  */
 994   /* We keep the intermediates in registers to allow CSE to combine
 995      consecutive bitfield assignments.  */
 996
 997   temp = force_reg (mode, op0);
 998
 999   if (! all_one)
1000     {
1001       temp = expand_binop (mode, and_optab, temp,
1002                            mask_rtx (mode, bitpos, bitsize, 1),
1003                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1004       temp = force_reg (mode, temp);
1005     }
1006
1007   /* Now logical-or VALUE into OP0, unless it is zero.  */
1008
1009   if (! all_zero)
1010     {
1011       temp = expand_binop (mode, ior_optab, temp, value,
1012                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1013       temp = force_reg (mode, temp);
1014     }
1015
1016   if (op0 != temp)
1017     {
1018       op0 = copy_rtx (op0);
1019       emit_move_insn (op0, temp);
1020     }
1021 }
1022 \f
1023 /* Store a bit field that is split across multiple accessible memory objects.
1024
1025    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1026    BITSIZE is the field width; BITPOS the position of its first bit
1027    (within the word).
1028    VALUE is the value to store.
1029
1030    This does not yet handle fields wider than BITS_PER_WORD.  */
1031
1032 static void
1033 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1034                        unsigned HOST_WIDE_INT bitpos, rtx value)
1035 {
1036   unsigned int unit;
1037   unsigned int bitsdone = 0;
1038
1039   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1040      much at a time.  */
1041   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1042     unit = BITS_PER_WORD;
1043   else
1044     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1045
1046   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1047      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1048      that VALUE might be a floating-point constant.  */
1049   if (CONSTANT_P (value) && !CONST_INT_P (value))
1050     {
1051       rtx word = gen_lowpart_common (word_mode, value);
1052
1053       if (word && (value != word))
1054         value = word;
1055       else
1056         value = gen_lowpart_common (word_mode,
1057                                     force_reg (GET_MODE (value) != VOIDmode
1058                                                ? GET_MODE (value)
1059                                                : word_mode, value));
1060     }
1061
1062   while (bitsdone < bitsize)
1063     {
1064       unsigned HOST_WIDE_INT thissize;
1065       rtx part, word;
1066       unsigned HOST_WIDE_INT thispos;
1067       unsigned HOST_WIDE_INT offset;
1068
1069       offset = (bitpos + bitsdone) / unit;
1070       thispos = (bitpos + bitsdone) % unit;
1071
1072       /* THISSIZE must not overrun a word boundary.  Otherwise,
1073          store_fixed_bit_field will call us again, and we will mutually
1074          recurse forever.  */
1075       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1076       thissize = MIN (thissize, unit - thispos);
1077
1078       if (BYTES_BIG_ENDIAN)
1079         {
1080           int total_bits;
1081
1082           /* We must do an endian conversion exactly the same way as it is
1083              done in extract_bit_field, so that the two calls to
1084              extract_fixed_bit_field will have comparable arguments.  */
1085           if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1086             total_bits = BITS_PER_WORD;
1087           else
1088             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1089
1090           /* Fetch successively less significant portions.  */
1091           if (CONST_INT_P (value))
1092             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1093                              >> (bitsize - bitsdone - thissize))
1094                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1095           else
1096             /* The args are chosen so that the last part includes the
1097                lsb.  Give extract_bit_field the value it needs (with
1098                endianness compensation) to fetch the piece we want.  */
1099             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1100                                             total_bits - bitsize + bitsdone,
1101                                             NULL_RTX, 1);
1102         }
1103       else
1104         {
1105           /* Fetch successively more significant portions.  */
1106           if (CONST_INT_P (value))
1107             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1108                              >> bitsdone)
1109                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1110           else
1111             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1112                                             bitsdone, NULL_RTX, 1);
1113         }
1114
1115       /* If OP0 is a register, then handle OFFSET here.
1116
1117          When handling multiword bitfields, extract_bit_field may pass
1118          down a word_mode SUBREG of a larger REG for a bitfield that actually
1119          crosses a word boundary.  Thus, for a SUBREG, we must find
1120          the current word starting from the base register.  */
1121       if (GET_CODE (op0) == SUBREG)
1122         {
1123           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1124           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1125                                         GET_MODE (SUBREG_REG (op0)));
1126           offset = 0;
1127         }
1128       else if (REG_P (op0))
1129         {
1130           word = operand_subword_force (op0, offset, GET_MODE (op0));
1131           offset = 0;
1132         }
1133       else
1134         word = op0;
1135
1136       /* OFFSET is in UNITs, and UNIT is in bits.
1137          store_fixed_bit_field wants offset in bytes.  */
1138       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1139                              thispos, part);
1140       bitsdone += thissize;
1141     }
1142 }
1143 \f
1144 /* A subroutine of extract_bit_field_1 that converts return value X
1145    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1146    to extract_bit_field.  */
1147
1148 static rtx
1149 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1150                              enum machine_mode tmode, bool unsignedp)
1151 {
1152   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1153     return x;
1154
1155   /* If the x mode is not a scalar integral, first convert to the
1156      integer mode of that size and then access it as a floating-point
1157      value via a SUBREG.  */
1158   if (!SCALAR_INT_MODE_P (tmode))
1159     {
1160       enum machine_mode smode;
1161
1162       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1163       x = convert_to_mode (smode, x, unsignedp);
1164       x = force_reg (smode, x);
1165       return gen_lowpart (tmode, x);
1166     }
1167
1168   return convert_to_mode (tmode, x, unsignedp);
1169 }
1170
1171 /* A subroutine of extract_bit_field, with the same arguments.
1172    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1173    if we can find no other means of implementing the operation.
1174    if FALLBACK_P is false, return NULL instead.  */
1175
1176 static rtx
1177 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1178                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1179                      enum machine_mode mode, enum machine_mode tmode,
1180                      bool fallback_p)
1181 {
1182   unsigned int unit
1183     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1184   unsigned HOST_WIDE_INT offset, bitpos;
1185   rtx op0 = str_rtx;
1186   enum machine_mode int_mode;
1187   enum machine_mode ext_mode;
1188   enum machine_mode mode1;
1189   enum insn_code icode;
1190   int byte_offset;
1191
1192   if (tmode == VOIDmode)
1193     tmode = mode;
1194
1195   while (GET_CODE (op0) == SUBREG)
1196     {
1197       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1198       op0 = SUBREG_REG (op0);
1199     }
1200
1201   /* If we have an out-of-bounds access to a register, just return an
1202      uninitialized register of the required mode.  This can occur if the
1203      source code contains an out-of-bounds access to a small array.  */
1204   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1205     return gen_reg_rtx (tmode);
1206
1207   if (REG_P (op0)
1208       && mode == GET_MODE (op0)
1209       && bitnum == 0
1210       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1211     {
1212       /* We're trying to extract a full register from itself.  */
1213       return op0;
1214     }
1215
1216   /* See if we can get a better vector mode before extracting.  */
1217   if (VECTOR_MODE_P (GET_MODE (op0))
1218       && !MEM_P (op0)
1219       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1220     {
1221       enum machine_mode new_mode;
1222       int nunits = GET_MODE_NUNITS (GET_MODE (op0));
1223
1224       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1225         new_mode = MIN_MODE_VECTOR_FLOAT;
1226       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1227         new_mode = MIN_MODE_VECTOR_FRACT;
1228       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1229         new_mode = MIN_MODE_VECTOR_UFRACT;
1230       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1231         new_mode = MIN_MODE_VECTOR_ACCUM;
1232       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1233         new_mode = MIN_MODE_VECTOR_UACCUM;
1234       else
1235         new_mode = MIN_MODE_VECTOR_INT;
1236
1237       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1238         if (GET_MODE_NUNITS (new_mode) == nunits
1239             && GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1240             && targetm.vector_mode_supported_p (new_mode))
1241           break;
1242       if (new_mode != VOIDmode)
1243         op0 = gen_lowpart (new_mode, op0);
1244     }
1245
1246   /* Use vec_extract patterns for extracting parts of vectors whenever
1247      available.  */
1248   if (VECTOR_MODE_P (GET_MODE (op0))
1249       && !MEM_P (op0)
1250       && (optab_handler (vec_extract_optab, GET_MODE (op0))->insn_code
1251           != CODE_FOR_nothing)
1252       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1253           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1254     {
1255       enum machine_mode outermode = GET_MODE (op0);
1256       enum machine_mode innermode = GET_MODE_INNER (outermode);
1257       int icode = (int) optab_handler (vec_extract_optab, outermode)->insn_code;
1258       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1259       rtx rtxpos = GEN_INT (pos);
1260       rtx src = op0;
1261       rtx dest = NULL, pat, seq;
1262       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1263       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1264       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1265
1266       if (innermode == tmode || innermode == mode)
1267         dest = target;
1268
1269       if (!dest)
1270         dest = gen_reg_rtx (innermode);
1271
1272       start_sequence ();
1273
1274       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1275         dest = copy_to_mode_reg (mode0, dest);
1276
1277       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1278         src = copy_to_mode_reg (mode1, src);
1279
1280       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1281         rtxpos = copy_to_mode_reg (mode1, rtxpos);
1282
1283       /* We could handle this, but we should always be called with a pseudo
1284          for our targets and all insns should take them as outputs.  */
1285       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1286                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
1287                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1288
1289       pat = GEN_FCN (icode) (dest, src, rtxpos);
1290       seq = get_insns ();
1291       end_sequence ();
1292       if (pat)
1293         {
1294           emit_insn (seq);
1295           emit_insn (pat);
1296           if (mode0 != mode)
1297             return gen_lowpart (tmode, dest);
1298           return dest;
1299         }
1300     }
1301
1302   /* Make sure we are playing with integral modes.  Pun with subregs
1303      if we aren't.  */
1304   {
1305     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1306     if (imode != GET_MODE (op0))
1307       {
1308         if (MEM_P (op0))
1309           op0 = adjust_address (op0, imode, 0);
1310         else if (imode != BLKmode)
1311           {
1312             op0 = gen_lowpart (imode, op0);
1313
1314             /* If we got a SUBREG, force it into a register since we
1315                aren't going to be able to do another SUBREG on it.  */
1316             if (GET_CODE (op0) == SUBREG)
1317               op0 = force_reg (imode, op0);
1318           }
1319         else if (REG_P (op0))
1320           {
1321             rtx reg, subreg;
1322             imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1323                                             MODE_INT);
1324             reg = gen_reg_rtx (imode);
1325             subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1326             emit_move_insn (subreg, op0);
1327             op0 = reg;
1328             bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1329           }
1330         else
1331           {
1332             rtx mem = assign_stack_temp (GET_MODE (op0),
1333                                          GET_MODE_SIZE (GET_MODE (op0)), 0);
1334             emit_move_insn (mem, op0);
1335             op0 = adjust_address (mem, BLKmode, 0);
1336           }
1337       }
1338   }
1339
1340   /* We may be accessing data outside the field, which means
1341      we can alias adjacent data.  */
1342   if (MEM_P (op0))
1343     {
1344       op0 = shallow_copy_rtx (op0);
1345       set_mem_alias_set (op0, 0);
1346       set_mem_expr (op0, 0);
1347     }
1348
1349   /* Extraction of a full-word or multi-word value from a structure
1350      in a register or aligned memory can be done with just a SUBREG.
1351      A subword value in the least significant part of a register
1352      can also be extracted with a SUBREG.  For this, we need the
1353      byte offset of the value in op0.  */
1354
1355   bitpos = bitnum % unit;
1356   offset = bitnum / unit;
1357   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1358
1359   /* If OP0 is a register, BITPOS must count within a word.
1360      But as we have it, it counts within whatever size OP0 now has.
1361      On a bigendian machine, these are not the same, so convert.  */
1362   if (BYTES_BIG_ENDIAN
1363       && !MEM_P (op0)
1364       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1365     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1366
1367   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1368      If that's wrong, the solution is to test for it and set TARGET to 0
1369      if needed.  */
1370
1371   /* Only scalar integer modes can be converted via subregs.  There is an
1372      additional problem for FP modes here in that they can have a precision
1373      which is different from the size.  mode_for_size uses precision, but
1374      we want a mode based on the size, so we must avoid calling it for FP
1375      modes.  */
1376   mode1  = (SCALAR_INT_MODE_P (tmode)
1377             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1378             : mode);
1379
1380   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1381         && bitpos % BITS_PER_WORD == 0)
1382        || (mode1 != BLKmode
1383            /* ??? The big endian test here is wrong.  This is correct
1384               if the value is in a register, and if mode_for_size is not
1385               the same mode as op0.  This causes us to get unnecessarily
1386               inefficient code from the Thumb port when -mbig-endian.  */
1387            && (BYTES_BIG_ENDIAN
1388                ? bitpos + bitsize == BITS_PER_WORD
1389                : bitpos == 0)))
1390       && ((!MEM_P (op0)
1391            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode1),
1392                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1393            && GET_MODE_SIZE (mode1) != 0
1394            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1395           || (MEM_P (op0)
1396               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1397                   || (offset * BITS_PER_UNIT % bitsize == 0
1398                       && MEM_ALIGN (op0) % bitsize == 0)))))
1399     {
1400       if (MEM_P (op0))
1401         op0 = adjust_address (op0, mode1, offset);
1402       else if (mode1 != GET_MODE (op0))
1403         {
1404           rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1405                                          byte_offset);
1406           if (sub == NULL)
1407             goto no_subreg_mode_swap;
1408           op0 = sub;
1409         }
1410       if (mode1 != mode)
1411         return convert_to_mode (tmode, op0, unsignedp);
1412       return op0;
1413     }
1414  no_subreg_mode_swap:
1415
1416   /* Handle fields bigger than a word.  */
1417
1418   if (bitsize > BITS_PER_WORD)
1419     {
1420       /* Here we transfer the words of the field
1421          in the order least significant first.
1422          This is because the most significant word is the one which may
1423          be less than full.  */
1424
1425       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1426       unsigned int i;
1427
1428       if (target == 0 || !REG_P (target))
1429         target = gen_reg_rtx (mode);
1430
1431       /* Indicate for flow that the entire target reg is being set.  */
1432       emit_clobber (target);
1433
1434       for (i = 0; i < nwords; i++)
1435         {
1436           /* If I is 0, use the low-order word in both field and target;
1437              if I is 1, use the next to lowest word; and so on.  */
1438           /* Word number in TARGET to use.  */
1439           unsigned int wordnum
1440             = (WORDS_BIG_ENDIAN
1441                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1442                : i);
1443           /* Offset from start of field in OP0.  */
1444           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1445                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1446                                                 * (int) BITS_PER_WORD))
1447                                      : (int) i * BITS_PER_WORD);
1448           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1449           rtx result_part
1450             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1451                                            bitsize - i * BITS_PER_WORD),
1452                                  bitnum + bit_offset, 1, target_part, mode,
1453                                  word_mode);
1454
1455           gcc_assert (target_part);
1456
1457           if (result_part != target_part)
1458             emit_move_insn (target_part, result_part);
1459         }
1460
1461       if (unsignedp)
1462         {
1463           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1464              need to be zero'd out.  */
1465           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1466             {
1467               unsigned int i, total_words;
1468
1469               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1470               for (i = nwords; i < total_words; i++)
1471                 emit_move_insn
1472                   (operand_subword (target,
1473                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1474                                     1, VOIDmode),
1475                    const0_rtx);
1476             }
1477           return target;
1478         }
1479
1480       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1481       target = expand_shift (LSHIFT_EXPR, mode, target,
1482                              build_int_cst (NULL_TREE,
1483                                             GET_MODE_BITSIZE (mode) - bitsize),
1484                              NULL_RTX, 0);
1485       return expand_shift (RSHIFT_EXPR, mode, target,
1486                            build_int_cst (NULL_TREE,
1487                                           GET_MODE_BITSIZE (mode) - bitsize),
1488                            NULL_RTX, 0);
1489     }
1490
1491   /* From here on we know the desired field is smaller than a word.  */
1492
1493   /* Check if there is a correspondingly-sized integer field, so we can
1494      safely extract it as one size of integer, if necessary; then
1495      truncate or extend to the size that is wanted; then use SUBREGs or
1496      convert_to_mode to get one of the modes we really wanted.  */
1497
1498   int_mode = int_mode_for_mode (tmode);
1499   if (int_mode == BLKmode)
1500     int_mode = int_mode_for_mode (mode);
1501   /* Should probably push op0 out to memory and then do a load.  */
1502   gcc_assert (int_mode != BLKmode);
1503
1504   /* OFFSET is the number of words or bytes (UNIT says which)
1505      from STR_RTX to the first word or byte containing part of the field.  */
1506   if (!MEM_P (op0))
1507     {
1508       if (offset != 0
1509           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1510         {
1511           if (!REG_P (op0))
1512             op0 = copy_to_reg (op0);
1513           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1514                                 op0, (offset * UNITS_PER_WORD));
1515         }
1516       offset = 0;
1517     }
1518
1519   /* Now OFFSET is nonzero only for memory operands.  */
1520   ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1521   icode = unsignedp ? CODE_FOR_extzv : CODE_FOR_extv;
1522   if (ext_mode != MAX_MACHINE_MODE
1523       && bitsize > 0
1524       && GET_MODE_BITSIZE (ext_mode) >= bitsize
1525       /* If op0 is a register, we need it in EXT_MODE to make it
1526          acceptable to the format of ext(z)v.  */
1527       && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1528       && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1529            && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode)))
1530       && check_predicate_volatile_ok (icode, 1, op0, GET_MODE (op0)))
1531     {
1532       unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1533       rtx bitsize_rtx, bitpos_rtx;
1534       rtx last = get_last_insn ();
1535       rtx xop0 = op0;
1536       rtx xtarget = target;
1537       rtx xspec_target = target;
1538       rtx xspec_target_subreg = 0;
1539       rtx pat;
1540
1541       /* If op0 is a register, we need it in EXT_MODE to make it
1542          acceptable to the format of ext(z)v.  */
1543       if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1544         xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1545       if (MEM_P (xop0))
1546         /* Get ref to first byte containing part of the field.  */
1547         xop0 = adjust_address (xop0, byte_mode, xoffset);
1548
1549       /* On big-endian machines, we count bits from the most significant.
1550          If the bit field insn does not, we must invert.  */
1551       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1552         xbitpos = unit - bitsize - xbitpos;
1553
1554       /* Now convert from counting within UNIT to counting in EXT_MODE.  */
1555       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1556         xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1557
1558       unit = GET_MODE_BITSIZE (ext_mode);
1559
1560       if (xtarget == 0)
1561         xtarget = xspec_target = gen_reg_rtx (tmode);
1562
1563       if (GET_MODE (xtarget) != ext_mode)
1564         {
1565           /* Don't use LHS paradoxical subreg if explicit truncation is needed
1566              between the mode of the extraction (word_mode) and the target
1567              mode.  Instead, create a temporary and use convert_move to set
1568              the target.  */
1569           if (REG_P (xtarget)
1570               && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (GET_MODE (xtarget)),
1571                                         GET_MODE_BITSIZE (ext_mode)))
1572             {
1573               xtarget = gen_lowpart (ext_mode, xtarget);
1574               if (GET_MODE_SIZE (ext_mode)
1575                   > GET_MODE_SIZE (GET_MODE (xspec_target)))
1576                 xspec_target_subreg = xtarget;
1577             }
1578           else
1579             xtarget = gen_reg_rtx (ext_mode);
1580         }
1581
1582       /* If this machine's ext(z)v insists on a register target,
1583          make sure we have one.  */
1584       if (!insn_data[(int) icode].operand[0].predicate (xtarget, ext_mode))
1585         xtarget = gen_reg_rtx (ext_mode);
1586
1587       bitsize_rtx = GEN_INT (bitsize);
1588       bitpos_rtx = GEN_INT (xbitpos);
1589
1590       pat = (unsignedp
1591              ? gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx)
1592              : gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx));
1593       if (pat)
1594         {
1595           emit_insn (pat);
1596           if (xtarget == xspec_target)
1597             return xtarget;
1598           if (xtarget == xspec_target_subreg)
1599             return xspec_target;
1600           return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1601         }
1602       delete_insns_since (last);
1603     }
1604
1605   /* If OP0 is a memory, try copying it to a register and seeing if a
1606      cheap register alternative is available.  */
1607   if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1608     {
1609       enum machine_mode bestmode;
1610
1611       /* Get the mode to use for inserting into this field.  If
1612          OP0 is BLKmode, get the smallest mode consistent with the
1613          alignment. If OP0 is a non-BLKmode object that is no
1614          wider than EXT_MODE, use its mode. Otherwise, use the
1615          smallest mode containing the field.  */
1616
1617       if (GET_MODE (op0) == BLKmode
1618           || (ext_mode != MAX_MACHINE_MODE
1619               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1620         bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
1621                                   (ext_mode == MAX_MACHINE_MODE
1622                                    ? VOIDmode : ext_mode),
1623                                   MEM_VOLATILE_P (op0));
1624       else
1625         bestmode = GET_MODE (op0);
1626
1627       if (bestmode != VOIDmode
1628           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1629                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1630         {
1631           unsigned HOST_WIDE_INT xoffset, xbitpos;
1632
1633           /* Compute the offset as a multiple of this unit,
1634              counting in bytes.  */
1635           unit = GET_MODE_BITSIZE (bestmode);
1636           xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1637           xbitpos = bitnum % unit;
1638
1639           /* Make sure the register is big enough for the whole field.  */
1640           if (xoffset * BITS_PER_UNIT + unit
1641               >= offset * BITS_PER_UNIT + bitsize)
1642             {
1643               rtx last, result, xop0;
1644
1645               last = get_last_insn ();
1646
1647               /* Fetch it to a register in that size.  */
1648               xop0 = adjust_address (op0, bestmode, xoffset);
1649               xop0 = force_reg (bestmode, xop0);
1650               result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1651                                             unsignedp, target,
1652                                             mode, tmode, false);
1653               if (result)
1654                 return result;
1655
1656               delete_insns_since (last);
1657             }
1658         }
1659     }
1660
1661   if (!fallback_p)
1662     return NULL;
1663
1664   target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1665                                     bitpos, target, unsignedp);
1666   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1667 }
1668
1669 /* Generate code to extract a byte-field from STR_RTX
1670    containing BITSIZE bits, starting at BITNUM,
1671    and put it in TARGET if possible (if TARGET is nonzero).
1672    Regardless of TARGET, we return the rtx for where the value is placed.
1673
1674    STR_RTX is the structure containing the byte (a REG or MEM).
1675    UNSIGNEDP is nonzero if this is an unsigned bit field.
1676    MODE is the natural mode of the field value once extracted.
1677    TMODE is the mode the caller would like the value to have;
1678    but the value may be returned with type MODE instead.
1679
1680    If a TARGET is specified and we can store in it at no extra cost,
1681    we do so, and return TARGET.
1682    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1683    if they are equally easy.  */
1684
1685 rtx
1686 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1687                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1688                    enum machine_mode mode, enum machine_mode tmode)
1689 {
1690   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1691                               target, mode, tmode, true);
1692 }
1693 \f
1694 /* Extract a bit field using shifts and boolean operations
1695    Returns an rtx to represent the value.
1696    OP0 addresses a register (word) or memory (byte).
1697    BITPOS says which bit within the word or byte the bit field starts in.
1698    OFFSET says how many bytes farther the bit field starts;
1699     it is 0 if OP0 is a register.
1700    BITSIZE says how many bits long the bit field is.
1701     (If OP0 is a register, it may be narrower than a full word,
1702      but BITPOS still counts within a full word,
1703      which is significant on bigendian machines.)
1704
1705    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1706    If TARGET is nonzero, attempts to store the value there
1707    and return TARGET, but this is not guaranteed.
1708    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1709
1710 static rtx
1711 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1712                          unsigned HOST_WIDE_INT offset,
1713                          unsigned HOST_WIDE_INT bitsize,
1714                          unsigned HOST_WIDE_INT bitpos, rtx target,
1715                          int unsignedp)
1716 {
1717   unsigned int total_bits = BITS_PER_WORD;
1718   enum machine_mode mode;
1719
1720   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1721     {
1722       /* Special treatment for a bit field split across two registers.  */
1723       if (bitsize + bitpos > BITS_PER_WORD)
1724         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1725     }
1726   else
1727     {
1728       /* Get the proper mode to use for this field.  We want a mode that
1729          includes the entire field.  If such a mode would be larger than
1730          a word, we won't be doing the extraction the normal way.  */
1731
1732       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1733                             MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1734
1735       if (mode == VOIDmode)
1736         /* The only way this should occur is if the field spans word
1737            boundaries.  */
1738         return extract_split_bit_field (op0, bitsize,
1739                                         bitpos + offset * BITS_PER_UNIT,
1740                                         unsignedp);
1741
1742       total_bits = GET_MODE_BITSIZE (mode);
1743
1744       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1745          be in the range 0 to total_bits-1, and put any excess bytes in
1746          OFFSET.  */
1747       if (bitpos >= total_bits)
1748         {
1749           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1750           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1751                      * BITS_PER_UNIT);
1752         }
1753
1754       /* Get ref to an aligned byte, halfword, or word containing the field.
1755          Adjust BITPOS to be position within a word,
1756          and OFFSET to be the offset of that word.
1757          Then alter OP0 to refer to that word.  */
1758       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1759       offset -= (offset % (total_bits / BITS_PER_UNIT));
1760       op0 = adjust_address (op0, mode, offset);
1761     }
1762
1763   mode = GET_MODE (op0);
1764
1765   if (BYTES_BIG_ENDIAN)
1766     /* BITPOS is the distance between our msb and that of OP0.
1767        Convert it to the distance from the lsb.  */
1768     bitpos = total_bits - bitsize - bitpos;
1769
1770   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1771      We have reduced the big-endian case to the little-endian case.  */
1772
1773   if (unsignedp)
1774     {
1775       if (bitpos)
1776         {
1777           /* If the field does not already start at the lsb,
1778              shift it so it does.  */
1779           tree amount = build_int_cst (NULL_TREE, bitpos);
1780           /* Maybe propagate the target for the shift.  */
1781           /* But not if we will return it--could confuse integrate.c.  */
1782           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1783           if (tmode != mode) subtarget = 0;
1784           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1785         }
1786       /* Convert the value to the desired mode.  */
1787       if (mode != tmode)
1788         op0 = convert_to_mode (tmode, op0, 1);
1789
1790       /* Unless the msb of the field used to be the msb when we shifted,
1791          mask out the upper bits.  */
1792
1793       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1794         return expand_binop (GET_MODE (op0), and_optab, op0,
1795                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1796                              target, 1, OPTAB_LIB_WIDEN);
1797       return op0;
1798     }
1799
1800   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1801      then arithmetic-shift its lsb to the lsb of the word.  */
1802   op0 = force_reg (mode, op0);
1803   if (mode != tmode)
1804     target = 0;
1805
1806   /* Find the narrowest integer mode that contains the field.  */
1807
1808   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1809        mode = GET_MODE_WIDER_MODE (mode))
1810     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1811       {
1812         op0 = convert_to_mode (mode, op0, 0);
1813         break;
1814       }
1815
1816   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1817     {
1818       tree amount
1819         = build_int_cst (NULL_TREE,
1820                          GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1821       /* Maybe propagate the target for the shift.  */
1822       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1823       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1824     }
1825
1826   return expand_shift (RSHIFT_EXPR, mode, op0,
1827                        build_int_cst (NULL_TREE,
1828                                       GET_MODE_BITSIZE (mode) - bitsize),
1829                        target, 0);
1830 }
1831 \f
1832 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1833    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1834    complement of that if COMPLEMENT.  The mask is truncated if
1835    necessary to the width of mode MODE.  The mask is zero-extended if
1836    BITSIZE+BITPOS is too small for MODE.  */
1837
1838 static rtx
1839 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1840 {
1841   double_int mask;
1842
1843   mask = double_int_mask (bitsize);
1844   mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1845
1846   if (complement)
1847     mask = double_int_not (mask);
1848
1849   return immed_double_int_const (mask, mode);
1850 }
1851
1852 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1853    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1854
1855 static rtx
1856 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1857 {
1858   double_int val;
1859
1860   val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
1861   val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1862
1863   return immed_double_int_const (val, mode);
1864 }
1865 \f
1866 /* Extract a bit field that is split across two words
1867    and return an RTX for the result.
1868
1869    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1870    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1871    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1872
1873 static rtx
1874 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1875                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1876 {
1877   unsigned int unit;
1878   unsigned int bitsdone = 0;
1879   rtx result = NULL_RTX;
1880   int first = 1;
1881
1882   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1883      much at a time.  */
1884   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1885     unit = BITS_PER_WORD;
1886   else
1887     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1888
1889   while (bitsdone < bitsize)
1890     {
1891       unsigned HOST_WIDE_INT thissize;
1892       rtx part, word;
1893       unsigned HOST_WIDE_INT thispos;
1894       unsigned HOST_WIDE_INT offset;
1895
1896       offset = (bitpos + bitsdone) / unit;
1897       thispos = (bitpos + bitsdone) % unit;
1898
1899       /* THISSIZE must not overrun a word boundary.  Otherwise,
1900          extract_fixed_bit_field will call us again, and we will mutually
1901          recurse forever.  */
1902       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1903       thissize = MIN (thissize, unit - thispos);
1904
1905       /* If OP0 is a register, then handle OFFSET here.
1906
1907          When handling multiword bitfields, extract_bit_field may pass
1908          down a word_mode SUBREG of a larger REG for a bitfield that actually
1909          crosses a word boundary.  Thus, for a SUBREG, we must find
1910          the current word starting from the base register.  */
1911       if (GET_CODE (op0) == SUBREG)
1912         {
1913           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1914           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1915                                         GET_MODE (SUBREG_REG (op0)));
1916           offset = 0;
1917         }
1918       else if (REG_P (op0))
1919         {
1920           word = operand_subword_force (op0, offset, GET_MODE (op0));
1921           offset = 0;
1922         }
1923       else
1924         word = op0;
1925
1926       /* Extract the parts in bit-counting order,
1927          whose meaning is determined by BYTES_PER_UNIT.
1928          OFFSET is in UNITs, and UNIT is in bits.
1929          extract_fixed_bit_field wants offset in bytes.  */
1930       part = extract_fixed_bit_field (word_mode, word,
1931                                       offset * unit / BITS_PER_UNIT,
1932                                       thissize, thispos, 0, 1);
1933       bitsdone += thissize;
1934
1935       /* Shift this part into place for the result.  */
1936       if (BYTES_BIG_ENDIAN)
1937         {
1938           if (bitsize != bitsdone)
1939             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1940                                  build_int_cst (NULL_TREE, bitsize - bitsdone),
1941                                  0, 1);
1942         }
1943       else
1944         {
1945           if (bitsdone != thissize)
1946             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1947                                  build_int_cst (NULL_TREE,
1948                                                 bitsdone - thissize), 0, 1);
1949         }
1950
1951       if (first)
1952         result = part;
1953       else
1954         /* Combine the parts with bitwise or.  This works
1955            because we extracted each part as an unsigned bit field.  */
1956         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1957                                OPTAB_LIB_WIDEN);
1958
1959       first = 0;
1960     }
1961
1962   /* Unsigned bit field: we are done.  */
1963   if (unsignedp)
1964     return result;
1965   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1966   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1967                          build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1968                          NULL_RTX, 0);
1969   return expand_shift (RSHIFT_EXPR, word_mode, result,
1970                        build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1971                        NULL_RTX, 0);
1972 }
1973 \f
1974 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1975    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
1976    MODE, fill the upper bits with zeros.  Fail if the layout of either
1977    mode is unknown (as for CC modes) or if the extraction would involve
1978    unprofitable mode punning.  Return the value on success, otherwise
1979    return null.
1980
1981    This is different from gen_lowpart* in these respects:
1982
1983      - the returned value must always be considered an rvalue
1984
1985      - when MODE is wider than SRC_MODE, the extraction involves
1986        a zero extension
1987
1988      - when MODE is smaller than SRC_MODE, the extraction involves
1989        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
1990
1991    In other words, this routine performs a computation, whereas the
1992    gen_lowpart* routines are conceptually lvalue or rvalue subreg
1993    operations.  */
1994
1995 rtx
1996 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
1997 {
1998   enum machine_mode int_mode, src_int_mode;
1999
2000   if (mode == src_mode)
2001     return src;
2002
2003   if (CONSTANT_P (src))
2004     {
2005       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2006          fails, it will happily create (subreg (symbol_ref)) or similar
2007          invalid SUBREGs.  */
2008       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2009       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2010       if (ret)
2011         return ret;
2012
2013       if (GET_MODE (src) == VOIDmode
2014           || !validate_subreg (mode, src_mode, src, byte))
2015         return NULL_RTX;
2016
2017       src = force_reg (GET_MODE (src), src);
2018       return gen_rtx_SUBREG (mode, src, byte);
2019     }
2020
2021   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2022     return NULL_RTX;
2023
2024   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2025       && MODES_TIEABLE_P (mode, src_mode))
2026     {
2027       rtx x = gen_lowpart_common (mode, src);
2028       if (x)
2029         return x;
2030     }
2031
2032   src_int_mode = int_mode_for_mode (src_mode);
2033   int_mode = int_mode_for_mode (mode);
2034   if (src_int_mode == BLKmode || int_mode == BLKmode)
2035     return NULL_RTX;
2036
2037   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2038     return NULL_RTX;
2039   if (!MODES_TIEABLE_P (int_mode, mode))
2040     return NULL_RTX;
2041
2042   src = gen_lowpart (src_int_mode, src);
2043   src = convert_modes (int_mode, src_int_mode, src, true);
2044   src = gen_lowpart (mode, src);
2045   return src;
2046 }
2047 \f
2048 /* Add INC into TARGET.  */
2049
2050 void
2051 expand_inc (rtx target, rtx inc)
2052 {
2053   rtx value = expand_binop (GET_MODE (target), add_optab,
2054                             target, inc,
2055                             target, 0, OPTAB_LIB_WIDEN);
2056   if (value != target)
2057     emit_move_insn (target, value);
2058 }
2059
2060 /* Subtract DEC from TARGET.  */
2061
2062 void
2063 expand_dec (rtx target, rtx dec)
2064 {
2065   rtx value = expand_binop (GET_MODE (target), sub_optab,
2066                             target, dec,
2067                             target, 0, OPTAB_LIB_WIDEN);
2068   if (value != target)
2069     emit_move_insn (target, value);
2070 }
2071 \f
2072 /* Output a shift instruction for expression code CODE,
2073    with SHIFTED being the rtx for the value to shift,
2074    and AMOUNT the tree for the amount to shift by.
2075    Store the result in the rtx TARGET, if that is convenient.
2076    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2077    Return the rtx for where the value is.  */
2078
2079 rtx
2080 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2081               tree amount, rtx target, int unsignedp)
2082 {
2083   rtx op1, temp = 0;
2084   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2085   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2086   optab lshift_optab = ashl_optab;
2087   optab rshift_arith_optab = ashr_optab;
2088   optab rshift_uns_optab = lshr_optab;
2089   optab lrotate_optab = rotl_optab;
2090   optab rrotate_optab = rotr_optab;
2091   enum machine_mode op1_mode;
2092   int attempt;
2093   bool speed = optimize_insn_for_speed_p ();
2094
2095   op1 = expand_normal (amount);
2096   op1_mode = GET_MODE (op1);
2097
2098   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2099      shift amount is a vector, use the vector/vector shift patterns.  */
2100   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2101     {
2102       lshift_optab = vashl_optab;
2103       rshift_arith_optab = vashr_optab;
2104       rshift_uns_optab = vlshr_optab;
2105       lrotate_optab = vrotl_optab;
2106       rrotate_optab = vrotr_optab;
2107     }
2108
2109   /* Previously detected shift-counts computed by NEGATE_EXPR
2110      and shifted in the other direction; but that does not work
2111      on all machines.  */
2112
2113   if (SHIFT_COUNT_TRUNCATED)
2114     {
2115       if (CONST_INT_P (op1)
2116           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2117               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2118         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2119                        % GET_MODE_BITSIZE (mode));
2120       else if (GET_CODE (op1) == SUBREG
2121                && subreg_lowpart_p (op1)
2122                && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2123         op1 = SUBREG_REG (op1);
2124     }
2125
2126   if (op1 == const0_rtx)
2127     return shifted;
2128
2129   /* Check whether its cheaper to implement a left shift by a constant
2130      bit count by a sequence of additions.  */
2131   if (code == LSHIFT_EXPR
2132       && CONST_INT_P (op1)
2133       && INTVAL (op1) > 0
2134       && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2135       && INTVAL (op1) < MAX_BITS_PER_WORD
2136       && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2137       && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2138     {
2139       int i;
2140       for (i = 0; i < INTVAL (op1); i++)
2141         {
2142           temp = force_reg (mode, shifted);
2143           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2144                                   unsignedp, OPTAB_LIB_WIDEN);
2145         }
2146       return shifted;
2147     }
2148
2149   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2150     {
2151       enum optab_methods methods;
2152
2153       if (attempt == 0)
2154         methods = OPTAB_DIRECT;
2155       else if (attempt == 1)
2156         methods = OPTAB_WIDEN;
2157       else
2158         methods = OPTAB_LIB_WIDEN;
2159
2160       if (rotate)
2161         {
2162           /* Widening does not work for rotation.  */
2163           if (methods == OPTAB_WIDEN)
2164             continue;
2165           else if (methods == OPTAB_LIB_WIDEN)
2166             {
2167               /* If we have been unable to open-code this by a rotation,
2168                  do it as the IOR of two shifts.  I.e., to rotate A
2169                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2170                  where C is the bitsize of A.
2171
2172                  It is theoretically possible that the target machine might
2173                  not be able to perform either shift and hence we would
2174                  be making two libcalls rather than just the one for the
2175                  shift (similarly if IOR could not be done).  We will allow
2176                  this extremely unlikely lossage to avoid complicating the
2177                  code below.  */
2178
2179               rtx subtarget = target == shifted ? 0 : target;
2180               tree new_amount, other_amount;
2181               rtx temp1;
2182               tree type = TREE_TYPE (amount);
2183               if (GET_MODE (op1) != TYPE_MODE (type)
2184                   && GET_MODE (op1) != VOIDmode)
2185                 op1 = convert_to_mode (TYPE_MODE (type), op1, 1);
2186               new_amount = make_tree (type, op1);
2187               other_amount
2188                 = fold_build2 (MINUS_EXPR, type,
2189                                build_int_cst (type, GET_MODE_BITSIZE (mode)),
2190                                new_amount);
2191
2192               shifted = force_reg (mode, shifted);
2193
2194               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2195                                    mode, shifted, new_amount, 0, 1);
2196               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2197                                     mode, shifted, other_amount, subtarget, 1);
2198               return expand_binop (mode, ior_optab, temp, temp1, target,
2199                                    unsignedp, methods);
2200             }
2201
2202           temp = expand_binop (mode,
2203                                left ? lrotate_optab : rrotate_optab,
2204                                shifted, op1, target, unsignedp, methods);
2205         }
2206       else if (unsignedp)
2207         temp = expand_binop (mode,
2208                              left ? lshift_optab : rshift_uns_optab,
2209                              shifted, op1, target, unsignedp, methods);
2210
2211       /* Do arithmetic shifts.
2212          Also, if we are going to widen the operand, we can just as well
2213          use an arithmetic right-shift instead of a logical one.  */
2214       if (temp == 0 && ! rotate
2215           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2216         {
2217           enum optab_methods methods1 = methods;
2218
2219           /* If trying to widen a log shift to an arithmetic shift,
2220              don't accept an arithmetic shift of the same size.  */
2221           if (unsignedp)
2222             methods1 = OPTAB_MUST_WIDEN;
2223
2224           /* Arithmetic shift */
2225
2226           temp = expand_binop (mode,
2227                                left ? lshift_optab : rshift_arith_optab,
2228                                shifted, op1, target, unsignedp, methods1);
2229         }
2230
2231       /* We used to try extzv here for logical right shifts, but that was
2232          only useful for one machine, the VAX, and caused poor code
2233          generation there for lshrdi3, so the code was deleted and a
2234          define_expand for lshrsi3 was added to vax.md.  */
2235     }
2236
2237   gcc_assert (temp);
2238   return temp;
2239 }
2240 \f
2241 enum alg_code {
2242   alg_unknown,
2243   alg_zero,
2244   alg_m, alg_shift,
2245   alg_add_t_m2,
2246   alg_sub_t_m2,
2247   alg_add_factor,
2248   alg_sub_factor,
2249   alg_add_t2_m,
2250   alg_sub_t2_m,
2251   alg_impossible
2252 };
2253
2254 /* This structure holds the "cost" of a multiply sequence.  The
2255    "cost" field holds the total rtx_cost of every operator in the
2256    synthetic multiplication sequence, hence cost(a op b) is defined
2257    as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2258    The "latency" field holds the minimum possible latency of the
2259    synthetic multiply, on a hypothetical infinitely parallel CPU.
2260    This is the critical path, or the maximum height, of the expression
2261    tree which is the sum of rtx_costs on the most expensive path from
2262    any leaf to the root.  Hence latency(a op b) is defined as zero for
2263    leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise.  */
2264
2265 struct mult_cost {
2266   short cost;     /* Total rtx_cost of the multiplication sequence.  */
2267   short latency;  /* The latency of the multiplication sequence.  */
2268 };
2269
2270 /* This macro is used to compare a pointer to a mult_cost against an
2271    single integer "rtx_cost" value.  This is equivalent to the macro
2272    CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}.  */
2273 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y)    \
2274                              || ((X)->cost == (Y) && (X)->latency < (Y)))
2275
2276 /* This macro is used to compare two pointers to mult_costs against
2277    each other.  The macro returns true if X is cheaper than Y.
2278    Currently, the cheaper of two mult_costs is the one with the
2279    lower "cost".  If "cost"s are tied, the lower latency is cheaper.  */
2280 #define CHEAPER_MULT_COST(X,Y)  ((X)->cost < (Y)->cost          \
2281                                  || ((X)->cost == (Y)->cost     \
2282                                      && (X)->latency < (Y)->latency))
2283
2284 /* This structure records a sequence of operations.
2285    `ops' is the number of operations recorded.
2286    `cost' is their total cost.
2287    The operations are stored in `op' and the corresponding
2288    logarithms of the integer coefficients in `log'.
2289
2290    These are the operations:
2291    alg_zero             total := 0;
2292    alg_m                total := multiplicand;
2293    alg_shift            total := total * coeff
2294    alg_add_t_m2         total := total + multiplicand * coeff;
2295    alg_sub_t_m2         total := total - multiplicand * coeff;
2296    alg_add_factor       total := total * coeff + total;
2297    alg_sub_factor       total := total * coeff - total;
2298    alg_add_t2_m         total := total * coeff + multiplicand;
2299    alg_sub_t2_m         total := total * coeff - multiplicand;
2300
2301    The first operand must be either alg_zero or alg_m.  */
2302
2303 struct algorithm
2304 {
2305   struct mult_cost cost;
2306   short ops;
2307   /* The size of the OP and LOG fields are not directly related to the
2308      word size, but the worst-case algorithms will be if we have few
2309      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2310      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2311      in total wordsize operations.  */
2312   enum alg_code op[MAX_BITS_PER_WORD];
2313   char log[MAX_BITS_PER_WORD];
2314 };
2315
2316 /* The entry for our multiplication cache/hash table.  */
2317 struct alg_hash_entry {
2318   /* The number we are multiplying by.  */
2319   unsigned HOST_WIDE_INT t;
2320
2321   /* The mode in which we are multiplying something by T.  */
2322   enum machine_mode mode;
2323
2324   /* The best multiplication algorithm for t.  */
2325   enum alg_code alg;
2326
2327   /* The cost of multiplication if ALG_CODE is not alg_impossible.
2328      Otherwise, the cost within which multiplication by T is
2329      impossible.  */
2330   struct mult_cost cost;
2331
2332   /* OPtimized for speed? */
2333   bool speed;
2334 };
2335
2336 /* The number of cache/hash entries.  */
2337 #if HOST_BITS_PER_WIDE_INT == 64
2338 #define NUM_ALG_HASH_ENTRIES 1031
2339 #else
2340 #define NUM_ALG_HASH_ENTRIES 307
2341 #endif
2342
2343 /* Each entry of ALG_HASH caches alg_code for some integer.  This is
2344    actually a hash table.  If we have a collision, that the older
2345    entry is kicked out.  */
2346 static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2347
2348 /* Indicates the type of fixup needed after a constant multiplication.
2349    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2350    the result should be negated, and ADD_VARIANT means that the
2351    multiplicand should be added to the result.  */
2352 enum mult_variant {basic_variant, negate_variant, add_variant};
2353
2354 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2355                         const struct mult_cost *, enum machine_mode mode);
2356 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2357                                  struct algorithm *, enum mult_variant *, int);
2358 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2359                               const struct algorithm *, enum mult_variant);
2360 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2361                                                  int, rtx *, int *, int *);
2362 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2363 static rtx extract_high_half (enum machine_mode, rtx);
2364 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2365 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2366                                        int, int);
2367 /* Compute and return the best algorithm for multiplying by T.
2368    The algorithm must cost less than cost_limit
2369    If retval.cost >= COST_LIMIT, no algorithm was found and all
2370    other field of the returned struct are undefined.
2371    MODE is the machine mode of the multiplication.  */
2372
2373 static void
2374 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2375             const struct mult_cost *cost_limit, enum machine_mode mode)
2376 {
2377   int m;
2378   struct algorithm *alg_in, *best_alg;
2379   struct mult_cost best_cost;
2380   struct mult_cost new_limit;
2381   int op_cost, op_latency;
2382   unsigned HOST_WIDE_INT orig_t = t;
2383   unsigned HOST_WIDE_INT q;
2384   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2385   int hash_index;
2386   bool cache_hit = false;
2387   enum alg_code cache_alg = alg_zero;
2388   bool speed = optimize_insn_for_speed_p ();
2389
2390   /* Indicate that no algorithm is yet found.  If no algorithm
2391      is found, this value will be returned and indicate failure.  */
2392   alg_out->cost.cost = cost_limit->cost + 1;
2393   alg_out->cost.latency = cost_limit->latency + 1;
2394
2395   if (cost_limit->cost < 0
2396       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2397     return;
2398
2399   /* Restrict the bits of "t" to the multiplication's mode.  */
2400   t &= GET_MODE_MASK (mode);
2401
2402   /* t == 1 can be done in zero cost.  */
2403   if (t == 1)
2404     {
2405       alg_out->ops = 1;
2406       alg_out->cost.cost = 0;
2407       alg_out->cost.latency = 0;
2408       alg_out->op[0] = alg_m;
2409       return;
2410     }
2411
2412   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2413      fail now.  */
2414   if (t == 0)
2415     {
2416       if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2417         return;
2418       else
2419         {
2420           alg_out->ops = 1;
2421           alg_out->cost.cost = zero_cost[speed];
2422           alg_out->cost.latency = zero_cost[speed];
2423           alg_out->op[0] = alg_zero;
2424           return;
2425         }
2426     }
2427
2428   /* We'll be needing a couple extra algorithm structures now.  */
2429
2430   alg_in = XALLOCA (struct algorithm);
2431   best_alg = XALLOCA (struct algorithm);
2432   best_cost = *cost_limit;
2433
2434   /* Compute the hash index.  */
2435   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2436
2437   /* See if we already know what to do for T.  */
2438   if (alg_hash[hash_index].t == t
2439       && alg_hash[hash_index].mode == mode
2440       && alg_hash[hash_index].mode == mode
2441       && alg_hash[hash_index].speed == speed
2442       && alg_hash[hash_index].alg != alg_unknown)
2443     {
2444       cache_alg = alg_hash[hash_index].alg;
2445
2446       if (cache_alg == alg_impossible)
2447         {
2448           /* The cache tells us that it's impossible to synthesize
2449              multiplication by T within alg_hash[hash_index].cost.  */
2450           if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2451             /* COST_LIMIT is at least as restrictive as the one
2452                recorded in the hash table, in which case we have no
2453                hope of synthesizing a multiplication.  Just
2454                return.  */
2455             return;
2456
2457           /* If we get here, COST_LIMIT is less restrictive than the
2458              one recorded in the hash table, so we may be able to
2459              synthesize a multiplication.  Proceed as if we didn't
2460              have the cache entry.  */
2461         }
2462       else
2463         {
2464           if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2465             /* The cached algorithm shows that this multiplication
2466                requires more cost than COST_LIMIT.  Just return.  This
2467                way, we don't clobber this cache entry with
2468                alg_impossible but retain useful information.  */
2469             return;
2470
2471           cache_hit = true;
2472
2473           switch (cache_alg)
2474             {
2475             case alg_shift:
2476               goto do_alg_shift;
2477
2478             case alg_add_t_m2:
2479             case alg_sub_t_m2:
2480               goto do_alg_addsub_t_m2;
2481
2482             case alg_add_factor:
2483             case alg_sub_factor:
2484               goto do_alg_addsub_factor;
2485
2486             case alg_add_t2_m:
2487               goto do_alg_add_t2_m;
2488
2489             case alg_sub_t2_m:
2490               goto do_alg_sub_t2_m;
2491
2492             default:
2493               gcc_unreachable ();
2494             }
2495         }
2496     }
2497
2498   /* If we have a group of zero bits at the low-order part of T, try
2499      multiplying by the remaining bits and then doing a shift.  */
2500
2501   if ((t & 1) == 0)
2502     {
2503     do_alg_shift:
2504       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2505       if (m < maxm)
2506         {
2507           q = t >> m;
2508           /* The function expand_shift will choose between a shift and
2509              a sequence of additions, so the observed cost is given as
2510              MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]).  */
2511           op_cost = m * add_cost[speed][mode];
2512           if (shift_cost[speed][mode][m] < op_cost)
2513             op_cost = shift_cost[speed][mode][m];
2514           new_limit.cost = best_cost.cost - op_cost;
2515           new_limit.latency = best_cost.latency - op_cost;
2516           synth_mult (alg_in, q, &new_limit, mode);
2517
2518           alg_in->cost.cost += op_cost;
2519           alg_in->cost.latency += op_cost;
2520           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2521             {
2522               struct algorithm *x;
2523               best_cost = alg_in->cost;
2524               x = alg_in, alg_in = best_alg, best_alg = x;
2525               best_alg->log[best_alg->ops] = m;
2526               best_alg->op[best_alg->ops] = alg_shift;
2527             }
2528
2529           /* See if treating ORIG_T as a signed number yields a better
2530              sequence.  Try this sequence only for a negative ORIG_T
2531              as it would be useless for a non-negative ORIG_T.  */
2532           if ((HOST_WIDE_INT) orig_t < 0)
2533             {
2534               /* Shift ORIG_T as follows because a right shift of a
2535                  negative-valued signed type is implementation
2536                  defined.  */
2537               q = ~(~orig_t >> m);
2538               /* The function expand_shift will choose between a shift
2539                  and a sequence of additions, so the observed cost is
2540                  given as MIN (m * add_cost[speed][mode],
2541                  shift_cost[speed][mode][m]).  */
2542               op_cost = m * add_cost[speed][mode];
2543               if (shift_cost[speed][mode][m] < op_cost)
2544                 op_cost = shift_cost[speed][mode][m];
2545               new_limit.cost = best_cost.cost - op_cost;
2546               new_limit.latency = best_cost.latency - op_cost;
2547               synth_mult (alg_in, q, &new_limit, mode);
2548
2549               alg_in->cost.cost += op_cost;
2550               alg_in->cost.latency += op_cost;
2551               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2552                 {
2553                   struct algorithm *x;
2554                   best_cost = alg_in->cost;
2555                   x = alg_in, alg_in = best_alg, best_alg = x;
2556                   best_alg->log[best_alg->ops] = m;
2557                   best_alg->op[best_alg->ops] = alg_shift;
2558                 }
2559             }
2560         }
2561       if (cache_hit)
2562         goto done;
2563     }
2564
2565   /* If we have an odd number, add or subtract one.  */
2566   if ((t & 1) != 0)
2567     {
2568       unsigned HOST_WIDE_INT w;
2569
2570     do_alg_addsub_t_m2:
2571       for (w = 1; (w & t) != 0; w <<= 1)
2572         ;
2573       /* If T was -1, then W will be zero after the loop.  This is another
2574          case where T ends with ...111.  Handling this with (T + 1) and
2575          subtract 1 produces slightly better code and results in algorithm
2576          selection much faster than treating it like the ...0111 case
2577          below.  */
2578       if (w == 0
2579           || (w > 2
2580               /* Reject the case where t is 3.
2581                  Thus we prefer addition in that case.  */
2582               && t != 3))
2583         {
2584           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2585
2586           op_cost = add_cost[speed][mode];
2587           new_limit.cost = best_cost.cost - op_cost;
2588           new_limit.latency = best_cost.latency - op_cost;
2589           synth_mult (alg_in, t + 1, &new_limit, mode);
2590
2591           alg_in->cost.cost += op_cost;
2592           alg_in->cost.latency += op_cost;
2593           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2594             {
2595               struct algorithm *x;
2596               best_cost = alg_in->cost;
2597               x = alg_in, alg_in = best_alg, best_alg = x;
2598               best_alg->log[best_alg->ops] = 0;
2599               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2600             }
2601         }
2602       else
2603         {
2604           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2605
2606           op_cost = add_cost[speed][mode];
2607           new_limit.cost = best_cost.cost - op_cost;
2608           new_limit.latency = best_cost.latency - op_cost;
2609           synth_mult (alg_in, t - 1, &new_limit, mode);
2610
2611           alg_in->cost.cost += op_cost;
2612           alg_in->cost.latency += op_cost;
2613           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2614             {
2615               struct algorithm *x;
2616               best_cost = alg_in->cost;
2617               x = alg_in, alg_in = best_alg, best_alg = x;
2618               best_alg->log[best_alg->ops] = 0;
2619               best_alg->op[best_alg->ops] = alg_add_t_m2;
2620             }
2621         }
2622
2623       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2624          quickly with a - a * n for some appropriate constant n.  */
2625       m = exact_log2 (-orig_t + 1);
2626       if (m >= 0 && m < maxm)
2627         {
2628           op_cost = shiftsub1_cost[speed][mode][m];
2629           new_limit.cost = best_cost.cost - op_cost;
2630           new_limit.latency = best_cost.latency - op_cost;
2631           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode);
2632
2633           alg_in->cost.cost += op_cost;
2634           alg_in->cost.latency += op_cost;
2635           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2636             {
2637               struct algorithm *x;
2638               best_cost = alg_in->cost;
2639               x = alg_in, alg_in = best_alg, best_alg = x;
2640               best_alg->log[best_alg->ops] = m;
2641               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2642             }
2643         }
2644
2645       if (cache_hit)
2646         goto done;
2647     }
2648
2649   /* Look for factors of t of the form
2650      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2651      If we find such a factor, we can multiply by t using an algorithm that
2652      multiplies by q, shift the result by m and add/subtract it to itself.
2653
2654      We search for large factors first and loop down, even if large factors
2655      are less probable than small; if we find a large factor we will find a
2656      good sequence quickly, and therefore be able to prune (by decreasing
2657      COST_LIMIT) the search.  */
2658
2659  do_alg_addsub_factor:
2660   for (m = floor_log2 (t - 1); m >= 2; m--)
2661     {
2662       unsigned HOST_WIDE_INT d;
2663
2664       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2665       if (t % d == 0 && t > d && m < maxm
2666           && (!cache_hit || cache_alg == alg_add_factor))
2667         {
2668           /* If the target has a cheap shift-and-add instruction use
2669              that in preference to a shift insn followed by an add insn.
2670              Assume that the shift-and-add is "atomic" with a latency
2671              equal to its cost, otherwise assume that on superscalar
2672              hardware the shift may be executed concurrently with the
2673              earlier steps in the algorithm.  */
2674           op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2675           if (shiftadd_cost[speed][mode][m] < op_cost)
2676             {
2677               op_cost = shiftadd_cost[speed][mode][m];
2678               op_latency = op_cost;
2679             }
2680           else
2681             op_latency = add_cost[speed][mode];
2682
2683           new_limit.cost = best_cost.cost - op_cost;
2684           new_limit.latency = best_cost.latency - op_latency;
2685           synth_mult (alg_in, t / d, &new_limit, mode);
2686
2687           alg_in->cost.cost += op_cost;
2688           alg_in->cost.latency += op_latency;
2689           if (alg_in->cost.latency < op_cost)
2690             alg_in->cost.latency = op_cost;
2691           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2692             {
2693               struct algorithm *x;
2694               best_cost = alg_in->cost;
2695               x = alg_in, alg_in = best_alg, best_alg = x;
2696               best_alg->log[best_alg->ops] = m;
2697               best_alg->op[best_alg->ops] = alg_add_factor;
2698             }
2699           /* Other factors will have been taken care of in the recursion.  */
2700           break;
2701         }
2702
2703       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2704       if (t % d == 0 && t > d && m < maxm
2705           && (!cache_hit || cache_alg == alg_sub_factor))
2706         {
2707           /* If the target has a cheap shift-and-subtract insn use
2708              that in preference to a shift insn followed by a sub insn.
2709              Assume that the shift-and-sub is "atomic" with a latency
2710              equal to it's cost, otherwise assume that on superscalar
2711              hardware the shift may be executed concurrently with the
2712              earlier steps in the algorithm.  */
2713           op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2714           if (shiftsub0_cost[speed][mode][m] < op_cost)
2715             {
2716               op_cost = shiftsub0_cost[speed][mode][m];
2717               op_latency = op_cost;
2718             }
2719           else
2720             op_latency = add_cost[speed][mode];
2721
2722           new_limit.cost = best_cost.cost - op_cost;
2723           new_limit.latency = best_cost.latency - op_latency;
2724           synth_mult (alg_in, t / d, &new_limit, mode);
2725
2726           alg_in->cost.cost += op_cost;
2727           alg_in->cost.latency += op_latency;
2728           if (alg_in->cost.latency < op_cost)
2729             alg_in->cost.latency = op_cost;
2730           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2731             {
2732               struct algorithm *x;
2733               best_cost = alg_in->cost;
2734               x = alg_in, alg_in = best_alg, best_alg = x;
2735               best_alg->log[best_alg->ops] = m;
2736               best_alg->op[best_alg->ops] = alg_sub_factor;
2737             }
2738           break;
2739         }
2740     }
2741   if (cache_hit)
2742     goto done;
2743
2744   /* Try shift-and-add (load effective address) instructions,
2745      i.e. do a*3, a*5, a*9.  */
2746   if ((t & 1) != 0)
2747     {
2748     do_alg_add_t2_m:
2749       q = t - 1;
2750       q = q & -q;
2751       m = exact_log2 (q);
2752       if (m >= 0 && m < maxm)
2753         {
2754           op_cost = shiftadd_cost[speed][mode][m];
2755           new_limit.cost = best_cost.cost - op_cost;
2756           new_limit.latency = best_cost.latency - op_cost;
2757           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2758
2759           alg_in->cost.cost += op_cost;
2760           alg_in->cost.latency += op_cost;
2761           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2762             {
2763               struct algorithm *x;
2764               best_cost = alg_in->cost;
2765               x = alg_in, alg_in = best_alg, best_alg = x;
2766               best_alg->log[best_alg->ops] = m;
2767               best_alg->op[best_alg->ops] = alg_add_t2_m;
2768             }
2769         }
2770       if (cache_hit)
2771         goto done;
2772
2773     do_alg_sub_t2_m:
2774       q = t + 1;
2775       q = q & -q;
2776       m = exact_log2 (q);
2777       if (m >= 0 && m < maxm)
2778         {
2779           op_cost = shiftsub0_cost[speed][mode][m];
2780           new_limit.cost = best_cost.cost - op_cost;
2781           new_limit.latency = best_cost.latency - op_cost;
2782           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2783
2784           alg_in->cost.cost += op_cost;
2785           alg_in->cost.latency += op_cost;
2786           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2787             {
2788               struct algorithm *x;
2789               best_cost = alg_in->cost;
2790               x = alg_in, alg_in = best_alg, best_alg = x;
2791               best_alg->log[best_alg->ops] = m;
2792               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2793             }
2794         }
2795       if (cache_hit)
2796         goto done;
2797     }
2798
2799  done:
2800   /* If best_cost has not decreased, we have not found any algorithm.  */
2801   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2802     {
2803       /* We failed to find an algorithm.  Record alg_impossible for
2804          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2805          we are asked to find an algorithm for T within the same or
2806          lower COST_LIMIT, we can immediately return to the
2807          caller.  */
2808       alg_hash[hash_index].t = t;
2809       alg_hash[hash_index].mode = mode;
2810       alg_hash[hash_index].speed = speed;
2811       alg_hash[hash_index].alg = alg_impossible;
2812       alg_hash[hash_index].cost = *cost_limit;
2813       return;
2814     }
2815
2816   /* Cache the result.  */
2817   if (!cache_hit)
2818     {
2819       alg_hash[hash_index].t = t;
2820       alg_hash[hash_index].mode = mode;
2821       alg_hash[hash_index].speed = speed;
2822       alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2823       alg_hash[hash_index].cost.cost = best_cost.cost;
2824       alg_hash[hash_index].cost.latency = best_cost.latency;
2825     }
2826
2827   /* If we are getting a too long sequence for `struct algorithm'
2828      to record, make this search fail.  */
2829   if (best_alg->ops == MAX_BITS_PER_WORD)
2830     return;
2831
2832   /* Copy the algorithm from temporary space to the space at alg_out.
2833      We avoid using structure assignment because the majority of
2834      best_alg is normally undefined, and this is a critical function.  */
2835   alg_out->ops = best_alg->ops + 1;
2836   alg_out->cost = best_cost;
2837   memcpy (alg_out->op, best_alg->op,
2838           alg_out->ops * sizeof *alg_out->op);
2839   memcpy (alg_out->log, best_alg->log,
2840           alg_out->ops * sizeof *alg_out->log);
2841 }
2842 \f
2843 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2844    Try three variations:
2845
2846        - a shift/add sequence based on VAL itself
2847        - a shift/add sequence based on -VAL, followed by a negation
2848        - a shift/add sequence based on VAL - 1, followed by an addition.
2849
2850    Return true if the cheapest of these cost less than MULT_COST,
2851    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2852
2853 static bool
2854 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2855                      struct algorithm *alg, enum mult_variant *variant,
2856                      int mult_cost)
2857 {
2858   struct algorithm alg2;
2859   struct mult_cost limit;
2860   int op_cost;
2861   bool speed = optimize_insn_for_speed_p ();
2862
2863   /* Fail quickly for impossible bounds.  */
2864   if (mult_cost < 0)
2865     return false;
2866
2867   /* Ensure that mult_cost provides a reasonable upper bound.
2868      Any constant multiplication can be performed with less
2869      than 2 * bits additions.  */
2870   op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2871   if (mult_cost > op_cost)
2872     mult_cost = op_cost;
2873
2874   *variant = basic_variant;
2875   limit.cost = mult_cost;
2876   limit.latency = mult_cost;
2877   synth_mult (alg, val, &limit, mode);
2878
2879   /* This works only if the inverted value actually fits in an
2880      `unsigned int' */
2881   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2882     {
2883       op_cost = neg_cost[speed][mode];
2884       if (MULT_COST_LESS (&alg->cost, mult_cost))
2885         {
2886           limit.cost = alg->cost.cost - op_cost;
2887           limit.latency = alg->cost.latency - op_cost;
2888         }
2889       else
2890         {
2891           limit.cost = mult_cost - op_cost;
2892           limit.latency = mult_cost - op_cost;
2893         }
2894
2895       synth_mult (&alg2, -val, &limit, mode);
2896       alg2.cost.cost += op_cost;
2897       alg2.cost.latency += op_cost;
2898       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2899         *alg = alg2, *variant = negate_variant;
2900     }
2901
2902   /* This proves very useful for division-by-constant.  */
2903   op_cost = add_cost[speed][mode];
2904   if (MULT_COST_LESS (&alg->cost, mult_cost))
2905     {
2906       limit.cost = alg->cost.cost - op_cost;
2907       limit.latency = alg->cost.latency - op_cost;
2908     }
2909   else
2910     {
2911       limit.cost = mult_cost - op_cost;
2912       limit.latency = mult_cost - op_cost;
2913     }
2914
2915   synth_mult (&alg2, val - 1, &limit, mode);
2916   alg2.cost.cost += op_cost;
2917   alg2.cost.latency += op_cost;
2918   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2919     *alg = alg2, *variant = add_variant;
2920
2921   return MULT_COST_LESS (&alg->cost, mult_cost);
2922 }
2923
2924 /* A subroutine of expand_mult, used for constant multiplications.
2925    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2926    convenient.  Use the shift/add sequence described by ALG and apply
2927    the final fixup specified by VARIANT.  */
2928
2929 static rtx
2930 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2931                    rtx target, const struct algorithm *alg,
2932                    enum mult_variant variant)
2933 {
2934   HOST_WIDE_INT val_so_far;
2935   rtx insn, accum, tem;
2936   int opno;
2937   enum machine_mode nmode;
2938
2939   /* Avoid referencing memory over and over and invalid sharing
2940      on SUBREGs.  */
2941   op0 = force_reg (mode, op0);
2942
2943   /* ACCUM starts out either as OP0 or as a zero, depending on
2944      the first operation.  */
2945
2946   if (alg->op[0] == alg_zero)
2947     {
2948       accum = copy_to_mode_reg (mode, const0_rtx);
2949       val_so_far = 0;
2950     }
2951   else if (alg->op[0] == alg_m)
2952     {
2953       accum = copy_to_mode_reg (mode, op0);
2954       val_so_far = 1;
2955     }
2956   else
2957     gcc_unreachable ();
2958
2959   for (opno = 1; opno < alg->ops; opno++)
2960     {
2961       int log = alg->log[opno];
2962       rtx shift_subtarget = optimize ? 0 : accum;
2963       rtx add_target
2964         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2965            && !optimize)
2966           ? target : 0;
2967       rtx accum_target = optimize ? 0 : accum;
2968
2969       switch (alg->op[opno])
2970         {
2971         case alg_shift:
2972           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2973                                 build_int_cst (NULL_TREE, log),
2974                                 NULL_RTX, 0);
2975           val_so_far <<= log;
2976           break;
2977
2978         case alg_add_t_m2:
2979           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2980                               build_int_cst (NULL_TREE, log),
2981                               NULL_RTX, 0);
2982           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2983                                  add_target ? add_target : accum_target);
2984           val_so_far += (HOST_WIDE_INT) 1 << log;
2985           break;
2986
2987         case alg_sub_t_m2:
2988           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2989                               build_int_cst (NULL_TREE, log),
2990                               NULL_RTX, 0);
2991           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2992                                  add_target ? add_target : accum_target);
2993           val_so_far -= (HOST_WIDE_INT) 1 << log;
2994           break;
2995
2996         case alg_add_t2_m:
2997           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2998                                 build_int_cst (NULL_TREE, log),
2999                                 shift_subtarget,
3000                                 0);
3001           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3002                                  add_target ? add_target : accum_target);
3003           val_so_far = (val_so_far << log) + 1;
3004           break;
3005
3006         case alg_sub_t2_m:
3007           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3008                                 build_int_cst (NULL_TREE, log),
3009                                 shift_subtarget, 0);
3010           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3011                                  add_target ? add_target : accum_target);
3012           val_so_far = (val_so_far << log) - 1;
3013           break;
3014
3015         case alg_add_factor:
3016           tem = expand_shift (LSHIFT_EXPR, mode, accum,
3017                               build_int_cst (NULL_TREE, log),
3018                               NULL_RTX, 0);
3019           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3020                                  add_target ? add_target : accum_target);
3021           val_so_far += val_so_far << log;
3022           break;
3023
3024         case alg_sub_factor:
3025           tem = expand_shift (LSHIFT_EXPR, mode, accum,
3026                               build_int_cst (NULL_TREE, log),
3027                               NULL_RTX, 0);
3028           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3029                                  (add_target
3030                                   ? add_target : (optimize ? 0 : tem)));
3031           val_so_far = (val_so_far << log) - val_so_far;
3032           break;
3033
3034         default:
3035           gcc_unreachable ();
3036         }
3037
3038       /* Write a REG_EQUAL note on the last insn so that we can cse
3039          multiplication sequences.  Note that if ACCUM is a SUBREG,
3040          we've set the inner register and must properly indicate
3041          that.  */
3042
3043       tem = op0, nmode = mode;
3044       if (GET_CODE (accum) == SUBREG)
3045         {
3046           nmode = GET_MODE (SUBREG_REG (accum));
3047           tem = gen_lowpart (nmode, op0);
3048         }
3049
3050       insn = get_last_insn ();
3051       set_unique_reg_note (insn, REG_EQUAL,
3052                            gen_rtx_MULT (nmode, tem,
3053                                          GEN_INT (val_so_far)));
3054     }
3055
3056   if (variant == negate_variant)
3057     {
3058       val_so_far = -val_so_far;
3059       accum = expand_unop (mode, neg_optab, accum, target, 0);
3060     }
3061   else if (variant == add_variant)
3062     {
3063       val_so_far = val_so_far + 1;
3064       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3065     }
3066
3067   /* Compare only the bits of val and val_so_far that are significant
3068      in the result mode, to avoid sign-/zero-extension confusion.  */
3069   val &= GET_MODE_MASK (mode);
3070   val_so_far &= GET_MODE_MASK (mode);
3071   gcc_assert (val == val_so_far);
3072
3073   return accum;
3074 }
3075
3076 /* Perform a multiplication and return an rtx for the result.
3077    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3078    TARGET is a suggestion for where to store the result (an rtx).
3079
3080    We check specially for a constant integer as OP1.
3081    If you want this check for OP0 as well, then before calling
3082    you should swap the two operands if OP0 would be constant.  */
3083
3084 rtx
3085 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3086              int unsignedp)
3087 {
3088   enum mult_variant variant;
3089   struct algorithm algorithm;
3090   int max_cost;
3091   bool speed = optimize_insn_for_speed_p ();
3092
3093   /* Handling const0_rtx here allows us to use zero as a rogue value for
3094      coeff below.  */
3095   if (op1 == const0_rtx)
3096     return const0_rtx;
3097   if (op1 == const1_rtx)
3098     return op0;
3099   if (op1 == constm1_rtx)
3100     return expand_unop (mode,
3101                         GET_MODE_CLASS (mode) == MODE_INT
3102                         && !unsignedp && flag_trapv
3103                         ? negv_optab : neg_optab,
3104                         op0, target, 0);
3105
3106   /* These are the operations that are potentially turned into a sequence
3107      of shifts and additions.  */
3108   if (SCALAR_INT_MODE_P (mode)
3109       && (unsignedp || !flag_trapv))
3110     {
3111       HOST_WIDE_INT coeff = 0;
3112       rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3113
3114       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3115          less than or equal in size to `unsigned int' this doesn't matter.
3116          If the mode is larger than `unsigned int', then synth_mult works
3117          only if the constant value exactly fits in an `unsigned int' without
3118          any truncation.  This means that multiplying by negative values does
3119          not work; results are off by 2^32 on a 32 bit machine.  */
3120
3121       if (CONST_INT_P (op1))
3122         {
3123           /* Attempt to handle multiplication of DImode values by negative
3124              coefficients, by performing the multiplication by a positive
3125              multiplier and then inverting the result.  */
3126           if (INTVAL (op1) < 0
3127               && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3128             {
3129               /* Its safe to use -INTVAL (op1) even for INT_MIN, as the
3130                  result is interpreted as an unsigned coefficient.
3131                  Exclude cost of op0 from max_cost to match the cost
3132                  calculation of the synth_mult.  */
3133               max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed)
3134                          - neg_cost[speed][mode];
3135               if (max_cost > 0
3136                   && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3137                                           &variant, max_cost))
3138                 {
3139                   rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3140                                                 NULL_RTX, &algorithm,
3141                                                 variant);
3142                   return expand_unop (mode, neg_optab, temp, target, 0);
3143                 }
3144             }
3145           else coeff = INTVAL (op1);
3146         }
3147       else if (GET_CODE (op1) == CONST_DOUBLE)
3148         {
3149           /* If we are multiplying in DImode, it may still be a win
3150              to try to work with shifts and adds.  */
3151           if (CONST_DOUBLE_HIGH (op1) == 0
3152               && CONST_DOUBLE_LOW (op1) > 0)
3153             coeff = CONST_DOUBLE_LOW (op1);
3154           else if (CONST_DOUBLE_LOW (op1) == 0
3155                    && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3156             {
3157               int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3158                           + HOST_BITS_PER_WIDE_INT;
3159               return expand_shift (LSHIFT_EXPR, mode, op0,
3160                                    build_int_cst (NULL_TREE, shift),
3161                                    target, unsignedp);
3162             }
3163         }
3164
3165       /* We used to test optimize here, on the grounds that it's better to
3166          produce a smaller program when -O is not used.  But this causes
3167          such a terrible slowdown sometimes that it seems better to always
3168          use synth_mult.  */
3169       if (coeff != 0)
3170         {
3171           /* Special case powers of two.  */
3172           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3173             return expand_shift (LSHIFT_EXPR, mode, op0,
3174                                  build_int_cst (NULL_TREE, floor_log2 (coeff)),
3175                                  target, unsignedp);
3176
3177           /* Exclude cost of op0 from max_cost to match the cost
3178              calculation of the synth_mult.  */
3179           max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed);
3180           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3181                                    max_cost))
3182             return expand_mult_const (mode, op0, coeff, target,
3183                                       &algorithm, variant);
3184         }
3185     }
3186
3187   if (GET_CODE (op0) == CONST_DOUBLE)
3188     {
3189       rtx temp = op0;
3190       op0 = op1;
3191       op1 = temp;
3192     }
3193
3194   /* Expand x*2.0 as x+x.  */
3195   if (GET_CODE (op1) == CONST_DOUBLE
3196       && SCALAR_FLOAT_MODE_P (mode))
3197     {
3198       REAL_VALUE_TYPE d;
3199       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3200
3201       if (REAL_VALUES_EQUAL (d, dconst2))
3202         {
3203           op0 = force_reg (GET_MODE (op0), op0);
3204           return expand_binop (mode, add_optab, op0, op0,
3205                                target, unsignedp, OPTAB_LIB_WIDEN);
3206         }
3207     }
3208
3209   /* This used to use umul_optab if unsigned, but for non-widening multiply
3210      there is no difference between signed and unsigned.  */
3211   op0 = expand_binop (mode,
3212                       ! unsignedp
3213                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3214                       ? smulv_optab : smul_optab,
3215                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3216   gcc_assert (op0);
3217   return op0;
3218 }
3219
3220 /* Perform a widening multiplication and return an rtx for the result.
3221    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3222    TARGET is a suggestion for where to store the result (an rtx).
3223    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3224    or smul_widen_optab.
3225
3226    We check specially for a constant integer as OP1, comparing the
3227    cost of a widening multiply against the cost of a sequence of shifts
3228    and adds.  */
3229
3230 rtx
3231 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3232                       int unsignedp, optab this_optab)
3233 {
3234   bool speed = optimize_insn_for_speed_p ();
3235
3236   if (CONST_INT_P (op1)
3237       && (INTVAL (op1) >= 0
3238           || GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT))
3239     {
3240       HOST_WIDE_INT coeff = INTVAL (op1);
3241       int max_cost;
3242       enum mult_variant variant;
3243       struct algorithm algorithm;
3244
3245       /* Special case powers of two.  */
3246       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3247         {
3248           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3249           return expand_shift (LSHIFT_EXPR, mode, op0,
3250                                build_int_cst (NULL_TREE, floor_log2 (coeff)),
3251                                target, unsignedp);
3252         }
3253
3254       /* Exclude cost of op0 from max_cost to match the cost
3255          calculation of the synth_mult.  */
3256       max_cost = mul_widen_cost[speed][mode];
3257       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3258                                max_cost))
3259         {
3260           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3261           return expand_mult_const (mode, op0, coeff, target,
3262                                     &algorithm, variant);
3263         }
3264     }
3265   return expand_binop (mode, this_optab, op0, op1, target,
3266                        unsignedp, OPTAB_LIB_WIDEN);
3267 }
3268 \f
3269 /* Return the smallest n such that 2**n >= X.  */
3270
3271 int
3272 ceil_log2 (unsigned HOST_WIDE_INT x)
3273 {
3274   return floor_log2 (x - 1) + 1;
3275 }
3276
3277 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3278    replace division by D, and put the least significant N bits of the result
3279    in *MULTIPLIER_PTR and return the most significant bit.
3280
3281    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3282    needed precision is in PRECISION (should be <= N).
3283
3284    PRECISION should be as small as possible so this function can choose
3285    multiplier more freely.
3286
3287    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3288    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3289
3290    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3291    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3292
3293 static
3294 unsigned HOST_WIDE_INT
3295 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3296                    rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3297 {
3298   HOST_WIDE_INT mhigh_hi, mlow_hi;
3299   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3300   int lgup, post_shift;
3301   int pow, pow2;
3302   unsigned HOST_WIDE_INT nl, dummy1;
3303   HOST_WIDE_INT nh, dummy2;
3304
3305   /* lgup = ceil(log2(divisor)); */
3306   lgup = ceil_log2 (d);
3307
3308   gcc_assert (lgup <= n);
3309
3310   pow = n + lgup;
3311   pow2 = n + lgup - precision;
3312
3313   /* We could handle this with some effort, but this case is much
3314      better handled directly with a scc insn, so rely on caller using
3315      that.  */
3316   gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3317
3318   /* mlow = 2^(N + lgup)/d */
3319  if (pow >= HOST_BITS_PER_WIDE_INT)
3320     {
3321       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3322       nl = 0;
3323     }
3324   else
3325     {
3326       nh = 0;
3327       nl = (unsigned HOST_WIDE_INT) 1 << pow;
3328     }
3329   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3330                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3331
3332   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
3333   if (pow2 >= HOST_BITS_PER_WIDE_INT)
3334     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3335   else
3336     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3337   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3338                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3339
3340   gcc_assert (!mhigh_hi || nh - d < d);
3341   gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3342   /* Assert that mlow < mhigh.  */
3343   gcc_assert (mlow_hi < mhigh_hi
3344               || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3345
3346   /* If precision == N, then mlow, mhigh exceed 2^N
3347      (but they do not exceed 2^(N+1)).  */
3348
3349   /* Reduce to lowest terms.  */
3350   for (post_shift = lgup; post_shift > 0; post_shift--)
3351     {
3352       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3353       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3354       if (ml_lo >= mh_lo)
3355         break;
3356
3357       mlow_hi = 0;
3358       mlow_lo = ml_lo;
3359       mhigh_hi = 0;
3360       mhigh_lo = mh_lo;
3361     }
3362
3363   *post_shift_ptr = post_shift;
3364   *lgup_ptr = lgup;
3365   if (n < HOST_BITS_PER_WIDE_INT)
3366     {
3367       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3368       *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3369       return mhigh_lo >= mask;
3370     }
3371   else
3372     {
3373       *multiplier_ptr = GEN_INT (mhigh_lo);
3374       return mhigh_hi;
3375     }
3376 }
3377
3378 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3379    congruent to 1 (mod 2**N).  */
3380
3381 static unsigned HOST_WIDE_INT
3382 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3383 {
3384   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3385
3386   /* The algorithm notes that the choice y = x satisfies
3387      x*y == 1 mod 2^3, since x is assumed odd.
3388      Each iteration doubles the number of bits of significance in y.  */
3389
3390   unsigned HOST_WIDE_INT mask;
3391   unsigned HOST_WIDE_INT y = x;
3392   int nbit = 3;
3393
3394   mask = (n == HOST_BITS_PER_WIDE_INT
3395           ? ~(unsigned HOST_WIDE_INT) 0
3396           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3397
3398   while (nbit < n)
3399     {
3400       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3401       nbit *= 2;
3402     }
3403   return y;
3404 }
3405
3406 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3407    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3408    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3409    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3410    become signed.
3411
3412    The result is put in TARGET if that is convenient.
3413
3414    MODE is the mode of operation.  */
3415
3416 rtx
3417 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3418                              rtx op1, rtx target, int unsignedp)
3419 {
3420   rtx tem;
3421   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3422
3423   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3424                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3425                       NULL_RTX, 0);
3426   tem = expand_and (mode, tem, op1, NULL_RTX);
3427   adj_operand
3428     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3429                      adj_operand);
3430
3431   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3432                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3433                       NULL_RTX, 0);
3434   tem = expand_and (mode, tem, op0, NULL_RTX);
3435   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3436                           target);
3437
3438   return target;
3439 }
3440
3441 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
3442
3443 static rtx
3444 extract_high_half (enum machine_mode mode, rtx op)
3445 {
3446   enum machine_mode wider_mode;
3447
3448   if (mode == word_mode)
3449     return gen_highpart (mode, op);
3450
3451   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3452
3453   wider_mode = GET_MODE_WIDER_MODE (mode);
3454   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3455                      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3456   return convert_modes (mode, wider_mode, op, 0);
3457 }
3458
3459 /* Like expand_mult_highpart, but only consider using a multiplication
3460    optab.  OP1 is an rtx for the constant operand.  */
3461
3462 static rtx
3463 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3464                             rtx target, int unsignedp, int max_cost)
3465 {
3466   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3467   enum machine_mode wider_mode;
3468   optab moptab;
3469   rtx tem;
3470   int size;
3471   bool speed = optimize_insn_for_speed_p ();
3472
3473   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3474
3475   wider_mode = GET_MODE_WIDER_MODE (mode);
3476   size = GET_MODE_BITSIZE (mode);
3477
3478   /* Firstly, try using a multiplication insn that only generates the needed
3479      high part of the product, and in the sign flavor of unsignedp.  */
3480   if (mul_highpart_cost[speed][mode] < max_cost)
3481     {
3482       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3483       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3484                           unsignedp, OPTAB_DIRECT);
3485       if (tem)
3486         return tem;
3487     }
3488
3489   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3490      Need to adjust the result after the multiplication.  */
3491   if (size - 1 < BITS_PER_WORD
3492       && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3493           + 4 * add_cost[speed][mode] < max_cost))
3494     {
3495       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3496       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3497                           unsignedp, OPTAB_DIRECT);
3498       if (tem)
3499         /* We used the wrong signedness.  Adjust the result.  */
3500         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3501                                             tem, unsignedp);
3502     }
3503
3504   /* Try widening multiplication.  */
3505   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3506   if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
3507       && mul_widen_cost[speed][wider_mode] < max_cost)
3508     {
3509       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3510                           unsignedp, OPTAB_WIDEN);
3511       if (tem)
3512         return extract_high_half (mode, tem);
3513     }
3514
3515   /* Try widening the mode and perform a non-widening multiplication.  */
3516   if (optab_handler (smul_optab, wider_mode)->insn_code != CODE_FOR_nothing
3517       && size - 1 < BITS_PER_WORD
3518       && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3519     {
3520       rtx insns, wop0, wop1;
3521
3522       /* We need to widen the operands, for example to ensure the
3523          constant multiplier is correctly sign or zero extended.
3524          Use a sequence to clean-up any instructions emitted by
3525          the conversions if things don't work out.  */
3526       start_sequence ();
3527       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3528       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3529       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3530                           unsignedp, OPTAB_WIDEN);
3531       insns = get_insns ();
3532       end_sequence ();
3533
3534       if (tem)
3535         {
3536           emit_insn (insns);
3537           return extract_high_half (mode, tem);
3538         }
3539     }
3540
3541   /* Try widening multiplication of opposite signedness, and adjust.  */
3542   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3543   if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
3544       && size - 1 < BITS_PER_WORD
3545       && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3546           + 4 * add_cost[speed][mode] < max_cost))
3547     {
3548       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3549                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3550       if (tem != 0)
3551         {
3552           tem = extract_high_half (mode, tem);
3553           /* We used the wrong signedness.  Adjust the result.  */
3554           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3555                                               target, unsignedp);
3556         }
3557     }
3558
3559   return 0;
3560 }
3561
3562 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3563    putting the high half of the result in TARGET if that is convenient,
3564    and return where the result is.  If the operation can not be performed,
3565    0 is returned.
3566
3567    MODE is the mode of operation and result.
3568
3569    UNSIGNEDP nonzero means unsigned multiply.
3570
3571    MAX_COST is the total allowed cost for the expanded RTL.  */
3572
3573 static rtx
3574 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3575                       rtx target, int unsignedp, int max_cost)
3576 {
3577   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3578   unsigned HOST_WIDE_INT cnst1;
3579   int extra_cost;
3580   bool sign_adjust = false;
3581   enum mult_variant variant;
3582   struct algorithm alg;
3583   rtx tem;
3584   bool speed = optimize_insn_for_speed_p ();
3585
3586   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3587   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3588   gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3589
3590   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3591
3592   /* We can't optimize modes wider than BITS_PER_WORD.
3593      ??? We might be able to perform double-word arithmetic if
3594      mode == word_mode, however all the cost calculations in
3595      synth_mult etc. assume single-word operations.  */
3596   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3597     return expand_mult_highpart_optab (mode, op0, op1, target,
3598                                        unsignedp, max_cost);
3599
3600   extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3601
3602   /* Check whether we try to multiply by a negative constant.  */
3603   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3604     {
3605       sign_adjust = true;
3606       extra_cost += add_cost[speed][mode];
3607     }
3608
3609   /* See whether shift/add multiplication is cheap enough.  */
3610   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3611                            max_cost - extra_cost))
3612     {
3613       /* See whether the specialized multiplication optabs are
3614          cheaper than the shift/add version.  */
3615       tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3616                                         alg.cost.cost + extra_cost);
3617       if (tem)
3618         return tem;
3619
3620       tem = convert_to_mode (wider_mode, op0, unsignedp);
3621       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3622       tem = extract_high_half (mode, tem);
3623
3624       /* Adjust result for signedness.  */
3625       if (sign_adjust)
3626         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3627
3628       return tem;
3629     }
3630   return expand_mult_highpart_optab (mode, op0, op1, target,
3631                                      unsignedp, max_cost);
3632 }
3633
3634
3635 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3636
3637 static rtx
3638 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3639 {
3640   unsigned HOST_WIDE_INT masklow, maskhigh;
3641   rtx result, temp, shift, label;
3642   int logd;
3643
3644   logd = floor_log2 (d);
3645   result = gen_reg_rtx (mode);
3646
3647   /* Avoid conditional branches when they're expensive.  */
3648   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3649       && optimize_insn_for_speed_p ())
3650     {
3651       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3652                                       mode, 0, -1);
3653       if (signmask)
3654         {
3655           signmask = force_reg (mode, signmask);
3656           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3657           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3658
3659           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3660              which instruction sequence to use.  If logical right shifts
3661              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3662              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3663
3664           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3665           if (optab_handler (lshr_optab, mode)->insn_code == CODE_FOR_nothing
3666               || rtx_cost (temp, SET, optimize_insn_for_speed_p ()) > COSTS_N_INSNS (2))
3667             {
3668               temp = expand_binop (mode, xor_optab, op0, signmask,
3669                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3670               temp = expand_binop (mode, sub_optab, temp, signmask,
3671                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3672               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3673                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3674               temp = expand_binop (mode, xor_optab, temp, signmask,
3675                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3676               temp = expand_binop (mode, sub_optab, temp, signmask,
3677                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3678             }
3679           else
3680             {
3681               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3682                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3683               signmask = force_reg (mode, signmask);
3684
3685               temp = expand_binop (mode, add_optab, op0, signmask,
3686                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3687               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3688                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3689               temp = expand_binop (mode, sub_optab, temp, signmask,
3690                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3691             }
3692           return temp;
3693         }
3694     }
3695
3696   /* Mask contains the mode's signbit and the significant bits of the
3697      modulus.  By including the signbit in the operation, many targets
3698      can avoid an explicit compare operation in the following comparison
3699      against zero.  */
3700
3701   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3702   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3703     {
3704       masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3705       maskhigh = -1;
3706     }
3707   else
3708     maskhigh = (HOST_WIDE_INT) -1
3709                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3710
3711   temp = expand_binop (mode, and_optab, op0,
3712                        immed_double_const (masklow, maskhigh, mode),
3713                        result, 1, OPTAB_LIB_WIDEN);
3714   if (temp != result)
3715     emit_move_insn (result, temp);
3716
3717   label = gen_label_rtx ();
3718   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3719
3720   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3721                        0, OPTAB_LIB_WIDEN);
3722   masklow = (HOST_WIDE_INT) -1 << logd;
3723   maskhigh = -1;
3724   temp = expand_binop (mode, ior_optab, temp,
3725                        immed_double_const (masklow, maskhigh, mode),
3726                        result, 1, OPTAB_LIB_WIDEN);
3727   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3728                        0, OPTAB_LIB_WIDEN);
3729   if (temp != result)
3730     emit_move_insn (result, temp);
3731   emit_label (label);
3732   return result;
3733 }
3734
3735 /* Expand signed division of OP0 by a power of two D in mode MODE.
3736    This routine is only called for positive values of D.  */
3737
3738 static rtx
3739 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3740 {
3741   rtx temp, label;
3742   tree shift;
3743   int logd;
3744
3745   logd = floor_log2 (d);
3746   shift = build_int_cst (NULL_TREE, logd);
3747
3748   if (d == 2
3749       && BRANCH_COST (optimize_insn_for_speed_p (),
3750                       false) >= 1)
3751     {
3752       temp = gen_reg_rtx (mode);
3753       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3754       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3755                            0, OPTAB_LIB_WIDEN);
3756       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3757     }
3758
3759 #ifdef HAVE_conditional_move
3760   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3761       >= 2)
3762     {
3763       rtx temp2;
3764
3765       /* ??? emit_conditional_move forces a stack adjustment via
3766          compare_from_rtx so, if the sequence is discarded, it will
3767          be lost.  Do it now instead.  */
3768       do_pending_stack_adjust ();
3769
3770       start_sequence ();
3771       temp2 = copy_to_mode_reg (mode, op0);
3772       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3773                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3774       temp = force_reg (mode, temp);
3775
3776       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3777       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3778                                      mode, temp, temp2, mode, 0);
3779       if (temp2)
3780         {
3781           rtx seq = get_insns ();
3782           end_sequence ();
3783           emit_insn (seq);
3784           return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3785         }
3786       end_sequence ();
3787     }
3788 #endif
3789
3790   if (BRANCH_COST (optimize_insn_for_speed_p (),
3791                    false) >= 2)
3792     {
3793       int ushift = GET_MODE_BITSIZE (mode) - logd;
3794
3795       temp = gen_reg_rtx (mode);
3796       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3797       if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3798         temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3799                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3800       else
3801         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3802                              build_int_cst (NULL_TREE, ushift),
3803                              NULL_RTX, 1);
3804       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3805                            0, OPTAB_LIB_WIDEN);
3806       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3807     }
3808
3809   label = gen_label_rtx ();
3810   temp = copy_to_mode_reg (mode, op0);
3811   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3812   expand_inc (temp, GEN_INT (d - 1));
3813   emit_label (label);
3814   return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3815 }
3816 \f
3817 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3818    if that is convenient, and returning where the result is.
3819    You may request either the quotient or the remainder as the result;
3820    specify REM_FLAG nonzero to get the remainder.
3821
3822    CODE is the expression code for which kind of division this is;
3823    it controls how rounding is done.  MODE is the machine mode to use.
3824    UNSIGNEDP nonzero means do unsigned division.  */
3825
3826 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3827    and then correct it by or'ing in missing high bits
3828    if result of ANDI is nonzero.
3829    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3830    This could optimize to a bfexts instruction.
3831    But C doesn't use these operations, so their optimizations are
3832    left for later.  */
3833 /* ??? For modulo, we don't actually need the highpart of the first product,
3834    the low part will do nicely.  And for small divisors, the second multiply
3835    can also be a low-part only multiply or even be completely left out.
3836    E.g. to calculate the remainder of a division by 3 with a 32 bit
3837    multiply, multiply with 0x55555556 and extract the upper two bits;
3838    the result is exact for inputs up to 0x1fffffff.
3839    The input range can be reduced by using cross-sum rules.
3840    For odd divisors >= 3, the following table gives right shift counts
3841    so that if a number is shifted by an integer multiple of the given
3842    amount, the remainder stays the same:
3843    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3844    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3845    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3846    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3847    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3848
3849    Cross-sum rules for even numbers can be derived by leaving as many bits
3850    to the right alone as the divisor has zeros to the right.
3851    E.g. if x is an unsigned 32 bit number:
3852    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3853    */
3854
3855 rtx
3856 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3857                rtx op0, rtx op1, rtx target, int unsignedp)
3858 {
3859   enum machine_mode compute_mode;
3860   rtx tquotient;
3861   rtx quotient = 0, remainder = 0;
3862   rtx last;
3863   int size;
3864   rtx insn, set;
3865   optab optab1, optab2;
3866   int op1_is_constant, op1_is_pow2 = 0;
3867   int max_cost, extra_cost;
3868   static HOST_WIDE_INT last_div_const = 0;
3869   static HOST_WIDE_INT ext_op1;
3870   bool speed = optimize_insn_for_speed_p ();
3871
3872   op1_is_constant = CONST_INT_P (op1);
3873   if (op1_is_constant)
3874     {
3875       ext_op1 = INTVAL (op1);
3876       if (unsignedp)
3877         ext_op1 &= GET_MODE_MASK (mode);
3878       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3879                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3880     }
3881
3882   /*
3883      This is the structure of expand_divmod:
3884
3885      First comes code to fix up the operands so we can perform the operations
3886      correctly and efficiently.
3887
3888      Second comes a switch statement with code specific for each rounding mode.
3889      For some special operands this code emits all RTL for the desired
3890      operation, for other cases, it generates only a quotient and stores it in
3891      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3892      to indicate that it has not done anything.
3893
3894      Last comes code that finishes the operation.  If QUOTIENT is set and
3895      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3896      QUOTIENT is not set, it is computed using trunc rounding.
3897
3898      We try to generate special code for division and remainder when OP1 is a
3899      constant.  If |OP1| = 2**n we can use shifts and some other fast
3900      operations.  For other values of OP1, we compute a carefully selected
3901      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3902      by m.
3903
3904      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3905      half of the product.  Different strategies for generating the product are
3906      implemented in expand_mult_highpart.
3907
3908      If what we actually want is the remainder, we generate that by another
3909      by-constant multiplication and a subtraction.  */
3910
3911   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3912      code below will malfunction if we are, so check here and handle
3913      the special case if so.  */
3914   if (op1 == const1_rtx)
3915     return rem_flag ? const0_rtx : op0;
3916
3917     /* When dividing by -1, we could get an overflow.
3918      negv_optab can handle overflows.  */
3919   if (! unsignedp && op1 == constm1_rtx)
3920     {
3921       if (rem_flag)
3922         return const0_rtx;
3923       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3924                           ? negv_optab : neg_optab, op0, target, 0);
3925     }
3926
3927   if (target
3928       /* Don't use the function value register as a target
3929          since we have to read it as well as write it,
3930          and function-inlining gets confused by this.  */
3931       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3932           /* Don't clobber an operand while doing a multi-step calculation.  */
3933           || ((rem_flag || op1_is_constant)
3934               && (reg_mentioned_p (target, op0)
3935                   || (MEM_P (op0) && MEM_P (target))))
3936           || reg_mentioned_p (target, op1)
3937           || (MEM_P (op1) && MEM_P (target))))
3938     target = 0;
3939
3940   /* Get the mode in which to perform this computation.  Normally it will
3941      be MODE, but sometimes we can't do the desired operation in MODE.
3942      If so, pick a wider mode in which we can do the operation.  Convert
3943      to that mode at the start to avoid repeated conversions.
3944
3945      First see what operations we need.  These depend on the expression
3946      we are evaluating.  (We assume that divxx3 insns exist under the
3947      same conditions that modxx3 insns and that these insns don't normally
3948      fail.  If these assumptions are not correct, we may generate less
3949      efficient code in some cases.)
3950
3951      Then see if we find a mode in which we can open-code that operation
3952      (either a division, modulus, or shift).  Finally, check for the smallest
3953      mode for which we can do the operation with a library call.  */
3954
3955   /* We might want to refine this now that we have division-by-constant
3956      optimization.  Since expand_mult_highpart tries so many variants, it is
3957      not straightforward to generalize this.  Maybe we should make an array
3958      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3959
3960   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3961             ? (unsignedp ? lshr_optab : ashr_optab)
3962             : (unsignedp ? udiv_optab : sdiv_optab));
3963   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3964             ? optab1
3965             : (unsignedp ? udivmod_optab : sdivmod_optab));
3966
3967   for (compute_mode = mode; compute_mode != VOIDmode;
3968        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3969     if (optab_handler (optab1, compute_mode)->insn_code != CODE_FOR_nothing
3970         || optab_handler (optab2, compute_mode)->insn_code != CODE_FOR_nothing)
3971       break;
3972
3973   if (compute_mode == VOIDmode)
3974     for (compute_mode = mode; compute_mode != VOIDmode;
3975          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3976       if (optab_libfunc (optab1, compute_mode)
3977           || optab_libfunc (optab2, compute_mode))
3978         break;
3979
3980   /* If we still couldn't find a mode, use MODE, but expand_binop will
3981      probably die.  */
3982   if (compute_mode == VOIDmode)
3983     compute_mode = mode;
3984
3985   if (target && GET_MODE (target) == compute_mode)
3986     tquotient = target;
3987   else
3988     tquotient = gen_reg_rtx (compute_mode);
3989
3990   size = GET_MODE_BITSIZE (compute_mode);
3991 #if 0
3992   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3993      (mode), and thereby get better code when OP1 is a constant.  Do that
3994      later.  It will require going over all usages of SIZE below.  */
3995   size = GET_MODE_BITSIZE (mode);
3996 #endif
3997
3998   /* Only deduct something for a REM if the last divide done was
3999      for a different constant.   Then set the constant of the last
4000      divide.  */
4001   max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
4002   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4003                      && INTVAL (op1) == last_div_const))
4004     max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
4005
4006   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4007
4008   /* Now convert to the best mode to use.  */
4009   if (compute_mode != mode)
4010     {
4011       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4012       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4013
4014       /* convert_modes may have placed op1 into a register, so we
4015          must recompute the following.  */
4016       op1_is_constant = CONST_INT_P (op1);
4017       op1_is_pow2 = (op1_is_constant
4018                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4019                           || (! unsignedp
4020                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
4021     }
4022
4023   /* If one of the operands is a volatile MEM, copy it into a register.  */
4024
4025   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4026     op0 = force_reg (compute_mode, op0);
4027   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4028     op1 = force_reg (compute_mode, op1);
4029
4030   /* If we need the remainder or if OP1 is constant, we need to
4031      put OP0 in a register in case it has any queued subexpressions.  */
4032   if (rem_flag || op1_is_constant)
4033     op0 = force_reg (compute_mode, op0);
4034
4035   last = get_last_insn ();
4036
4037   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4038   if (unsignedp)
4039     {
4040       if (code == FLOOR_DIV_EXPR)
4041         code = TRUNC_DIV_EXPR;
4042       if (code == FLOOR_MOD_EXPR)
4043         code = TRUNC_MOD_EXPR;
4044       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4045         code = TRUNC_DIV_EXPR;
4046     }
4047
4048   if (op1 != const0_rtx)
4049     switch (code)
4050       {
4051       case TRUNC_MOD_EXPR:
4052       case TRUNC_DIV_EXPR:
4053         if (op1_is_constant)
4054           {
4055             if (unsignedp)
4056               {
4057                 unsigned HOST_WIDE_INT mh;
4058                 int pre_shift, post_shift;
4059                 int dummy;
4060                 rtx ml;
4061                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4062                                             & GET_MODE_MASK (compute_mode));
4063
4064                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4065                   {
4066                     pre_shift = floor_log2 (d);
4067                     if (rem_flag)
4068                       {
4069                         remainder
4070                           = expand_binop (compute_mode, and_optab, op0,
4071                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4072                                           remainder, 1,
4073                                           OPTAB_LIB_WIDEN);
4074                         if (remainder)
4075                           return gen_lowpart (mode, remainder);
4076                       }
4077                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4078                                              build_int_cst (NULL_TREE,
4079                                                             pre_shift),
4080                                              tquotient, 1);
4081                   }
4082                 else if (size <= HOST_BITS_PER_WIDE_INT)
4083                   {
4084                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4085                       {
4086                         /* Most significant bit of divisor is set; emit an scc
4087                            insn.  */
4088                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4089                                                           compute_mode, 1, 1);
4090                       }
4091                     else
4092                       {
4093                         /* Find a suitable multiplier and right shift count
4094                            instead of multiplying with D.  */
4095
4096                         mh = choose_multiplier (d, size, size,
4097                                                 &ml, &post_shift, &dummy);
4098
4099                         /* If the suggested multiplier is more than SIZE bits,
4100                            we can do better for even divisors, using an
4101                            initial right shift.  */
4102                         if (mh != 0 && (d & 1) == 0)
4103                           {
4104                             pre_shift = floor_log2 (d & -d);
4105                             mh = choose_multiplier (d >> pre_shift, size,
4106                                                     size - pre_shift,
4107                                                     &ml, &post_shift, &dummy);
4108                             gcc_assert (!mh);
4109                           }
4110                         else
4111                           pre_shift = 0;
4112
4113                         if (mh != 0)
4114                           {
4115                             rtx t1, t2, t3, t4;
4116
4117                             if (post_shift - 1 >= BITS_PER_WORD)
4118                               goto fail1;
4119
4120                             extra_cost
4121                               = (shift_cost[speed][compute_mode][post_shift - 1]
4122                                  + shift_cost[speed][compute_mode][1]
4123                                  + 2 * add_cost[speed][compute_mode]);
4124                             t1 = expand_mult_highpart (compute_mode, op0, ml,
4125                                                        NULL_RTX, 1,
4126                                                        max_cost - extra_cost);
4127                             if (t1 == 0)
4128                               goto fail1;
4129                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4130                                                                op0, t1),
4131                                                 NULL_RTX);
4132                             t3 = expand_shift
4133                               (RSHIFT_EXPR, compute_mode, t2,
4134                                build_int_cst (NULL_TREE, 1),
4135                                NULL_RTX,1);
4136                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4137                                                               t1, t3),
4138                                                 NULL_RTX);
4139                             quotient = expand_shift
4140                               (RSHIFT_EXPR, compute_mode, t4,
4141                                build_int_cst (NULL_TREE, post_shift - 1),
4142                                tquotient, 1);
4143                           }
4144                         else
4145                           {
4146                             rtx t1, t2;
4147
4148                             if (pre_shift >= BITS_PER_WORD
4149                                 || post_shift >= BITS_PER_WORD)
4150                               goto fail1;
4151
4152                             t1 = expand_shift
4153                               (RSHIFT_EXPR, compute_mode, op0,
4154                                build_int_cst (NULL_TREE, pre_shift),
4155                                NULL_RTX, 1);
4156                             extra_cost
4157                               = (shift_cost[speed][compute_mode][pre_shift]
4158                                  + shift_cost[speed][compute_mode][post_shift]);
4159                             t2 = expand_mult_highpart (compute_mode, t1, ml,
4160                                                        NULL_RTX, 1,
4161                                                        max_cost - extra_cost);
4162                             if (t2 == 0)
4163                               goto fail1;
4164                             quotient = expand_shift
4165                               (RSHIFT_EXPR, compute_mode, t2,
4166                                build_int_cst (NULL_TREE, post_shift),
4167                                tquotient, 1);
4168                           }
4169                       }
4170                   }
4171                 else            /* Too wide mode to use tricky code */
4172                   break;
4173
4174                 insn = get_last_insn ();
4175                 if (insn != last
4176                     && (set = single_set (insn)) != 0
4177                     && SET_DEST (set) == quotient)
4178                   set_unique_reg_note (insn,
4179                                        REG_EQUAL,
4180                                        gen_rtx_UDIV (compute_mode, op0, op1));
4181               }
4182             else                /* TRUNC_DIV, signed */
4183               {
4184                 unsigned HOST_WIDE_INT ml;
4185                 int lgup, post_shift;
4186                 rtx mlr;
4187                 HOST_WIDE_INT d = INTVAL (op1);
4188                 unsigned HOST_WIDE_INT abs_d;
4189
4190                 /* Since d might be INT_MIN, we have to cast to
4191                    unsigned HOST_WIDE_INT before negating to avoid
4192                    undefined signed overflow.  */
4193                 abs_d = (d >= 0
4194                          ? (unsigned HOST_WIDE_INT) d
4195                          : - (unsigned HOST_WIDE_INT) d);
4196
4197                 /* n rem d = n rem -d */
4198                 if (rem_flag && d < 0)
4199                   {
4200                     d = abs_d;
4201                     op1 = gen_int_mode (abs_d, compute_mode);
4202                   }
4203
4204                 if (d == 1)
4205                   quotient = op0;
4206                 else if (d == -1)
4207                   quotient = expand_unop (compute_mode, neg_optab, op0,
4208                                           tquotient, 0);
4209                 else if (HOST_BITS_PER_WIDE_INT >= size
4210                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4211                   {
4212                     /* This case is not handled correctly below.  */
4213                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4214                                                 compute_mode, 1, 1);
4215                     if (quotient == 0)
4216                       goto fail1;
4217                   }
4218                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4219                          && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4220                                       : sdiv_pow2_cheap[speed][compute_mode])
4221                          /* We assume that cheap metric is true if the
4222                             optab has an expander for this mode.  */
4223                          && ((optab_handler ((rem_flag ? smod_optab
4224                                               : sdiv_optab),
4225                                               compute_mode)->insn_code
4226                               != CODE_FOR_nothing)
4227                              || (optab_handler(sdivmod_optab,
4228                                                compute_mode)
4229                                  ->insn_code != CODE_FOR_nothing)))
4230                   ;
4231                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4232                   {
4233                     if (rem_flag)
4234                       {
4235                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4236                         if (remainder)
4237                           return gen_lowpart (mode, remainder);
4238                       }
4239
4240                     if (sdiv_pow2_cheap[speed][compute_mode]
4241                         && ((optab_handler (sdiv_optab, compute_mode)->insn_code
4242                              != CODE_FOR_nothing)
4243                             || (optab_handler (sdivmod_optab, compute_mode)->insn_code
4244                                 != CODE_FOR_nothing)))
4245                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4246                                                 compute_mode, op0,
4247                                                 gen_int_mode (abs_d,
4248                                                               compute_mode),
4249                                                 NULL_RTX, 0);
4250                     else
4251                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4252
4253                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4254                        negate the quotient.  */
4255                     if (d < 0)
4256                       {
4257                         insn = get_last_insn ();
4258                         if (insn != last
4259                             && (set = single_set (insn)) != 0
4260                             && SET_DEST (set) == quotient
4261                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4262                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4263                           set_unique_reg_note (insn,
4264                                                REG_EQUAL,
4265                                                gen_rtx_DIV (compute_mode,
4266                                                             op0,
4267                                                             GEN_INT
4268                                                             (trunc_int_for_mode
4269                                                              (abs_d,
4270                                                               compute_mode))));
4271
4272                         quotient = expand_unop (compute_mode, neg_optab,
4273                                                 quotient, quotient, 0);
4274                       }
4275                   }
4276                 else if (size <= HOST_BITS_PER_WIDE_INT)
4277                   {
4278                     choose_multiplier (abs_d, size, size - 1,
4279                                        &mlr, &post_shift, &lgup);
4280                     ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4281                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4282                       {
4283                         rtx t1, t2, t3;
4284
4285                         if (post_shift >= BITS_PER_WORD
4286                             || size - 1 >= BITS_PER_WORD)
4287                           goto fail1;
4288
4289                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4290                                       + shift_cost[speed][compute_mode][size - 1]
4291                                       + add_cost[speed][compute_mode]);
4292                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4293                                                    NULL_RTX, 0,
4294                                                    max_cost - extra_cost);
4295                         if (t1 == 0)
4296                           goto fail1;
4297                         t2 = expand_shift
4298                           (RSHIFT_EXPR, compute_mode, t1,
4299                            build_int_cst (NULL_TREE, post_shift),
4300                            NULL_RTX, 0);
4301                         t3 = expand_shift
4302                           (RSHIFT_EXPR, compute_mode, op0,
4303                            build_int_cst (NULL_TREE, size - 1),
4304                            NULL_RTX, 0);
4305                         if (d < 0)
4306                           quotient
4307                             = force_operand (gen_rtx_MINUS (compute_mode,
4308                                                             t3, t2),
4309                                              tquotient);
4310                         else
4311                           quotient
4312                             = force_operand (gen_rtx_MINUS (compute_mode,
4313                                                             t2, t3),
4314                                              tquotient);
4315                       }
4316                     else
4317                       {
4318                         rtx t1, t2, t3, t4;
4319
4320                         if (post_shift >= BITS_PER_WORD
4321                             || size - 1 >= BITS_PER_WORD)
4322                           goto fail1;
4323
4324                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4325                         mlr = gen_int_mode (ml, compute_mode);
4326                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4327                                       + shift_cost[speed][compute_mode][size - 1]
4328                                       + 2 * add_cost[speed][compute_mode]);
4329                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4330                                                    NULL_RTX, 0,
4331                                                    max_cost - extra_cost);
4332                         if (t1 == 0)
4333                           goto fail1;
4334                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4335                                                           t1, op0),
4336                                             NULL_RTX);
4337                         t3 = expand_shift
4338                           (RSHIFT_EXPR, compute_mode, t2,
4339                            build_int_cst (NULL_TREE, post_shift),
4340                            NULL_RTX, 0);
4341                         t4 = expand_shift
4342                           (RSHIFT_EXPR, compute_mode, op0,
4343                            build_int_cst (NULL_TREE, size - 1),
4344                            NULL_RTX, 0);
4345                         if (d < 0)
4346                           quotient
4347                             = force_operand (gen_rtx_MINUS (compute_mode,
4348                                                             t4, t3),
4349                                              tquotient);
4350                         else
4351                           quotient
4352                             = force_operand (gen_rtx_MINUS (compute_mode,
4353                                                             t3, t4),
4354                                              tquotient);
4355                       }
4356                   }
4357                 else            /* Too wide mode to use tricky code */
4358                   break;
4359
4360                 insn = get_last_insn ();
4361                 if (insn != last
4362                     && (set = single_set (insn)) != 0
4363                     && SET_DEST (set) == quotient)
4364                   set_unique_reg_note (insn,
4365                                        REG_EQUAL,
4366                                        gen_rtx_DIV (compute_mode, op0, op1));
4367               }
4368             break;
4369           }
4370       fail1:
4371         delete_insns_since (last);
4372         break;
4373
4374       case FLOOR_DIV_EXPR:
4375       case FLOOR_MOD_EXPR:
4376       /* We will come here only for signed operations.  */
4377         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4378           {
4379             unsigned HOST_WIDE_INT mh;
4380             int pre_shift, lgup, post_shift;
4381             HOST_WIDE_INT d = INTVAL (op1);
4382             rtx ml;
4383
4384             if (d > 0)
4385               {
4386                 /* We could just as easily deal with negative constants here,
4387                    but it does not seem worth the trouble for GCC 2.6.  */
4388                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4389                   {
4390                     pre_shift = floor_log2 (d);
4391                     if (rem_flag)
4392                       {
4393                         remainder = expand_binop (compute_mode, and_optab, op0,
4394                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4395                                                   remainder, 0, OPTAB_LIB_WIDEN);
4396                         if (remainder)
4397                           return gen_lowpart (mode, remainder);
4398                       }
4399                     quotient = expand_shift
4400                       (RSHIFT_EXPR, compute_mode, op0,
4401                        build_int_cst (NULL_TREE, pre_shift),
4402                        tquotient, 0);
4403                   }
4404                 else
4405                   {
4406                     rtx t1, t2, t3, t4;
4407
4408                     mh = choose_multiplier (d, size, size - 1,
4409                                             &ml, &post_shift, &lgup);
4410                     gcc_assert (!mh);
4411
4412                     if (post_shift < BITS_PER_WORD
4413                         && size - 1 < BITS_PER_WORD)
4414                       {
4415                         t1 = expand_shift
4416                           (RSHIFT_EXPR, compute_mode, op0,
4417                            build_int_cst (NULL_TREE, size - 1),
4418                            NULL_RTX, 0);
4419                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4420                                            NULL_RTX, 0, OPTAB_WIDEN);
4421                         extra_cost = (shift_cost[speed][compute_mode][post_shift]
4422                                       + shift_cost[speed][compute_mode][size - 1]
4423                                       + 2 * add_cost[speed][compute_mode]);
4424                         t3 = expand_mult_highpart (compute_mode, t2, ml,
4425                                                    NULL_RTX, 1,
4426                                                    max_cost - extra_cost);
4427                         if (t3 != 0)
4428                           {
4429                             t4 = expand_shift
4430                               (RSHIFT_EXPR, compute_mode, t3,
4431                                build_int_cst (NULL_TREE, post_shift),
4432                                NULL_RTX, 1);
4433                             quotient = expand_binop (compute_mode, xor_optab,
4434                                                      t4, t1, tquotient, 0,
4435                                                      OPTAB_WIDEN);
4436                           }
4437                       }
4438                   }
4439               }
4440             else
4441               {
4442                 rtx nsign, t1, t2, t3, t4;
4443                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4444                                                   op0, constm1_rtx), NULL_RTX);
4445                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4446                                    0, OPTAB_WIDEN);
4447                 nsign = expand_shift
4448                   (RSHIFT_EXPR, compute_mode, t2,
4449                    build_int_cst (NULL_TREE, size - 1),
4450                    NULL_RTX, 0);
4451                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4452                                     NULL_RTX);
4453                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4454                                     NULL_RTX, 0);
4455                 if (t4)
4456                   {
4457                     rtx t5;
4458                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4459                                       NULL_RTX, 0);
4460                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4461                                                             t4, t5),
4462                                               tquotient);
4463                   }
4464               }
4465           }
4466
4467         if (quotient != 0)
4468           break;
4469         delete_insns_since (last);
4470
4471         /* Try using an instruction that produces both the quotient and
4472            remainder, using truncation.  We can easily compensate the quotient
4473            or remainder to get floor rounding, once we have the remainder.
4474            Notice that we compute also the final remainder value here,
4475            and return the result right away.  */
4476         if (target == 0 || GET_MODE (target) != compute_mode)
4477           target = gen_reg_rtx (compute_mode);
4478
4479         if (rem_flag)
4480           {
4481             remainder
4482               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4483             quotient = gen_reg_rtx (compute_mode);
4484           }
4485         else
4486           {
4487             quotient
4488               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4489             remainder = gen_reg_rtx (compute_mode);
4490           }
4491
4492         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4493                                  quotient, remainder, 0))
4494           {
4495             /* This could be computed with a branch-less sequence.
4496                Save that for later.  */
4497             rtx tem;
4498             rtx label = gen_label_rtx ();
4499             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4500             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4501                                 NULL_RTX, 0, OPTAB_WIDEN);
4502             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4503             expand_dec (quotient, const1_rtx);
4504             expand_inc (remainder, op1);
4505             emit_label (label);
4506             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4507           }
4508
4509         /* No luck with division elimination or divmod.  Have to do it
4510            by conditionally adjusting op0 *and* the result.  */
4511         {
4512           rtx label1, label2, label3, label4, label5;
4513           rtx adjusted_op0;
4514           rtx tem;
4515
4516           quotient = gen_reg_rtx (compute_mode);
4517           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4518           label1 = gen_label_rtx ();
4519           label2 = gen_label_rtx ();
4520           label3 = gen_label_rtx ();
4521           label4 = gen_label_rtx ();
4522           label5 = gen_label_rtx ();
4523           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4524           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4525           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4526                               quotient, 0, OPTAB_LIB_WIDEN);
4527           if (tem != quotient)
4528             emit_move_insn (quotient, tem);
4529           emit_jump_insn (gen_jump (label5));
4530           emit_barrier ();
4531           emit_label (label1);
4532           expand_inc (adjusted_op0, const1_rtx);
4533           emit_jump_insn (gen_jump (label4));
4534           emit_barrier ();
4535           emit_label (label2);
4536           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4537           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4538                               quotient, 0, OPTAB_LIB_WIDEN);
4539           if (tem != quotient)
4540             emit_move_insn (quotient, tem);
4541           emit_jump_insn (gen_jump (label5));
4542           emit_barrier ();
4543           emit_label (label3);
4544           expand_dec (adjusted_op0, const1_rtx);
4545           emit_label (label4);
4546           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4547                               quotient, 0, OPTAB_LIB_WIDEN);
4548           if (tem != quotient)
4549             emit_move_insn (quotient, tem);
4550           expand_dec (quotient, const1_rtx);
4551           emit_label (label5);
4552         }
4553         break;
4554
4555       case CEIL_DIV_EXPR:
4556       case CEIL_MOD_EXPR:
4557         if (unsignedp)
4558           {
4559             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4560               {
4561                 rtx t1, t2, t3;
4562                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4563                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4564                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4565                                    tquotient, 1);
4566                 t2 = expand_binop (compute_mode, and_optab, op0,
4567                                    GEN_INT (d - 1),
4568                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4569                 t3 = gen_reg_rtx (compute_mode);
4570                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4571                                       compute_mode, 1, 1);
4572                 if (t3 == 0)
4573                   {
4574                     rtx lab;
4575                     lab = gen_label_rtx ();
4576                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4577                     expand_inc (t1, const1_rtx);
4578                     emit_label (lab);
4579                     quotient = t1;
4580                   }
4581                 else
4582                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4583                                                           t1, t3),
4584                                             tquotient);
4585                 break;
4586               }
4587
4588             /* Try using an instruction that produces both the quotient and
4589                remainder, using truncation.  We can easily compensate the
4590                quotient or remainder to get ceiling rounding, once we have the
4591                remainder.  Notice that we compute also the final remainder
4592                value here, and return the result right away.  */
4593             if (target == 0 || GET_MODE (target) != compute_mode)
4594               target = gen_reg_rtx (compute_mode);
4595
4596             if (rem_flag)
4597               {
4598                 remainder = (REG_P (target)
4599                              ? target : gen_reg_rtx (compute_mode));
4600                 quotient = gen_reg_rtx (compute_mode);
4601               }
4602             else
4603               {
4604                 quotient = (REG_P (target)
4605                             ? target : gen_reg_rtx (compute_mode));
4606                 remainder = gen_reg_rtx (compute_mode);
4607               }
4608
4609             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4610                                      remainder, 1))
4611               {
4612                 /* This could be computed with a branch-less sequence.
4613                    Save that for later.  */
4614                 rtx label = gen_label_rtx ();
4615                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4616                                  compute_mode, label);
4617                 expand_inc (quotient, const1_rtx);
4618                 expand_dec (remainder, op1);
4619                 emit_label (label);
4620                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4621               }
4622
4623             /* No luck with division elimination or divmod.  Have to do it
4624                by conditionally adjusting op0 *and* the result.  */
4625             {
4626               rtx label1, label2;
4627               rtx adjusted_op0, tem;
4628
4629               quotient = gen_reg_rtx (compute_mode);
4630               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4631               label1 = gen_label_rtx ();
4632               label2 = gen_label_rtx ();
4633               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4634                                compute_mode, label1);
4635               emit_move_insn  (quotient, const0_rtx);
4636               emit_jump_insn (gen_jump (label2));
4637               emit_barrier ();
4638               emit_label (label1);
4639               expand_dec (adjusted_op0, const1_rtx);
4640               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4641                                   quotient, 1, OPTAB_LIB_WIDEN);
4642               if (tem != quotient)
4643                 emit_move_insn (quotient, tem);
4644               expand_inc (quotient, const1_rtx);
4645               emit_label (label2);
4646             }
4647           }
4648         else /* signed */
4649           {
4650             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4651                 && INTVAL (op1) >= 0)
4652               {
4653                 /* This is extremely similar to the code for the unsigned case
4654                    above.  For 2.7 we should merge these variants, but for
4655                    2.6.1 I don't want to touch the code for unsigned since that
4656                    get used in C.  The signed case will only be used by other
4657                    languages (Ada).  */
4658
4659                 rtx t1, t2, t3;
4660                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4661                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4662                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4663                                    tquotient, 0);
4664                 t2 = expand_binop (compute_mode, and_optab, op0,
4665                                    GEN_INT (d - 1),
4666                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4667                 t3 = gen_reg_rtx (compute_mode);
4668                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4669                                       compute_mode, 1, 1);
4670                 if (t3 == 0)
4671                   {
4672                     rtx lab;
4673                     lab = gen_label_rtx ();
4674                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4675                     expand_inc (t1, const1_rtx);
4676                     emit_label (lab);
4677                     quotient = t1;
4678                   }
4679                 else
4680                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4681                                                           t1, t3),
4682                                             tquotient);
4683                 break;
4684               }
4685
4686             /* Try using an instruction that produces both the quotient and
4687                remainder, using truncation.  We can easily compensate the
4688                quotient or remainder to get ceiling rounding, once we have the
4689                remainder.  Notice that we compute also the final remainder
4690                value here, and return the result right away.  */
4691             if (target == 0 || GET_MODE (target) != compute_mode)
4692               target = gen_reg_rtx (compute_mode);
4693             if (rem_flag)
4694               {
4695                 remainder= (REG_P (target)
4696                             ? target : gen_reg_rtx (compute_mode));
4697                 quotient = gen_reg_rtx (compute_mode);
4698               }
4699             else
4700               {
4701                 quotient = (REG_P (target)
4702                             ? target : gen_reg_rtx (compute_mode));
4703                 remainder = gen_reg_rtx (compute_mode);
4704               }
4705
4706             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4707                                      remainder, 0))
4708               {
4709                 /* This could be computed with a branch-less sequence.
4710                    Save that for later.  */
4711                 rtx tem;
4712                 rtx label = gen_label_rtx ();
4713                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4714                                  compute_mode, label);
4715                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4716                                     NULL_RTX, 0, OPTAB_WIDEN);
4717                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4718                 expand_inc (quotient, const1_rtx);
4719                 expand_dec (remainder, op1);
4720                 emit_label (label);
4721                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4722               }
4723
4724             /* No luck with division elimination or divmod.  Have to do it
4725                by conditionally adjusting op0 *and* the result.  */
4726             {
4727               rtx label1, label2, label3, label4, label5;
4728               rtx adjusted_op0;
4729               rtx tem;
4730
4731               quotient = gen_reg_rtx (compute_mode);
4732               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4733               label1 = gen_label_rtx ();
4734               label2 = gen_label_rtx ();
4735               label3 = gen_label_rtx ();
4736               label4 = gen_label_rtx ();
4737               label5 = gen_label_rtx ();
4738               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4739               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4740                                compute_mode, label1);
4741               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4742                                   quotient, 0, OPTAB_LIB_WIDEN);
4743               if (tem != quotient)
4744                 emit_move_insn (quotient, tem);
4745               emit_jump_insn (gen_jump (label5));
4746               emit_barrier ();
4747               emit_label (label1);
4748               expand_dec (adjusted_op0, const1_rtx);
4749               emit_jump_insn (gen_jump (label4));
4750               emit_barrier ();
4751               emit_label (label2);
4752               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4753                                compute_mode, label3);
4754               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4755                                   quotient, 0, OPTAB_LIB_WIDEN);
4756               if (tem != quotient)
4757                 emit_move_insn (quotient, tem);
4758               emit_jump_insn (gen_jump (label5));
4759               emit_barrier ();
4760               emit_label (label3);
4761               expand_inc (adjusted_op0, const1_rtx);
4762               emit_label (label4);
4763               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4764                                   quotient, 0, OPTAB_LIB_WIDEN);
4765               if (tem != quotient)
4766                 emit_move_insn (quotient, tem);
4767               expand_inc (quotient, const1_rtx);
4768               emit_label (label5);
4769             }
4770           }
4771         break;
4772
4773       case EXACT_DIV_EXPR:
4774         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4775           {
4776             HOST_WIDE_INT d = INTVAL (op1);
4777             unsigned HOST_WIDE_INT ml;
4778             int pre_shift;
4779             rtx t1;
4780
4781             pre_shift = floor_log2 (d & -d);
4782             ml = invert_mod2n (d >> pre_shift, size);
4783             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4784                                build_int_cst (NULL_TREE, pre_shift),
4785                                NULL_RTX, unsignedp);
4786             quotient = expand_mult (compute_mode, t1,
4787                                     gen_int_mode (ml, compute_mode),
4788                                     NULL_RTX, 1);
4789
4790             insn = get_last_insn ();
4791             set_unique_reg_note (insn,
4792                                  REG_EQUAL,
4793                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4794                                                  compute_mode,
4795                                                  op0, op1));
4796           }
4797         break;
4798
4799       case ROUND_DIV_EXPR:
4800       case ROUND_MOD_EXPR:
4801         if (unsignedp)
4802           {
4803             rtx tem;
4804             rtx label;
4805             label = gen_label_rtx ();
4806             quotient = gen_reg_rtx (compute_mode);
4807             remainder = gen_reg_rtx (compute_mode);
4808             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4809               {
4810                 rtx tem;
4811                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4812                                          quotient, 1, OPTAB_LIB_WIDEN);
4813                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4814                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4815                                           remainder, 1, OPTAB_LIB_WIDEN);
4816               }
4817             tem = plus_constant (op1, -1);
4818             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4819                                 build_int_cst (NULL_TREE, 1),
4820                                 NULL_RTX, 1);
4821             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4822             expand_inc (quotient, const1_rtx);
4823             expand_dec (remainder, op1);
4824             emit_label (label);
4825           }
4826         else
4827           {
4828             rtx abs_rem, abs_op1, tem, mask;
4829             rtx label;
4830             label = gen_label_rtx ();
4831             quotient = gen_reg_rtx (compute_mode);
4832             remainder = gen_reg_rtx (compute_mode);
4833             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4834               {
4835                 rtx tem;
4836                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4837                                          quotient, 0, OPTAB_LIB_WIDEN);
4838                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4839                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4840                                           remainder, 0, OPTAB_LIB_WIDEN);
4841               }
4842             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4843             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4844             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4845                                 build_int_cst (NULL_TREE, 1),
4846                                 NULL_RTX, 1);
4847             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4848             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4849                                 NULL_RTX, 0, OPTAB_WIDEN);
4850             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4851                                  build_int_cst (NULL_TREE, size - 1),
4852                                  NULL_RTX, 0);
4853             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4854                                 NULL_RTX, 0, OPTAB_WIDEN);
4855             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4856                                 NULL_RTX, 0, OPTAB_WIDEN);
4857             expand_inc (quotient, tem);
4858             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4859                                 NULL_RTX, 0, OPTAB_WIDEN);
4860             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4861                                 NULL_RTX, 0, OPTAB_WIDEN);
4862             expand_dec (remainder, tem);
4863             emit_label (label);
4864           }
4865         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4866
4867       default:
4868         gcc_unreachable ();
4869       }
4870
4871   if (quotient == 0)
4872     {
4873       if (target && GET_MODE (target) != compute_mode)
4874         target = 0;
4875
4876       if (rem_flag)
4877         {
4878           /* Try to produce the remainder without producing the quotient.
4879              If we seem to have a divmod pattern that does not require widening,
4880              don't try widening here.  We should really have a WIDEN argument
4881              to expand_twoval_binop, since what we'd really like to do here is
4882              1) try a mod insn in compute_mode
4883              2) try a divmod insn in compute_mode
4884              3) try a div insn in compute_mode and multiply-subtract to get
4885                 remainder
4886              4) try the same things with widening allowed.  */
4887           remainder
4888             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4889                                  op0, op1, target,
4890                                  unsignedp,
4891                                  ((optab_handler (optab2, compute_mode)->insn_code
4892                                    != CODE_FOR_nothing)
4893                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4894           if (remainder == 0)
4895             {
4896               /* No luck there.  Can we do remainder and divide at once
4897                  without a library call?  */
4898               remainder = gen_reg_rtx (compute_mode);
4899               if (! expand_twoval_binop ((unsignedp
4900                                           ? udivmod_optab
4901                                           : sdivmod_optab),
4902                                          op0, op1,
4903                                          NULL_RTX, remainder, unsignedp))
4904                 remainder = 0;
4905             }
4906
4907           if (remainder)
4908             return gen_lowpart (mode, remainder);
4909         }
4910
4911       /* Produce the quotient.  Try a quotient insn, but not a library call.
4912          If we have a divmod in this mode, use it in preference to widening
4913          the div (for this test we assume it will not fail). Note that optab2
4914          is set to the one of the two optabs that the call below will use.  */
4915       quotient
4916         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4917                              op0, op1, rem_flag ? NULL_RTX : target,
4918                              unsignedp,
4919                              ((optab_handler (optab2, compute_mode)->insn_code
4920                                != CODE_FOR_nothing)
4921                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4922
4923       if (quotient == 0)
4924         {
4925           /* No luck there.  Try a quotient-and-remainder insn,
4926              keeping the quotient alone.  */
4927           quotient = gen_reg_rtx (compute_mode);
4928           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4929                                      op0, op1,
4930                                      quotient, NULL_RTX, unsignedp))
4931             {
4932               quotient = 0;
4933               if (! rem_flag)
4934                 /* Still no luck.  If we are not computing the remainder,
4935                    use a library call for the quotient.  */
4936                 quotient = sign_expand_binop (compute_mode,
4937                                               udiv_optab, sdiv_optab,
4938                                               op0, op1, target,
4939                                               unsignedp, OPTAB_LIB_WIDEN);
4940             }
4941         }
4942     }
4943
4944   if (rem_flag)
4945     {
4946       if (target && GET_MODE (target) != compute_mode)
4947         target = 0;
4948
4949       if (quotient == 0)
4950         {
4951           /* No divide instruction either.  Use library for remainder.  */
4952           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4953                                          op0, op1, target,
4954                                          unsignedp, OPTAB_LIB_WIDEN);
4955           /* No remainder function.  Try a quotient-and-remainder
4956              function, keeping the remainder.  */
4957           if (!remainder)
4958             {
4959               remainder = gen_reg_rtx (compute_mode);
4960               if (!expand_twoval_binop_libfunc
4961                   (unsignedp ? udivmod_optab : sdivmod_optab,
4962                    op0, op1,
4963                    NULL_RTX, remainder,
4964                    unsignedp ? UMOD : MOD))
4965                 remainder = NULL_RTX;
4966             }
4967         }
4968       else
4969         {
4970           /* We divided.  Now finish doing X - Y * (X / Y).  */
4971           remainder = expand_mult (compute_mode, quotient, op1,
4972                                    NULL_RTX, unsignedp);
4973           remainder = expand_binop (compute_mode, sub_optab, op0,
4974                                     remainder, target, unsignedp,
4975                                     OPTAB_LIB_WIDEN);
4976         }
4977     }
4978
4979   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4980 }
4981 \f
4982 /* Return a tree node with data type TYPE, describing the value of X.
4983    Usually this is an VAR_DECL, if there is no obvious better choice.
4984    X may be an expression, however we only support those expressions
4985    generated by loop.c.  */
4986
4987 tree
4988 make_tree (tree type, rtx x)
4989 {
4990   tree t;
4991
4992   switch (GET_CODE (x))
4993     {
4994     case CONST_INT:
4995       {
4996         HOST_WIDE_INT hi = 0;
4997
4998         if (INTVAL (x) < 0
4999             && !(TYPE_UNSIGNED (type)
5000                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
5001                      < HOST_BITS_PER_WIDE_INT)))
5002           hi = -1;
5003
5004         t = build_int_cst_wide (type, INTVAL (x), hi);
5005
5006         return t;
5007       }
5008
5009     case CONST_DOUBLE:
5010       if (GET_MODE (x) == VOIDmode)
5011         t = build_int_cst_wide (type,
5012                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
5013       else
5014         {
5015           REAL_VALUE_TYPE d;
5016
5017           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5018           t = build_real (type, d);
5019         }
5020
5021       return t;
5022
5023     case CONST_VECTOR:
5024       {
5025         int units = CONST_VECTOR_NUNITS (x);
5026         tree itype = TREE_TYPE (type);
5027         tree t = NULL_TREE;
5028         int i;
5029
5030
5031         /* Build a tree with vector elements.  */
5032         for (i = units - 1; i >= 0; --i)
5033           {
5034             rtx elt = CONST_VECTOR_ELT (x, i);
5035             t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
5036           }
5037
5038         return build_vector (type, t);
5039       }
5040
5041     case PLUS:
5042       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5043                           make_tree (type, XEXP (x, 1)));
5044
5045     case MINUS:
5046       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5047                           make_tree (type, XEXP (x, 1)));
5048
5049     case NEG:
5050       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5051
5052     case MULT:
5053       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5054                           make_tree (type, XEXP (x, 1)));
5055
5056     case ASHIFT:
5057       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5058                           make_tree (type, XEXP (x, 1)));
5059
5060     case LSHIFTRT:
5061       t = unsigned_type_for (type);
5062       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5063                                          make_tree (t, XEXP (x, 0)),
5064                                          make_tree (type, XEXP (x, 1))));
5065
5066     case ASHIFTRT:
5067       t = signed_type_for (type);
5068       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5069                                          make_tree (t, XEXP (x, 0)),
5070                                          make_tree (type, XEXP (x, 1))));
5071
5072     case DIV:
5073       if (TREE_CODE (type) != REAL_TYPE)
5074         t = signed_type_for (type);
5075       else
5076         t = type;
5077
5078       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5079                                          make_tree (t, XEXP (x, 0)),
5080                                          make_tree (t, XEXP (x, 1))));
5081     case UDIV:
5082       t = unsigned_type_for (type);
5083       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5084                                          make_tree (t, XEXP (x, 0)),
5085                                          make_tree (t, XEXP (x, 1))));
5086
5087     case SIGN_EXTEND:
5088     case ZERO_EXTEND:
5089       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5090                                           GET_CODE (x) == ZERO_EXTEND);
5091       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5092
5093     case CONST:
5094       return make_tree (type, XEXP (x, 0));
5095
5096     case SYMBOL_REF:
5097       t = SYMBOL_REF_DECL (x);
5098       if (t)
5099         return fold_convert (type, build_fold_addr_expr (t));
5100       /* else fall through.  */
5101
5102     default:
5103       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5104
5105       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5106          address mode to pointer mode.  */
5107       if (POINTER_TYPE_P (type))
5108         x = convert_memory_address_addr_space
5109               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5110
5111       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5112          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5113       t->decl_with_rtl.rtl = x;
5114
5115       return t;
5116     }
5117 }
5118 \f
5119 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5120    and returning TARGET.
5121
5122    If TARGET is 0, a pseudo-register or constant is returned.  */
5123
5124 rtx
5125 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5126 {
5127   rtx tem = 0;
5128
5129   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5130     tem = simplify_binary_operation (AND, mode, op0, op1);
5131   if (tem == 0)
5132     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5133
5134   if (target == 0)
5135     target = tem;
5136   else if (tem != target)
5137     emit_move_insn (target, tem);
5138   return target;
5139 }
5140
5141 /* Helper function for emit_store_flag.  */
5142 static rtx
5143 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5144              enum machine_mode mode, enum machine_mode compare_mode,
5145              int unsignedp, rtx x, rtx y, int normalizep,
5146              enum machine_mode target_mode)
5147 {
5148   rtx op0, last, comparison, subtarget, pattern;
5149   enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5150
5151   last = get_last_insn ();
5152   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5153   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5154   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5155   if (!x || !y
5156       || !insn_data[icode].operand[2].predicate
5157           (x, insn_data[icode].operand[2].mode)
5158       || !insn_data[icode].operand[3].predicate
5159           (y, insn_data[icode].operand[3].mode)
5160       || !insn_data[icode].operand[1].predicate (comparison, VOIDmode))
5161     {
5162       delete_insns_since (last);
5163       return NULL_RTX;
5164     }
5165
5166   if (target_mode == VOIDmode)
5167     target_mode = result_mode;
5168   if (!target)
5169     target = gen_reg_rtx (target_mode);
5170
5171   if (optimize
5172       || !(insn_data[(int) icode].operand[0].predicate (target, result_mode)))
5173     subtarget = gen_reg_rtx (result_mode);
5174   else
5175     subtarget = target;
5176
5177   pattern = GEN_FCN (icode) (subtarget, comparison, x, y);
5178   if (!pattern)
5179     return NULL_RTX;
5180   emit_insn (pattern);
5181
5182   /* If we are converting to a wider mode, first convert to
5183      TARGET_MODE, then normalize.  This produces better combining
5184      opportunities on machines that have a SIGN_EXTRACT when we are
5185      testing a single bit.  This mostly benefits the 68k.
5186
5187      If STORE_FLAG_VALUE does not have the sign bit set when
5188      interpreted in MODE, we can do this conversion as unsigned, which
5189      is usually more efficient.  */
5190   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5191     {
5192       convert_move (target, subtarget,
5193                     (GET_MODE_BITSIZE (result_mode) <= HOST_BITS_PER_WIDE_INT)
5194                     && 0 == (STORE_FLAG_VALUE
5195                              & ((HOST_WIDE_INT) 1
5196                                 << (GET_MODE_BITSIZE (result_mode) -1))));
5197       op0 = target;
5198       result_mode = target_mode;
5199     }
5200   else
5201     op0 = subtarget;
5202
5203   /* If we want to keep subexpressions around, don't reuse our last
5204      target.  */
5205   if (optimize)
5206     subtarget = 0;
5207
5208   /* Now normalize to the proper value in MODE.  Sometimes we don't
5209      have to do anything.  */
5210   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5211     ;
5212   /* STORE_FLAG_VALUE might be the most negative number, so write
5213      the comparison this way to avoid a compiler-time warning.  */
5214   else if (- normalizep == STORE_FLAG_VALUE)
5215     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5216
5217   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5218      it hard to use a value of just the sign bit due to ANSI integer
5219      constant typing rules.  */
5220   else if (GET_MODE_BITSIZE (result_mode) <= HOST_BITS_PER_WIDE_INT
5221            && (STORE_FLAG_VALUE
5222                & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (result_mode) - 1))))
5223     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5224                         size_int (GET_MODE_BITSIZE (result_mode) - 1), subtarget,
5225                         normalizep == 1);
5226   else
5227     {
5228       gcc_assert (STORE_FLAG_VALUE & 1);
5229
5230       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5231       if (normalizep == -1)
5232         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5233     }
5234
5235   /* If we were converting to a smaller mode, do the conversion now.  */
5236   if (target_mode != result_mode)
5237     {
5238       convert_move (target, op0, 0);
5239       return target;
5240     }
5241   else
5242     return op0;
5243 }
5244
5245
5246 /* A subroutine of emit_store_flag only including "tricks" that do not
5247    need a recursive call.  These are kept separate to avoid infinite
5248    loops.  */
5249
5250 static rtx
5251 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5252                    enum machine_mode mode, int unsignedp, int normalizep,
5253                    enum machine_mode target_mode)
5254 {
5255   rtx subtarget;
5256   enum insn_code icode;
5257   enum machine_mode compare_mode;
5258   enum mode_class mclass;
5259   enum rtx_code scode;
5260   rtx tem;
5261
5262   if (unsignedp)
5263     code = unsigned_condition (code);
5264   scode = swap_condition (code);
5265
5266   /* If one operand is constant, make it the second one.  Only do this
5267      if the other operand is not constant as well.  */
5268
5269   if (swap_commutative_operands_p (op0, op1))
5270     {
5271       tem = op0;
5272       op0 = op1;
5273       op1 = tem;
5274       code = swap_condition (code);
5275     }
5276
5277   if (mode == VOIDmode)
5278     mode = GET_MODE (op0);
5279
5280   /* For some comparisons with 1 and -1, we can convert this to
5281      comparisons with zero.  This will often produce more opportunities for
5282      store-flag insns.  */
5283
5284   switch (code)
5285     {
5286     case LT:
5287       if (op1 == const1_rtx)
5288         op1 = const0_rtx, code = LE;
5289       break;
5290     case LE:
5291       if (op1 == constm1_rtx)
5292         op1 = const0_rtx, code = LT;
5293       break;
5294     case GE:
5295       if (op1 == const1_rtx)
5296         op1 = const0_rtx, code = GT;
5297       break;
5298     case GT:
5299       if (op1 == constm1_rtx)
5300         op1 = const0_rtx, code = GE;
5301       break;
5302     case GEU:
5303       if (op1 == const1_rtx)
5304         op1 = const0_rtx, code = NE;
5305       break;
5306     case LTU:
5307       if (op1 == const1_rtx)
5308         op1 = const0_rtx, code = EQ;
5309       break;
5310     default:
5311       break;
5312     }
5313
5314   /* If we are comparing a double-word integer with zero or -1, we can
5315      convert the comparison into one involving a single word.  */
5316   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5317       && GET_MODE_CLASS (mode) == MODE_INT
5318       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5319     {
5320       if ((code == EQ || code == NE)
5321           && (op1 == const0_rtx || op1 == constm1_rtx))
5322         {
5323           rtx op00, op01;
5324
5325           /* Do a logical OR or AND of the two words and compare the
5326              result.  */
5327           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5328           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5329           tem = expand_binop (word_mode,
5330                               op1 == const0_rtx ? ior_optab : and_optab,
5331                               op00, op01, NULL_RTX, unsignedp,
5332                               OPTAB_DIRECT);
5333
5334           if (tem != 0)
5335             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5336                                    unsignedp, normalizep);
5337         }
5338       else if ((code == LT || code == GE) && op1 == const0_rtx)
5339         {
5340           rtx op0h;
5341
5342           /* If testing the sign bit, can just test on high word.  */
5343           op0h = simplify_gen_subreg (word_mode, op0, mode,
5344                                       subreg_highpart_offset (word_mode,
5345                                                               mode));
5346           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5347                                  unsignedp, normalizep);
5348         }
5349       else
5350         tem = NULL_RTX;
5351
5352       if (tem)
5353         {
5354           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5355             return tem;
5356           if (!target)
5357             target = gen_reg_rtx (target_mode);
5358
5359           convert_move (target, tem,
5360                         0 == ((normalizep ? normalizep : STORE_FLAG_VALUE)
5361                               & ((HOST_WIDE_INT) 1
5362                                  << (GET_MODE_BITSIZE (word_mode) -1))));
5363           return target;
5364         }
5365     }
5366
5367   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5368      complement of A (for GE) and shifting the sign bit to the low bit.  */
5369   if (op1 == const0_rtx && (code == LT || code == GE)
5370       && GET_MODE_CLASS (mode) == MODE_INT
5371       && (normalizep || STORE_FLAG_VALUE == 1
5372           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5373               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5374                   == ((unsigned HOST_WIDE_INT) 1
5375                       << (GET_MODE_BITSIZE (mode) - 1))))))
5376     {
5377       subtarget = target;
5378
5379       if (!target)
5380         target_mode = mode;
5381
5382       /* If the result is to be wider than OP0, it is best to convert it
5383          first.  If it is to be narrower, it is *incorrect* to convert it
5384          first.  */
5385       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5386         {
5387           op0 = convert_modes (target_mode, mode, op0, 0);
5388           mode = target_mode;
5389         }
5390
5391       if (target_mode != mode)
5392         subtarget = 0;
5393
5394       if (code == GE)
5395         op0 = expand_unop (mode, one_cmpl_optab, op0,
5396                            ((STORE_FLAG_VALUE == 1 || normalizep)
5397                             ? 0 : subtarget), 0);
5398
5399       if (STORE_FLAG_VALUE == 1 || normalizep)
5400         /* If we are supposed to produce a 0/1 value, we want to do
5401            a logical shift from the sign bit to the low-order bit; for
5402            a -1/0 value, we do an arithmetic shift.  */
5403         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5404                             size_int (GET_MODE_BITSIZE (mode) - 1),
5405                             subtarget, normalizep != -1);
5406
5407       if (mode != target_mode)
5408         op0 = convert_modes (target_mode, mode, op0, 0);
5409
5410       return op0;
5411     }
5412
5413   mclass = GET_MODE_CLASS (mode);
5414   for (compare_mode = mode; compare_mode != VOIDmode;
5415        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5416     {
5417      enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5418      icode = optab_handler (cstore_optab, optab_mode)->insn_code;
5419      if (icode != CODE_FOR_nothing)
5420         {
5421           do_pending_stack_adjust ();
5422           tem = emit_cstore (target, icode, code, mode, compare_mode,
5423                              unsignedp, op0, op1, normalizep, target_mode);
5424           if (tem)
5425             return tem;
5426
5427           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5428             {
5429               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5430                                  unsignedp, op1, op0, normalizep, target_mode);
5431               if (tem)
5432                 return tem;
5433             }
5434           break;
5435         }
5436     }
5437
5438   return 0;
5439 }
5440
5441 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5442    and storing in TARGET.  Normally return TARGET.
5443    Return 0 if that cannot be done.
5444
5445    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5446    it is VOIDmode, they cannot both be CONST_INT.
5447
5448    UNSIGNEDP is for the case where we have to widen the operands
5449    to perform the operation.  It says to use zero-extension.
5450
5451    NORMALIZEP is 1 if we should convert the result to be either zero
5452    or one.  Normalize is -1 if we should convert the result to be
5453    either zero or -1.  If NORMALIZEP is zero, the result will be left
5454    "raw" out of the scc insn.  */
5455
5456 rtx
5457 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5458                  enum machine_mode mode, int unsignedp, int normalizep)
5459 {
5460   enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5461   enum rtx_code rcode;
5462   rtx subtarget;
5463   rtx tem, last, trueval;
5464
5465   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5466                            target_mode);
5467   if (tem)
5468     return tem;
5469
5470   /* If we reached here, we can't do this with a scc insn, however there
5471      are some comparisons that can be done in other ways.  Don't do any
5472      of these cases if branches are very cheap.  */
5473   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5474     return 0;
5475
5476   /* See what we need to return.  We can only return a 1, -1, or the
5477      sign bit.  */
5478
5479   if (normalizep == 0)
5480     {
5481       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5482         normalizep = STORE_FLAG_VALUE;
5483
5484       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5485                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5486                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5487         ;
5488       else
5489         return 0;
5490     }
5491
5492   last = get_last_insn ();
5493
5494   /* If optimizing, use different pseudo registers for each insn, instead
5495      of reusing the same pseudo.  This leads to better CSE, but slows
5496      down the compiler, since there are more pseudos */
5497   subtarget = (!optimize
5498                && (target_mode == mode)) ? target : NULL_RTX;
5499   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5500
5501   /* For floating-point comparisons, try the reverse comparison or try
5502      changing the "orderedness" of the comparison.  */
5503   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5504     {
5505       enum rtx_code first_code;
5506       bool and_them;
5507
5508       rcode = reverse_condition_maybe_unordered (code);
5509       if (can_compare_p (rcode, mode, ccp_store_flag)
5510           && (code == ORDERED || code == UNORDERED
5511               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5512               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5513         {
5514           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5515                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5516
5517           /* For the reverse comparison, use either an addition or a XOR.  */
5518           if (want_add
5519               && rtx_cost (GEN_INT (normalizep), PLUS,
5520                            optimize_insn_for_speed_p ()) == 0)
5521             {
5522               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5523                                        STORE_FLAG_VALUE, target_mode);
5524               if (tem)
5525                 return expand_binop (target_mode, add_optab, tem,
5526                                      GEN_INT (normalizep),
5527                                      target, 0, OPTAB_WIDEN);
5528             }
5529           else if (!want_add
5530                    && rtx_cost (trueval, XOR,
5531                                 optimize_insn_for_speed_p ()) == 0)
5532             {
5533               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5534                                        normalizep, target_mode);
5535               if (tem)
5536                 return expand_binop (target_mode, xor_optab, tem, trueval,
5537                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5538             }
5539         }
5540
5541       delete_insns_since (last);
5542
5543       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5544       if (code == ORDERED || code == UNORDERED)
5545         return 0;
5546
5547       and_them = split_comparison (code, mode, &first_code, &code);
5548
5549       /* If there are no NaNs, the first comparison should always fall through.
5550          Effectively change the comparison to the other one.  */
5551       if (!HONOR_NANS (mode))
5552         {
5553           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5554           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5555                                     target_mode);
5556         }
5557
5558 #ifdef HAVE_conditional_move
5559       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5560          conditional move.  */
5561       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5562                                normalizep, target_mode);
5563       if (tem == 0)
5564         return 0;
5565
5566       if (and_them)
5567         tem = emit_conditional_move (target, code, op0, op1, mode,
5568                                      tem, const0_rtx, GET_MODE (tem), 0);
5569       else
5570         tem = emit_conditional_move (target, code, op0, op1, mode,
5571                                      trueval, tem, GET_MODE (tem), 0);
5572
5573       if (tem == 0)
5574         delete_insns_since (last);
5575       return tem;
5576 #else
5577       return 0;
5578 #endif
5579     }
5580
5581   /* The remaining tricks only apply to integer comparisons.  */
5582
5583   if (GET_MODE_CLASS (mode) != MODE_INT)
5584     return 0;
5585
5586   /* If this is an equality comparison of integers, we can try to exclusive-or
5587      (or subtract) the two operands and use a recursive call to try the
5588      comparison with zero.  Don't do any of these cases if branches are
5589      very cheap.  */
5590
5591   if ((code == EQ || code == NE) && op1 != const0_rtx)
5592     {
5593       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5594                           OPTAB_WIDEN);
5595
5596       if (tem == 0)
5597         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5598                             OPTAB_WIDEN);
5599       if (tem != 0)
5600         tem = emit_store_flag (target, code, tem, const0_rtx,
5601                                mode, unsignedp, normalizep);
5602       if (tem != 0)
5603         return tem;
5604
5605       delete_insns_since (last);
5606     }
5607
5608   /* For integer comparisons, try the reverse comparison.  However, for
5609      small X and if we'd have anyway to extend, implementing "X != 0"
5610      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5611   rcode = reverse_condition (code);
5612   if (can_compare_p (rcode, mode, ccp_store_flag)
5613       && ! (optab_handler (cstore_optab, mode)->insn_code == CODE_FOR_nothing
5614             && code == NE
5615             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5616             && op1 == const0_rtx))
5617     {
5618       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5619                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5620
5621       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5622       if (want_add
5623           && rtx_cost (GEN_INT (normalizep), PLUS,
5624                        optimize_insn_for_speed_p ()) == 0)
5625         {
5626           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5627                                    STORE_FLAG_VALUE, target_mode);
5628           if (tem != 0)
5629             tem = expand_binop (target_mode, add_optab, tem,
5630                                 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5631         }
5632       else if (!want_add
5633                && rtx_cost (trueval, XOR,
5634                             optimize_insn_for_speed_p ()) == 0)
5635         {
5636           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5637                                    normalizep, target_mode);
5638           if (tem != 0)
5639             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5640                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5641         }
5642
5643       if (tem != 0)
5644         return tem;
5645       delete_insns_since (last);
5646     }
5647
5648   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5649      the constant zero.  Reject all other comparisons at this point.  Only
5650      do LE and GT if branches are expensive since they are expensive on
5651      2-operand machines.  */
5652
5653   if (op1 != const0_rtx
5654       || (code != EQ && code != NE
5655           && (BRANCH_COST (optimize_insn_for_speed_p (),
5656                            false) <= 1 || (code != LE && code != GT))))
5657     return 0;
5658
5659   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5660      do the necessary operation below.  */
5661
5662   tem = 0;
5663
5664   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5665      the sign bit set.  */
5666
5667   if (code == LE)
5668     {
5669       /* This is destructive, so SUBTARGET can't be OP0.  */
5670       if (rtx_equal_p (subtarget, op0))
5671         subtarget = 0;
5672
5673       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5674                           OPTAB_WIDEN);
5675       if (tem)
5676         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5677                             OPTAB_WIDEN);
5678     }
5679
5680   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5681      number of bits in the mode of OP0, minus one.  */
5682
5683   if (code == GT)
5684     {
5685       if (rtx_equal_p (subtarget, op0))
5686         subtarget = 0;
5687
5688       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5689                           size_int (GET_MODE_BITSIZE (mode) - 1),
5690                           subtarget, 0);
5691       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5692                           OPTAB_WIDEN);
5693     }
5694
5695   if (code == EQ || code == NE)
5696     {
5697       /* For EQ or NE, one way to do the comparison is to apply an operation
5698          that converts the operand into a positive number if it is nonzero
5699          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5700          for NE we negate.  This puts the result in the sign bit.  Then we
5701          normalize with a shift, if needed.
5702
5703          Two operations that can do the above actions are ABS and FFS, so try
5704          them.  If that doesn't work, and MODE is smaller than a full word,
5705          we can use zero-extension to the wider mode (an unsigned conversion)
5706          as the operation.  */
5707
5708       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5709          that is compensated by the subsequent overflow when subtracting
5710          one / negating.  */
5711
5712       if (optab_handler (abs_optab, mode)->insn_code != CODE_FOR_nothing)
5713         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5714       else if (optab_handler (ffs_optab, mode)->insn_code != CODE_FOR_nothing)
5715         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5716       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5717         {
5718           tem = convert_modes (word_mode, mode, op0, 1);
5719           mode = word_mode;
5720         }
5721
5722       if (tem != 0)
5723         {
5724           if (code == EQ)
5725             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5726                                 0, OPTAB_WIDEN);
5727           else
5728             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5729         }
5730
5731       /* If we couldn't do it that way, for NE we can "or" the two's complement
5732          of the value with itself.  For EQ, we take the one's complement of
5733          that "or", which is an extra insn, so we only handle EQ if branches
5734          are expensive.  */
5735
5736       if (tem == 0
5737           && (code == NE
5738               || BRANCH_COST (optimize_insn_for_speed_p (),
5739                               false) > 1))
5740         {
5741           if (rtx_equal_p (subtarget, op0))
5742             subtarget = 0;
5743
5744           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5745           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5746                               OPTAB_WIDEN);
5747
5748           if (tem && code == EQ)
5749             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5750         }
5751     }
5752
5753   if (tem && normalizep)
5754     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5755                         size_int (GET_MODE_BITSIZE (mode) - 1),
5756                         subtarget, normalizep == 1);
5757
5758   if (tem)
5759     {
5760       if (!target)
5761         ;
5762       else if (GET_MODE (tem) != target_mode)
5763         {
5764           convert_move (target, tem, 0);
5765           tem = target;
5766         }
5767       else if (!subtarget)
5768         {
5769           emit_move_insn (target, tem);
5770           tem = target;
5771         }
5772     }
5773   else
5774     delete_insns_since (last);
5775
5776   return tem;
5777 }
5778
5779 /* Like emit_store_flag, but always succeeds.  */
5780
5781 rtx
5782 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5783                        enum machine_mode mode, int unsignedp, int normalizep)
5784 {
5785   rtx tem, label;
5786   rtx trueval, falseval;
5787
5788   /* First see if emit_store_flag can do the job.  */
5789   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5790   if (tem != 0)
5791     return tem;
5792
5793   if (!target)
5794     target = gen_reg_rtx (word_mode);
5795
5796   /* If this failed, we have to do this with set/compare/jump/set code.
5797      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5798   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5799   if (code == NE
5800       && GET_MODE_CLASS (mode) == MODE_INT
5801       && REG_P (target)
5802       && op0 == target
5803       && op1 == const0_rtx)
5804     {
5805       label = gen_label_rtx ();
5806       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5807                                mode, NULL_RTX, NULL_RTX, label, -1);
5808       emit_move_insn (target, trueval);
5809       emit_label (label);
5810       return target;
5811     }
5812
5813   if (!REG_P (target)
5814       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5815     target = gen_reg_rtx (GET_MODE (target));
5816
5817   /* Jump in the right direction if the target cannot implement CODE
5818      but can jump on its reverse condition.  */
5819   falseval = const0_rtx;
5820   if (! can_compare_p (code, mode, ccp_jump)
5821       && (! FLOAT_MODE_P (mode)
5822           || code == ORDERED || code == UNORDERED
5823           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5824           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5825     {
5826       enum rtx_code rcode;
5827       if (FLOAT_MODE_P (mode))
5828         rcode = reverse_condition_maybe_unordered (code);
5829       else
5830         rcode = reverse_condition (code);
5831
5832       /* Canonicalize to UNORDERED for the libcall.  */
5833       if (can_compare_p (rcode, mode, ccp_jump)
5834           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5835         {
5836           falseval = trueval;
5837           trueval = const0_rtx;
5838           code = rcode;
5839         }
5840     }
5841
5842   emit_move_insn (target, trueval);
5843   label = gen_label_rtx ();
5844   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5845                            NULL_RTX, label, -1);
5846
5847   emit_move_insn (target, falseval);
5848   emit_label (label);
5849
5850   return target;
5851 }
5852 \f
5853 /* Perform possibly multi-word comparison and conditional jump to LABEL
5854    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5855    now a thin wrapper around do_compare_rtx_and_jump.  */
5856
5857 static void
5858 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5859                  rtx label)
5860 {
5861   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5862   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5863                            NULL_RTX, NULL_RTX, label, -1);
5864 }